From f2d4262bf3c2ff293e1e51b450cb92a7a636bcda Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 17 Jun 2026 03:33:47 +0000 Subject: [PATCH 1/2] fix: strip trailing FORMAT clause before sending query The Formo query API wraps submitted SQL in a paginating subquery with its own FORMAT JSON: SELECT * FROM (YOUR_QUERY) LIMIT 100 FORMAT JSON ClickHouse forbids a FORMAT clause inside a subquery, so a user query that ends in its own FORMAT clause (e.g. `... FORMAT CSV`) produced a nested FORMAT and a 400 from the API. A trailing FORMAT can never take effect through this endpoint anyway -- the outer FORMAT JSON always wins, and output shaping is the CLI's `--format` job. Add stripTrailingFormatClause in src/lib/sql.ts to remove a trailing top-level FORMAT clause and trailing semicolons before the query is sent. The scan is aware of string literals, quoted identifiers, and comments, and is anchored to the end of the statement, so it never touches a FORMAT nested in a subquery or an identifier/function such as formatDateTime(...). 
/**
 * Run a SQL analytics query through the `/v0/query/` endpoint.
 *
 * The API wraps the submitted SQL in a paginating subquery that carries its
 * own `FORMAT JSON`, and ClickHouse rejects a `FORMAT` clause inside a
 * subquery. The SQL is therefore passed through `stripTrailingFormatClause`
 * first, so a trailing `FORMAT`/semicolon does not turn into a 400.
 *
 * @param sql - The SQL text supplied by the user.
 * @returns Whatever `client.post` returns for the query request.
 */
export function queryRunRun(sql: string) {
  // Fail fast when no API key is configured, before a client is built.
  requireApiKey()
  const client = createClient()
  const payload = { query: stripTrailingFormatClause(sql) }
  return client.post('/v0/query/', payload)
}
/**
 * SQL helpers for the query command.
 *
 * The Formo query API wraps the SQL you submit so it can paginate the result
 * set and force a machine-readable response:
 *
 *     SELECT * FROM (YOUR_QUERY) LIMIT N FORMAT JSON
 *
 * ClickHouse does not allow a `FORMAT` clause inside a subquery, so if your
 * query ends in its own `FORMAT` clause the wrapped statement becomes:
 *
 *     SELECT * FROM (SELECT ... FORMAT CSV) LIMIT 100 FORMAT JSON
 *
 * which ClickHouse rejects with a 400. A trailing `FORMAT` (or a trailing
 * semicolon) can never take effect through this endpoint anyway — the outer
 * `FORMAT JSON` always wins, and output shaping is the CLI's `--format` job —
 * so we remove it before sending and let the server wrap a clean query.
 */

/**
 * Matches `format NAME` at the very end of already-masked SQL.
 * Group 1 is the non-word character before the keyword (or '' at the start),
 * which keeps `formatDateTime` and `my_format` from matching; group 2 is the
 * candidate format name.
 */
const TRAILING_FORMAT_CLAUSE = /(^|[^A-Za-z0-9_])format\s+([A-Za-z_][A-Za-z0-9_]*)\s*$/i

/** Last bare word of a string, used to inspect what precedes `format`. */
const LAST_WORD = /([A-Za-z_][A-Za-z0-9_]*)$/

/** Single whitespace character (hoisted so the scan loop does not rebuild it). */
const WHITESPACE = /\s/

/**
 * Words that can follow a column or table that is itself named `format` at the
 * end of a statement (`ORDER BY format DESC`, `FROM db.format FINAL`). They are
 * SQL modifiers, not output format names, so `format DESC` is never a clause.
 */
const NOT_A_FORMAT_NAME = new Set(['asc', 'ascending', 'desc', 'descending', 'final'])

/**
 * Keywords that must be followed by a table expression. A `format` right after
 * one of them is a table name (`FROM format t`), not the FORMAT clause.
 */
const EXPECTS_TABLE_NAME = new Set(['from', 'join'])

/**
 * Strip a trailing, top-level `FORMAT name` clause and any trailing semicolons
 * from a SQL statement.
 *
 * The scan is aware of string literals, quoted identifiers, and comments, so
 * `FORMAT`-looking text inside them is never mistaken for a real clause. The
 * match is anchored to the end of the statement, so a `FORMAT` nested inside
 * parentheses (a subquery) or part of an identifier/function such as
 * `formatDateTime(...)` is left untouched. A column or table that is literally
 * named `format` is also preserved, including when it ends the statement as
 * `ORDER BY format DESC` or `FROM format t`.
 *
 * @param sql - The SQL statement as typed by the user.
 * @returns The statement without its trailing FORMAT clause / semicolons, or
 *   the original input unchanged when there is nothing to strip (or when
 *   stripping would leave an empty query).
 */
export function stripTrailingFormatClause(sql: string): string {
  if (!sql) return sql

  const masked = maskLiterals(sql)

  let end = sql.length
  let didStrip = false
  // Peel top-level semicolons and a trailing FORMAT clause repeatedly so any
  // ordering collapses to the bare query, e.g. `... FORMAT CSV;`,
  // `...; FORMAT CSV`, or `... FORMAT CSV ;`. Trailing whitespace and comments
  // (blank in `masked`) are only skipped to look past them.
  for (;;) {
    let e = end
    while (e > 0 && WHITESPACE.test(masked.charAt(e - 1))) e--
    if (e === 0) break

    if (masked.charAt(e - 1) === ';') {
      end = e - 1
      didStrip = true
      continue
    }

    const cut = findTrailingFormatClause(masked.slice(0, e))
    if (cut === -1) break
    // `cut` is strictly smaller than `e`, so the loop always makes progress.
    end = cut
    didStrip = true
  }

  if (!didStrip) return sql

  // Tidy the real whitespace now left dangling where the clause used to be.
  // Comments are not whitespace, so a comment before the clause is preserved.
  const stripped = sql.slice(0, end).trimEnd()
  // Never manufacture an empty query from non-empty input (degenerate inputs
  // such as a bare `FORMAT JSON`): let the original surface its own error.
  return stripped === '' ? sql : stripped
}

/**
 * Locate a genuine FORMAT clause at the end of `maskedHead`.
 *
 * @param maskedHead - Masked SQL with trailing whitespace already trimmed.
 * @returns The index of the `format` keyword, or -1 when the trailing text is
 *   not a FORMAT clause (no match, or `format` is being used as an identifier).
 */
function findTrailingFormatClause(maskedHead: string): number {
  const match = TRAILING_FORMAT_CLAUSE.exec(maskedHead)
  if (!match) return -1

  // `ORDER BY format DESC`: the word after `format` is a modifier, so `format`
  // is a column/table name here.
  if (NOT_A_FORMAT_NAME.has(match[2].toLowerCase())) return -1

  // Cut position: the `format` keyword itself, after the boundary character.
  const keywordStart = match.index + match[1].length

  // `FROM format t` / `JOIN format f`: `format` is the table being read.
  const before = LAST_WORD.exec(maskedHead.slice(0, keywordStart).trimEnd())
  if (before && EXPECTS_TABLE_NAME.has(before[1].toLowerCase())) return -1

  return keywordStart
}

/**
 * Return a copy of `sql` with the *contents* of string literals, quoted
 * identifiers, and comments replaced by spaces, preserving the original length
 * so indices stay aligned with the source. Quote delimiters are kept; comments
 * (`-- ...`, `# ...`, and block comments) are blanked entirely, delimiters
 * included. This lets the clause scanner reason about real SQL code without
 * tripping over keywords that merely appear inside literals or comments.
 */
function maskLiterals(sql: string): string {
  const out: string[] = []
  const n = sql.length
  let i = 0

  while (i < n) {
    const c = sql.charAt(i)
    const next = sql.charAt(i + 1) // '' when past the end

    // Line comment: -- ... or # ... to end of line. ClickHouse accepts `#`
    // and `#!` line comments for MySQL compatibility. The newline itself is
    // left for the next iteration to copy through.
    if ((c === '-' && next === '-') || c === '#') {
      while (i < n && sql.charAt(i) !== '\n') {
        out.push(' ')
        i++
      }
      continue
    }

    // Block comment: /* ... */ (an unterminated one runs to the end).
    if (c === '/' && next === '*') {
      out.push(' ', ' ')
      i += 2
      while (i < n && !(sql.charAt(i) === '*' && sql.charAt(i + 1) === '/')) {
        out.push(' ')
        i++
      }
      if (i < n) {
        out.push(' ', ' ')
        i += 2
      }
      continue
    }

    // String literal or quoted identifier: '...', "...", `...`
    if (c === "'" || c === '"' || c === '`') {
      const quote = c
      out.push(quote)
      i++
      while (i < n) {
        const d = sql.charAt(i)
        // Backslash escapes are honored inside single-quoted strings.
        if (d === '\\' && quote === "'") {
          out.push(' ')
          i++
          if (i < n) {
            out.push(' ')
            i++
          }
          continue
        }
        if (d === quote) {
          // A doubled delimiter is an escaped quote, not the terminator.
          if (i + 1 < n && sql.charAt(i + 1) === quote) {
            out.push(' ', ' ')
            i += 2
            continue
          }
          out.push(quote)
          i++
          break
        }
        out.push(' ')
        i++
      }
      continue
    }

    out.push(c)
    i++
  }

  return out.join('')
}
/**
 * Mirror what the API does to a submitted query: wrap it in a paginating
 * subquery with a single outer FORMAT JSON. Used to assert the stripped query
 * produces a statement with no FORMAT inside the parentheses.
 */
const wrap = (inner: string): string => `SELECT * FROM (${inner}) LIMIT 100 FORMAT JSON`;

/** Count real `FORMAT name` clauses (ignores formatDateTime, etc.). */
const countFormatClauses = (sql: string): number =>
  (sql.match(/\bformat\s+[A-Za-z_][A-Za-z0-9_]*/gi) ?? []).length;

/** Assert that stripping `input` yields exactly `expected`. */
const expectStripped = (input: string, expected: string): void => {
  expect(stripTrailingFormatClause(input)).to.equal(expected);
};

/** Assert that `sql` passes through the stripper unchanged. */
const expectUntouched = (sql: string): void => {
  expectStripped(sql, sql);
};

describe('lib/sql / stripTrailingFormatClause', () => {
  describe('strips a trailing top-level FORMAT clause', () => {
    it('removes FORMAT CSV', () => {
      expectStripped('SELECT * FROM events FORMAT CSV', 'SELECT * FROM events');
    });

    it('removes FORMAT CSVWithNames', () => {
      expectStripped('SELECT a, b FROM t FORMAT CSVWithNames', 'SELECT a, b FROM t');
    });

    it('removes FORMAT JSON', () => {
      expectStripped('SELECT 1 FORMAT JSON', 'SELECT 1');
    });

    it('is case-insensitive on the keyword and name', () => {
      expectStripped('SELECT 1 format json', 'SELECT 1');
      expectStripped('SELECT 1 Format JSONEachRow', 'SELECT 1');
    });

    it('handles newlines and extra whitespace before FORMAT', () => {
      expectStripped('SELECT 1\n FROM t\nFORMAT TabSeparated', 'SELECT 1\n FROM t');
    });
  });

  describe('handles trailing semicolons', () => {
    it('removes a bare trailing semicolon', () => {
      expectStripped('SELECT 1;', 'SELECT 1');
    });

    it('removes a semicolon after a FORMAT clause', () => {
      expectStripped('SELECT 1 FORMAT CSV;', 'SELECT 1');
    });

    it('removes a FORMAT clause that follows a semicolon', () => {
      expectStripped('SELECT x FROM t ORDER BY x; FORMAT CSV', 'SELECT x FROM t ORDER BY x');
    });

    it('removes whitespace and multiple trailing semicolons around FORMAT', () => {
      expectStripped('SELECT 1 FORMAT CSV ; ', 'SELECT 1');
    });
  });

  describe('does not truncate FORMAT-like identifiers (no false positives)', () => {
    it('keeps a formatDateTime(...) call with no trailing clause', () => {
      expectUntouched("SELECT formatDateTime(ts, '%Y-%m-%d') AS day FROM events");
    });

    it('strips only the real clause, keeping formatDateTime(...) intact', () => {
      expectStripped(
        'SELECT formatDateTime(ts) AS day, count() FROM events GROUP BY day FORMAT CSV',
        'SELECT formatDateTime(ts) AS day, count() FROM events GROUP BY day',
      );
    });

    it('keeps a column/alias literally named format', () => {
      expectUntouched('SELECT id AS format FROM t');
    });

    it('keeps a real FORMAT clause even when a column is named format', () => {
      expectStripped('SELECT format FROM t FORMAT JSON', 'SELECT format FROM t');
    });
  });

  describe('is aware of quotes, comments, and parentheses', () => {
    it('ignores FORMAT inside a string literal', () => {
      expectUntouched("SELECT 'FORMAT CSV' AS note FROM t");
    });

    it('ignores FORMAT inside a line comment', () => {
      expectUntouched('SELECT 1 -- FORMAT CSV');
    });

    it('ignores FORMAT inside a # line comment (MySQL-style)', () => {
      expectUntouched('SELECT 1 # FORMAT CSV');
    });

    it('strips a real FORMAT clause trailed by a # comment', () => {
      expectStripped('SELECT 1 FORMAT CSV # note', 'SELECT 1');
    });

    it('ignores FORMAT inside a block comment', () => {
      expectUntouched('SELECT 1 /* FORMAT CSV */');
    });

    it('ignores a FORMAT nested inside a subquery (not trailing)', () => {
      expectUntouched('SELECT * FROM (SELECT 1 FORMAT CSV) AS x');
    });
  });

  describe('no-ops', () => {
    it('leaves a plain query untouched', () => {
      expectUntouched('SELECT count(*) FROM events');
    });

    it('returns empty input unchanged', () => {
      expectStripped('', '');
    });

    it('does not manufacture an empty query from a bare FORMAT clause', () => {
      expectStripped('FORMAT JSON', 'FORMAT JSON');
    });
  });

  describe('produces a query that wraps cleanly (the bug being fixed)', () => {
    const cases = [
      'SELECT * FROM events FORMAT CSV',
      'SELECT a, b FROM t FORMAT CSVWithNames;',
      'SELECT x FROM t ORDER BY x; FORMAT JSON',
      'SELECT formatDateTime(ts) AS day FROM events FORMAT CSV',
    ];

    for (const sql of cases) {
      it(`leaves no FORMAT inside the parens for: ${sql}`, () => {
        const wrapped = wrap(stripTrailingFormatClause(sql));
        // Exactly one FORMAT clause survives — the outer one the API adds.
        expect(countFormatClauses(wrapped)).to.equal(1);
        // ...and it sits at the very end, outside the subquery parentheses.
        expect(wrapped).to.match(/\)\s+LIMIT\s+100\s+FORMAT\s+JSON$/);
        const openParen = wrapped.indexOf('(');
        const closeParen = wrapped.lastIndexOf(')');
        const innerParens = wrapped.slice(openParen + 1, closeParen);
        expect(countFormatClauses(innerParens)).to.equal(0);
      });
    }
  });
});