diff --git a/KERNEL_REV b/KERNEL_REV index 126043c4..cd127107 100644 --- a/KERNEL_REV +++ b/KERNEL_REV @@ -1 +1 @@ -b676275f234b11a68cb4c8a2955d69cb3b786d97 \ No newline at end of file +34b4c202cc127021c923903d59c841daa2b44fef \ No newline at end of file diff --git a/lib/contracts/IDBSQLClient.ts b/lib/contracts/IDBSQLClient.ts index 80aa2763..de67070e 100644 --- a/lib/contracts/IDBSQLClient.ts +++ b/lib/contracts/IDBSQLClient.ts @@ -22,6 +22,10 @@ type AuthOptions = oauthClientId?: string; oauthClientSecret?: string; useDatabricksOAuthInAzure?: boolean; + // OAuth scopes to request. When omitted, the kernel backend defaults the + // U2M flow to `['sql', 'offline_access']` (parity with the Thrift driver's + // `defaultOAuthScopes`), overriding the kernel's bare `all-apis offline_access`. + oauthScopes?: Array; } | { authType: 'custom'; diff --git a/lib/kernel/KernelAuth.ts b/lib/kernel/KernelAuth.ts index 11b50a87..e4aa7c0c 100644 --- a/lib/kernel/KernelAuth.ts +++ b/lib/kernel/KernelAuth.ts @@ -28,6 +28,25 @@ import { buildUserAgentString } from '../utils'; */ const U2M_DEFAULT_REDIRECT_PORT = 8030; +// U2M OAuth scopes default. Matches the standalone Thrift driver's +// `defaultOAuthScopes` (lib/connection/auth/DatabricksOAuth/OAuthScope.ts): +// `['sql', 'offline_access']`. The kernel's bare default is +// `['all-apis', 'offline_access']`; the `databricks-sql-connector` OAuth app is +// registered for the `sql` scope, so we pass the Thrift-parity scopes explicitly +// unless the caller overrides via `oauthScopes`. +const U2M_DEFAULT_SCOPES = ['sql', 'offline_access']; + +// M2M OAuth scopes default. Matches the standalone Thrift driver (`getScopes` +// forces `['all-apis']` for the client-credentials flow) and the kernel's own +// M2M default (`m2m.rs` → `['all-apis']`). Overridable via `oauthScopes` +// (parity with pyo3, which forwards `scopes` on M2M). +const M2M_DEFAULT_SCOPES = ['all-apis']; + +// Default OAuth client id — identical to the Thrift driver's +// `DatabricksOAuthManager.defaultClientId` and the kernel napi's own U2M default. +// Used for `oauthClientId ?? default`, mirroring Thrift's `getClientId()`. +const DEFAULT_OAUTH_CLIENT_ID = 'databricks-sql-connector'; + /** * Shape consumed by the napi-binding's `openSession()` (see * `native/kernel/index.d.ts`). Mirrors `ConnectionOptions` in the binding's @@ -189,12 +208,15 @@ export type KernelNativeConnectionOptions = KernelSessionDefaults & authMode: 'OAuthM2m'; oauthClientId: string; oauthClientSecret: string; + oauthScopes?: Array; } | { hostName: string; httpPath: string; authMode: 'OAuthU2m'; oauthRedirectPort: number; + oauthScopes?: Array; + oauthClientId?: string; } ); @@ -541,6 +563,7 @@ export function buildKernelConnectionOptions(options: ConnectionOptions): Kernel const oauth = options as { oauthClientId?: string; oauthClientSecret?: string; + oauthScopes?: Array; azureTenantId?: string; useDatabricksOAuthInAzure?: boolean; persistence?: unknown; @@ -579,40 +602,18 @@ export function buildKernelConnectionOptions(options: ConnectionOptions): Kernel ); } - // Flow selector — DELIBERATELY DIFFERENT from thrift's - // `DBSQLClient.createAuthProvider` (`DBSQLClient.ts:216`), which keys off - // the secret (`oauthClientSecret === undefined ? U2M : M2M`). kernel keys off - // `oauthClientId` *presence* (the "do I have an id?" signal) instead, so a - // user who set an id but typoed/forgot the secret gets the actionable M2M - // "secret is required" error rather than being silently routed to U2M - // (which would hide their intent). Cost: `id + no secret` throws here - // where thrift would run U2M, and kernel U2M has no custom-client-id support - // (see buildKernelConnectionOptions header). The U2M arm still defends against an id - // sneaking through: fires only when `oauthClientId` is provided as - // a blank-reserved literal (e.g., whitespace, `"null"`, `"undefined"`) - // alongside an absent/blank secret — both `idIsBlank` and - // `secretIsBlank` are true so U2M wins routing, but the caller's - // intent to use U2M with a partially-set id is ambiguous and - // rejected explicitly. - const idIsBlank = - oauth.oauthClientId === undefined || - (typeof oauth.oauthClientId === 'string' && isBlankOrReserved(oauth.oauthClientId)); - const secretIsBlank = - oauth.oauthClientSecret === undefined || - (typeof oauth.oauthClientSecret === 'string' && isBlankOrReserved(oauth.oauthClientSecret)); - - if (idIsBlank && secretIsBlank) { - // U2M — neither id nor secret supplied. - if (oauth.oauthClientId !== undefined) { - // Defense-in-depth: id was set but blank/reserved literal. - // The kernel hardcodes `client_id = "databricks-cli"` for U2M; - // there's no JS-side override knob. - throw new HiveDriverError( - 'kernel backend: `oauthClientId` is not supported on the OAuth U2M flow; ' + - "the kernel uses the built-in 'databricks-cli' client. " + - 'Omit `oauthClientId` for U2M, or supply `oauthClientSecret` for the M2M flow.', - ); - } + // Flow selector + client-id resolution mirror the Thrift driver EXACTLY + // (`DBSQLClient.createAuthProvider`, DBSQLClient.ts:220): + // flow = oauthClientSecret === undefined ? U2M : M2M (strict undefined) + // clientId = oauthClientId ?? defaultClientId (`??` guards null/undefined only) + // No blank/reserved normalization on the OAuth fields — a present-but- + // degenerate value (`""`, `"undefined"`, whitespace) is forwarded verbatim, + // exactly as Thrift forwards it, so the kernel path does not diverge from the + // Thrift backend. (This intentionally re-imports Thrift's env-stringification + // behaviour: a secret that resolved to `""`/`"undefined"` counts as a real + // secret ⇒ M2M, just like Thrift.) + if (oauth.oauthClientSecret === undefined) { + // U2M (browser) — no secret, exactly like Thrift. if (oauth.persistence !== undefined) { throw new HiveDriverError( 'kernel backend: `persistence` (custom OAuth token store) is not yet wired through ' + @@ -623,24 +624,21 @@ export function buildKernelConnectionOptions(options: ConnectionOptions): Kernel 'when the kernel exposes it.', ); } - return { + const u2m = { ...base, - authMode: 'OAuthU2m', + authMode: 'OAuthU2m' as const, oauthRedirectPort: U2M_DEFAULT_REDIRECT_PORT, + // Scopes default to Thrift parity (`sql offline_access`); overridable. + oauthScopes: + Array.isArray(oauth.oauthScopes) && oauth.oauthScopes.length > 0 ? oauth.oauthScopes : U2M_DEFAULT_SCOPES, }; + // clientId: Thrift uses `oauthClientId ?? default`. Forward it verbatim + // when set; when absent the napi applies the same default + // (`databricks-sql-connector`), so omitting it is identical to Thrift. + return oauth.oauthClientId !== undefined ? { ...u2m, oauthClientId: oauth.oauthClientId } : u2m; } - // M2M. - if (typeof oauth.oauthClientId !== 'string' || isBlankOrReserved(oauth.oauthClientId)) { - throw new AuthenticationError( - 'kernel backend: `oauthClientId` is required (non-empty, non-whitespace) for OAuth M2M.', - ); - } - if (typeof oauth.oauthClientSecret !== 'string' || isBlankOrReserved(oauth.oauthClientSecret)) { - throw new AuthenticationError( - 'kernel backend: `oauthClientSecret` must be a non-empty non-whitespace string for OAuth M2M.', - ); - } + // M2M (client credentials) — a secret is present, exactly like Thrift. if (oauth.persistence !== undefined) { throw new HiveDriverError( 'kernel backend: `persistence` is not supported on OAuth M2M ' + @@ -650,8 +648,12 @@ export function buildKernelConnectionOptions(options: ConnectionOptions): Kernel return { ...base, authMode: 'OAuthM2m', - oauthClientId: oauth.oauthClientId, + // Thrift: `getClientId()` = `oauthClientId ?? defaultClientId`. + oauthClientId: oauth.oauthClientId ?? DEFAULT_OAUTH_CLIENT_ID, oauthClientSecret: oauth.oauthClientSecret, + // Configurable (parity with pyo3); defaults to `['all-apis']`. + oauthScopes: + Array.isArray(oauth.oauthScopes) && oauth.oauthScopes.length > 0 ? oauth.oauthScopes : M2M_DEFAULT_SCOPES, }; } diff --git a/lib/kernel/KernelBackend.ts b/lib/kernel/KernelBackend.ts index 927771df..e0ee420f 100644 --- a/lib/kernel/KernelBackend.ts +++ b/lib/kernel/KernelBackend.ts @@ -19,6 +19,7 @@ import { ConnectionOptions, OpenSessionRequest } from '../contracts/IDBSQLClient import { InternalConnectionOptions } from '../contracts/InternalConnectionOptions'; import { LogLevel } from '../contracts/IDBSQLLogger'; import HiveDriverError from '../errors/HiveDriverError'; +import { serializeQueryTags } from '../utils'; import { getKernelNative, KernelNativeBinding, KernelConnection } from './KernelNativeLoader'; import { decodeNapiKernelError } from './KernelErrorMapping'; import { buildKernelConnectionOptions, buildKernelRetryOptions, KernelNativeConnectionOptions } from './KernelAuth'; @@ -145,6 +146,20 @@ export default class KernelBackend implements IBackend { if (request.configuration !== undefined) { sessionOptions.sessionConf = { ...request.configuration }; } + // Session-level query tags: serialize into the reserved `QUERY_TAGS` + // session conf. The kernel allowlists `QUERY_TAGS` (SESSION_CONF_ALLOWLIST) + // and forwards it onto the SEA `CreateSession` `session_confs`, mirroring + // the Thrift backend's `ThriftBackend.openSession`. Runs after the + // `configuration` merge so `queryTags` takes precedence over an explicit + // `configuration.QUERY_TAGS`, matching the documented contract. + if (request.queryTags !== undefined) { + const serialized = serializeQueryTags(request.queryTags); + if (serialized) { + sessionOptions.sessionConf = { ...(sessionOptions.sessionConf ?? {}), QUERY_TAGS: serialized }; + } else if (sessionOptions.sessionConf) { + delete sessionOptions.sessionConf.QUERY_TAGS; + } + } let nativeConnection: KernelConnection; try { diff --git a/lib/kernel/KernelSessionBackend.ts b/lib/kernel/KernelSessionBackend.ts index 6784042e..84a765cb 100644 --- a/lib/kernel/KernelSessionBackend.ts +++ b/lib/kernel/KernelSessionBackend.ts @@ -156,20 +156,36 @@ export default class KernelSessionBackend implements ISessionBackend { public async executeStatement(statement: string, options: ExecuteStatementOptions): Promise { this.failIfClosed(); + // Per-statement options the kernel backend doesn't honour are treated as + // NO-OPs (logged at warn), not errors — so call sites written for the Thrift + // backend are drop-in on the kernel path. These are perf/format hints the + // kernel governs internally (useCloudFetch / useLZ4Compression) or a feature + // it doesn't expose yet (staging); ignoring them cannot change query results. + // NB: parameter binding (compound/BINARY) is deliberately NOT no-op'd — a + // dropped param would silently change results, so it still throws. if (options.useCloudFetch !== undefined) { - throw new HiveDriverError( - 'kernel executeStatement: useCloudFetch is controlled by the kernel result configuration and is not a per-statement option on kernel', - ); + this.context + .getLogger() + .log( + LogLevel.warn, + 'kernel executeStatement: ignoring per-statement `useCloudFetch` — result fetching is governed by the kernel result configuration (no-op on kernel).', + ); } if (options.useLZ4Compression !== undefined) { - throw new HiveDriverError( - 'kernel executeStatement: useLZ4Compression is not supported on kernel (result compression is governed by the kernel)', - ); + this.context + .getLogger() + .log( + LogLevel.warn, + 'kernel executeStatement: ignoring per-statement `useLZ4Compression` — result compression is governed by the kernel (no-op on kernel).', + ); } if (options.stagingAllowedLocalPath !== undefined) { - throw new HiveDriverError( - 'kernel executeStatement: stagingAllowedLocalPath (volume operations) is not supported on kernel', - ); + this.context + .getLogger() + .log( + LogLevel.warn, + 'kernel executeStatement: ignoring `stagingAllowedLocalPath` — volume/staging operations are not yet supported on the kernel backend (no-op).', + ); } // `runAsync` selects the kernel execution path. NOTE: this is a kernel- diff --git a/native/kernel/index.d.ts b/native/kernel/index.d.ts index 2829489c..0b042121 100644 --- a/native/kernel/index.d.ts +++ b/native/kernel/index.d.ts @@ -33,7 +33,7 @@ * byte-identical Thrift parity, the JS adapter pre-serialises via * `serializeQueryTags` and writes the result into * `statementConf["query_tags"]` directly — see - * `SeaSessionBackend.executeStatement` in the NodeJS driver. This + * `KernelSessionBackend.executeStatement` in the NodeJS driver. This * path is the one the production code uses. */ export interface ExecuteOptions { @@ -99,7 +99,7 @@ export interface NamedTypedValueInput { /** * Authentication mode selector crossing the napi boundary. The string * literals are what napi-rs emits from this `#[napi(string_enum)]` — the - * NodeJS SEA adapter (`SeaAuth`) matches them verbatim (`'Pat'`, + * NodeJS SEA adapter (`KernelAuth`) matches them verbatim (`'Pat'`, * `'OAuthM2m'`, `'OAuthU2m'`). * * Mirrors the kernel [`AuthConfig`] variants this binding supports. @@ -133,6 +133,25 @@ export interface HeaderEntry { name: string value: string } +/** + * Programmatic HTTP/HTTPS proxy configuration, mirroring the kernel's + * internal [`ProxyConfig`]. Supplied as a structured object rather than a + * flattened URL so credentials never have to be percent-encoded into the URL + * and the bypass-host list can be expressed. + * + * - `url` — proxy endpoint, e.g. `"http://proxy.corp.example.com:8080"`. Must + * use the `http://` or `https://` scheme. + * - `username` / `password` — optional proxy basic-auth, applied via + * `reqwest`'s `Proxy::basic_auth` (not embedded in the URL). + * - `bypassHosts` — optional comma-separated host/domain list that should + * bypass the proxy (e.g. `"localhost,*.internal.corp"`). + */ +export interface ProxyInput { + url: string + username?: string + password?: string + bypassHosts?: string +} /** * JS-visible options for opening a Databricks SQL session. * @@ -360,6 +379,33 @@ export interface ConnectionOptions { * [`HttpConfig::overall_timeout`]. */ retryOverallTimeoutSecs?: number + /** + * Programmatic HTTP/HTTPS proxy ([`ProxyInput`]) to route all kernel + * traffic through. Carries the proxy `url`, optional basic-auth + * `username` / `password`, and an optional `bypassHosts` list — mapped + * field-for-field onto the kernel [`ProxyConfig`]. + * + * Omitted ⇒ the kernel does NOT configure a proxy explicitly and + * `reqwest`'s standard behaviour applies — the `HTTPS_PROXY` / + * `HTTP_PROXY` / `NO_PROXY` environment variables are still honoured. + * Setting this **overrides** those env vars. This complements the env-var + * path: callers who cannot set process env vars (e.g. a long-lived Node + * server) can now route a single connection through a proxy + * programmatically. + */ + proxy?: ProxyInput + /** + * Per-connection socket read timeout, in milliseconds. Caps how + * long a single HTTP round-trip may block waiting on the server + * before the request errors out. Maps onto the kernel + * [`HttpConfig::request_timeout`] (the internal reqwest + * `Client::timeout`). + * + * Omitted ⇒ kernel default (120 000 ms / 120 s). Napi-rs + * serialises `u32` as JS `number`; the largest representable value + * (~49.7 days) far exceeds any sensible socket timeout. + */ + socketTimeoutMs?: number } /** * Open a Databricks SQL session and return an opaque `Connection` diff --git a/tests/unit/kernel/auth-edge-cases.test.ts b/tests/unit/kernel/auth-edge-cases.test.ts index 371ac22f..6c837fce 100644 --- a/tests/unit/kernel/auth-edge-cases.test.ts +++ b/tests/unit/kernel/auth-edge-cases.test.ts @@ -67,126 +67,59 @@ describe('KernelAuth — edge cases (input validation + ambiguity)', () => { } }); - // Round-4 NF3-2: presence of `oauthClientId` signals M2M intent. - // A blank/reserved-literal `oauthClientSecret` is then a missing-secret - // typo, not a request to fall back to U2M. Surface the M2M "secret - // required" AuthenticationError so the user fixes the real problem - // rather than swap class to a HiveDriverError pointing at a flow - // they didn't intend to use. - it('rejects mixed-case reserved-literal oauthClientSecret with AuthenticationError when id is set', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientId: 'client-uuid', - oauthClientSecret: 'NULL', - }; - - expect(() => buildKernelConnectionOptions(opts)).to.throw( - AuthenticationError, - /oauthClientSecret.*non-empty.*OAuth M2M/, - ); - }); - - it('rejects whitespace-only oauthClientId on M2M', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientId: ' ', - oauthClientSecret: 'dose-fake-secret', - }; - - expect(() => buildKernelConnectionOptions(opts)).to.throw(AuthenticationError, /oauthClientId.*required/); - }); - - it('rejects whitespace-only oauthClientSecret with AuthenticationError when oauthClientId is set (M2M intent)', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientId: 'client-uuid', - oauthClientSecret: '\n\t', - }; - - expect(() => buildKernelConnectionOptions(opts)).to.throw( - AuthenticationError, - /oauthClientSecret.*non-empty.*OAuth M2M/, - ); - }); - - it('rejects literal "undefined" as oauthClientId on M2M', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientId: 'undefined', - oauthClientSecret: 'dose-fake-secret', - }; - - expect(() => buildKernelConnectionOptions(opts)).to.throw(AuthenticationError, /oauthClientId.*required/); - }); - - it('rejects literal "undefined" as oauthClientSecret with AuthenticationError when id is set (M2M intent)', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientId: 'client-uuid', - oauthClientSecret: 'undefined', - }; - - expect(() => buildKernelConnectionOptions(opts)).to.throw( - AuthenticationError, - /oauthClientSecret.*non-empty.*OAuth M2M/, - ); - }); - - // Round-4 NF3-2: pin the exact class against the round-3 NF-N3 - // regression where M2M-with-empty-secret was routed through the U2M - // arm and raised a bare `HiveDriverError`. `instanceof - // AuthenticationError` correctly returns `false` for a bare - // `HiveDriverError` instance (instanceof is a one-way subclass - // check), so the subclass check IS sufficient to catch the - // regression. We don't add an `error.name` or `constructor.name` - // belt — the former requires `this.name` on the subclass (LE4-1 - // handles that separately for downstream-consumer benefit, not for - // this test), and the latter is bundler-fragile (terser/esbuild - // strip class names without `keep_classnames`). - it('M2M-with-empty-secret throws AuthenticationError, not bare HiveDriverError (class pin)', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientId: 'x', - oauthClientSecret: '', - }; + // Strict Thrift parity: flow = `oauthClientSecret === undefined ? U2M : M2M`, + // and OAuth fields are forwarded VERBATIM (no blank/reserved normalization), + // exactly as the Thrift driver does. A present-but-degenerate secret + // (`""` / whitespace / `"undefined"`) therefore counts as a real secret ⇒ M2M + // — byte-for-byte with Thrift (which only routes to U2M when the secret is + // strictly `undefined`). The id rides through as `oauthClientId ?? default`. + const m2mDegenerateSecret = [ + { label: 'reserved-literal "NULL"', secret: 'NULL' }, + { label: 'whitespace-only', secret: '\n\t' }, + { label: 'literal "undefined"', secret: 'undefined' }, + { label: 'empty string', secret: '' }, + ]; + for (const { label, secret } of m2mDegenerateSecret) { + it(`routes id + ${label} secret to M2M (secret !== undefined ⇒ M2M, like Thrift)`, () => { + const native = buildKernelConnectionOptions({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: secret, + } as ConnectionOptions); + expect(native.authMode).to.equal('OAuthM2m'); + expect((native as { oauthClientId?: string }).oauthClientId).to.equal('client-uuid'); + }); + } - expect(() => buildKernelConnectionOptions(opts)).to.throw( - AuthenticationError, - /oauthClientSecret.*non-empty.*OAuth M2M/, - ); - }); + // Degenerate ids on M2M are forwarded verbatim (`oauthClientId ?? default` + // keeps a non-nullish value), NOT rejected — matching Thrift's getClientId(). + for (const id of [' ', 'undefined']) { + it(`forwards a degenerate oauthClientId (${JSON.stringify(id)}) verbatim on M2M`, () => { + const native = buildKernelConnectionOptions({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: id, + oauthClientSecret: 'dose-fake-secret', + } as ConnectionOptions); + expect(native.authMode).to.equal('OAuthM2m'); + expect((native as { oauthClientId?: string }).oauthClientId).to.equal(id); + }); + } - // Round-5 DA4-2: the round-3 → round-4 test flips left the U2M-arm - // defense-in-depth U2M+id rejection without coverage. It's still - // reachable: when `oauthClientId` is a blank-reserved literal - // (whitespace, `"null"`, `"undefined"`) AND `oauthClientSecret` is - // absent/blank, BOTH `idIsBlank` and `secretIsBlank` are true so - // U2M wins routing — but a non-undefined id signals ambiguity that - // U2M cannot honor (the kernel hardcodes `databricks-cli`). - it('routes a whitespace oauthClientId with no oauthClientSecret to the U2M defense-in-depth rejection', () => { - const opts: ConnectionOptions = { + // No secret ⇒ U2M, and a degenerate id is forwarded verbatim too (Thrift's + // `oauthClientId ?? default` keeps a non-nullish whitespace id). + it('routes a whitespace oauthClientId with no secret to U2M, forwarding the id verbatim', () => { + const native = buildKernelConnectionOptions({ host: 'example.cloud.databricks.com', path: '/sql/1.0/warehouses/abc', authType: 'databricks-oauth', oauthClientId: ' ', - } as unknown as ConnectionOptions; - - expect(() => buildKernelConnectionOptions(opts)).to.throw( - HiveDriverError, - /oauthClientId.*not supported on the OAuth U2M flow/, - ); + } as unknown as ConnectionOptions); + expect(native.authMode).to.equal('OAuthU2m'); + expect((native as { oauthClientId?: string }).oauthClientId).to.equal(' '); }); }); @@ -260,41 +193,21 @@ describe('KernelAuth — edge cases (input validation + ambiguity)', () => { // `process.env.MY_SECRET || ''` shape) should route to U2M, not // to the M2M arm with an "empty secret" rejection. M2M's error // message would never mention U2M, leaving the user stuck. - it('routes blank oauthClientSecret to U2M (not to an M2M-blank-secret rejection)', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientSecret: '', - }; - - const native = buildKernelConnectionOptions(opts); - expect(native.authMode).to.equal('OAuthU2m'); - }); - - it('routes whitespace-only oauthClientSecret to U2M too', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientSecret: ' \t ', - }; - - const native = buildKernelConnectionOptions(opts); - expect(native.authMode).to.equal('OAuthU2m'); - }); - - it('routes literal-"undefined" oauthClientSecret to U2M too', () => { - const opts: ConnectionOptions = { - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - oauthClientSecret: 'undefined', - }; - - const native = buildKernelConnectionOptions(opts); - expect(native.authMode).to.equal('OAuthU2m'); - }); + // Strict Thrift parity: a present-but-degenerate secret (no id) is still a + // defined secret ⇒ M2M (only a strictly-`undefined` secret routes to U2M). + // With no id, the client defaults via `oauthClientId ?? default`. + for (const secret of ['', ' \t ', 'undefined']) { + it(`routes a degenerate-but-present oauthClientSecret (${JSON.stringify(secret)}, no id) to M2M`, () => { + const native = buildKernelConnectionOptions({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientSecret: secret, + } as ConnectionOptions); + expect(native.authMode).to.equal('OAuthM2m'); + expect((native as { oauthClientId?: string }).oauthClientId).to.equal('databricks-sql-connector'); + }); + } }); describe('explicit-undefined vs missing for Azure-direct discriminants', () => { diff --git a/tests/unit/kernel/auth-m2m.test.ts b/tests/unit/kernel/auth-m2m.test.ts index 365e1e68..7b55bcb2 100644 --- a/tests/unit/kernel/auth-m2m.test.ts +++ b/tests/unit/kernel/auth-m2m.test.ts @@ -17,7 +17,6 @@ import expectNativeConnectionOptions from './_helpers/nativeOptions'; import KernelBackend from '../../../lib/kernel/KernelBackend'; import { buildKernelConnectionOptions } from '../../../lib/kernel/KernelAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; -import AuthenticationError from '../../../lib/errors/AuthenticationError'; import HiveDriverError from '../../../lib/errors/HiveDriverError'; import { makeFakeBinding, makeFakeContext } from './_helpers/fakeBinding'; @@ -40,9 +39,34 @@ describe('KernelAuth + KernelBackend — OAuth M2M auth flow', () => { authMode: 'OAuthM2m', oauthClientId: 'client-uuid', oauthClientSecret: 'dose-fake-secret', + oauthScopes: ['all-apis'], }); }); + it('defaults M2M oauthScopes to all-apis (Thrift + kernel parity)', () => { + const native = buildKernelConnectionOptions({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + }); + expect(native.authMode).to.equal('OAuthM2m'); + expect((native as { oauthScopes?: string[] }).oauthScopes).to.deep.equal(['all-apis']); + }); + + it('honors a caller-supplied M2M oauthScopes override (parity with pyo3)', () => { + const native = buildKernelConnectionOptions({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthClientId: 'client-uuid', + oauthClientSecret: 'dose-fake-secret', + oauthScopes: ['sql', 'offline_access'], + } as ConnectionOptions); + expect((native as { oauthScopes?: string[] }).oauthScopes).to.deep.equal(['sql', 'offline_access']); + }); + it('prepends `/` to the path on the M2M branch too', () => { const opts: ConnectionOptions = { host: 'example.cloud.databricks.com', @@ -56,7 +80,7 @@ describe('KernelAuth + KernelBackend — OAuth M2M auth flow', () => { expect(native.httpPath).to.equal('/sql/1.0/warehouses/abc'); }); - it('rejects missing oauthClientId with AuthenticationError', () => { + it('defaults a missing oauthClientId to the default client on M2M (Thrift `?? default`)', () => { const opts = { host: 'example.cloud.databricks.com', path: '/sql/1.0/warehouses/abc', @@ -64,10 +88,14 @@ describe('KernelAuth + KernelBackend — OAuth M2M auth flow', () => { oauthClientSecret: 'dose-fake-secret', } as unknown as ConnectionOptions; - expect(() => buildKernelConnectionOptions(opts)).to.throw(AuthenticationError, /oauthClientId.*required/); + // Secret present ⇒ M2M; `oauthClientId ?? defaultClientId`, exactly like + // Thrift's getClientId(). (No upfront "id required" throw.) + const native = buildKernelConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthM2m'); + expect((native as { oauthClientId?: string }).oauthClientId).to.equal('databricks-sql-connector'); }); - it('rejects empty oauthClientId with AuthenticationError', () => { + it('forwards an empty oauthClientId verbatim on M2M (Thrift `??` keeps "")', () => { const opts = { host: 'example.cloud.databricks.com', path: '/sql/1.0/warehouses/abc', @@ -76,10 +104,14 @@ describe('KernelAuth + KernelBackend — OAuth M2M auth flow', () => { oauthClientSecret: 'dose-fake-secret', } as unknown as ConnectionOptions; - expect(() => buildKernelConnectionOptions(opts)).to.throw(AuthenticationError, /oauthClientId.*required/); + // `'' ?? default` === '' (nullish coalescing only guards null/undefined), + // so Thrift forwards the empty id; we match it verbatim — no normalization. + const native = buildKernelConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthM2m'); + expect((native as { oauthClientId?: string }).oauthClientId).to.equal(''); }); - it('rejects empty oauthClientSecret with AuthenticationError when oauthClientId is set (M2M intent)', () => { + it('routes id + empty secret to M2M (strict Thrift parity: secret !== undefined ⇒ M2M)', () => { const opts: ConnectionOptions = { host: 'example.cloud.databricks.com', path: '/sql/1.0/warehouses/abc', @@ -88,14 +120,11 @@ describe('KernelAuth + KernelBackend — OAuth M2M auth flow', () => { oauthClientSecret: '', }; - // Presence of `oauthClientId` signals M2M intent; an empty secret - // is a typo/missing-env, not a request to fall back to U2M. - // Surface the M2M "secret required" error so the user knows the - // real problem instead of getting routed to a different flow. - expect(() => buildKernelConnectionOptions(opts)).to.throw( - AuthenticationError, - /oauthClientSecret.*non-empty.*OAuth M2M/, - ); + // Routing is strict `oauthClientSecret === undefined ? U2M : M2M`, byte-for- + // byte with Thrift: a present-but-empty secret counts as a secret ⇒ M2M. + const native = buildKernelConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthM2m'); + expect((native as { oauthClientId?: string }).oauthClientId).to.equal('client-uuid'); }); it('rejects azureTenantId with a clear Entra-direct-out-of-scope error', () => { @@ -177,30 +206,31 @@ describe('KernelAuth + KernelBackend — OAuth M2M auth flow', () => { authMode: 'OAuthM2m', oauthClientId: 'client-uuid', oauthClientSecret: 'dose-fake-secret', + oauthScopes: ['all-apis'], }); await session.close(); await backend.close(); }); - it('rejects connect() for missing oauthClientId before touching the binding', async () => { + it('connect() with an M2M secret but no oauthClientId proceeds with the default client (Thrift parity)', async () => { const { binding, calls } = makeFakeBinding(); const backend = new KernelBackend({ nativeBinding: binding, context: makeFakeContext() }); - let caught: unknown; - try { - await backend.connect({ - host: 'example.cloud.databricks.com', - path: '/sql/1.0/warehouses/abc', - authType: 'databricks-oauth', - // eslint-disable-next-line @typescript-eslint/no-explicit-any - oauthClientSecret: 'dose-fake-secret', - } as any); - } catch (e) { - caught = e; - } - expect(caught).to.be.instanceOf(AuthenticationError); - expect(calls).to.have.lengthOf(0); + // Strict Thrift parity: secret present + no id ⇒ M2M with the default + // client (`oauthClientId ?? default`), not an upfront rejection. + await backend.connect({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + // eslint-disable-next-line @typescript-eslint/no-explicit-any + oauthClientSecret: 'dose-fake-secret', + } as any); + await backend.openSession({}); + + expect(calls).to.have.lengthOf(1); + expect((calls[0].args[0] as { authMode?: string; oauthClientId?: string }).authMode).to.equal('OAuthM2m'); + expect((calls[0].args[0] as { oauthClientId?: string }).oauthClientId).to.equal('databricks-sql-connector'); }); }); }); diff --git a/tests/unit/kernel/auth-u2m.test.ts b/tests/unit/kernel/auth-u2m.test.ts index 4cd7ef40..c21493d5 100644 --- a/tests/unit/kernel/auth-u2m.test.ts +++ b/tests/unit/kernel/auth-u2m.test.ts @@ -17,7 +17,6 @@ import expectNativeConnectionOptions from './_helpers/nativeOptions'; import KernelBackend from '../../../lib/kernel/KernelBackend'; import { buildKernelConnectionOptions } from '../../../lib/kernel/KernelAuth'; import { ConnectionOptions } from '../../../lib/contracts/IDBSQLClient'; -import AuthenticationError from '../../../lib/errors/AuthenticationError'; import HiveDriverError from '../../../lib/errors/HiveDriverError'; import { makeFakeBinding, makeFakeContext } from './_helpers/fakeBinding'; @@ -37,21 +36,47 @@ describe('KernelAuth + KernelBackend — OAuth U2M auth flow', () => { intervalsAsString: true, authMode: 'OAuthU2m', oauthRedirectPort: 8030, + oauthScopes: ['sql', 'offline_access'], }); }); - it('rejects oauthClientId without oauthClientSecret as M2M-with-missing-secret', () => { - // Round-4 NF3-2: presence of `oauthClientId` signals M2M intent. - // Routing now keys off the id (the "do I have an id?" signal), - // not the secret. A caller who supplies id but no secret gets the - // M2M "secret is required" error — the actionable message for the - // real problem (typo'd env var, forgot to export it, etc.). - // - // The U2M arm still has a defense-in-depth rejection of a stray - // `oauthClientId` (the kernel hardcodes `databricks-cli` for U2M); - // see [NF-2 / round-1 history]. That defense fires only when - // BOTH id and secret are blank — the M2M arm's stricter checks - // catch this typical caller-error shape first. + it('defaults U2M oauthScopes to Thrift parity (sql offline_access)', () => { + const native = buildKernelConnectionOptions({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + }); + expect(native.authMode).to.equal('OAuthU2m'); + // Matches the standalone Thrift driver's defaultOAuthScopes, NOT the + // kernel's bare `all-apis offline_access` default. + expect((native as { oauthScopes?: string[] }).oauthScopes).to.deep.equal(['sql', 'offline_access']); + }); + + it('honors a caller-supplied U2M oauthScopes override', () => { + const native = buildKernelConnectionOptions({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthScopes: ['all-apis'], + } as ConnectionOptions); + expect((native as { oauthScopes?: string[] }).oauthScopes).to.deep.equal(['all-apis']); + }); + + it('falls back to the default U2M scopes when oauthScopes is an empty array', () => { + const native = buildKernelConnectionOptions({ + host: 'example.cloud.databricks.com', + path: '/sql/1.0/warehouses/abc', + authType: 'databricks-oauth', + oauthScopes: [], + } as ConnectionOptions); + expect((native as { oauthScopes?: string[] }).oauthScopes).to.deep.equal(['sql', 'offline_access']); + }); + + it('routes oauthClientId + no secret to U2M with that id as a custom client (secret-based routing, Thrift parity)', () => { + // Routing keys off the SECRET (matching Thrift): no usable secret ⇒ U2M, + // regardless of the id. A non-blank `oauthClientId` is then honoured as a + // custom U2M client (Thrift forwards `options.oauthClientId` to its U2M + // flow too). (Old id-presence routing rejected this as M2M-missing-secret.) const opts: ConnectionOptions = { host: 'example.cloud.databricks.com', path: '/sql/1.0/warehouses/abc', @@ -59,10 +84,9 @@ describe('KernelAuth + KernelBackend — OAuth U2M auth flow', () => { oauthClientId: 'custom-client', }; - expect(() => buildKernelConnectionOptions(opts)).to.throw( - AuthenticationError, - /oauthClientSecret.*non-empty.*OAuth M2M/, - ); + const native = buildKernelConnectionOptions(opts); + expect(native.authMode).to.equal('OAuthU2m'); + expect((native as { oauthClientId?: string }).oauthClientId).to.equal('custom-client'); }); it('prepends `/` to the path on the U2M branch too', () => { @@ -143,6 +167,7 @@ describe('KernelAuth + KernelBackend — OAuth U2M auth flow', () => { intervalsAsString: true, authMode: 'OAuthU2m', oauthRedirectPort: 8030, + oauthScopes: ['sql', 'offline_access'], }); await session.close(); diff --git a/tests/unit/kernel/execution.test.ts b/tests/unit/kernel/execution.test.ts index d94c6e2e..101d698a 100644 --- a/tests/unit/kernel/execution.test.ts +++ b/tests/unit/kernel/execution.test.ts @@ -551,6 +551,36 @@ describe('KernelBackend', () => { }); }); + it('openSession() serializes session-level queryTags into sessionConf.QUERY_TAGS', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new KernelBackend({ context: makeContext(), nativeBinding: binding }); + await backend.connect({ host: 'h', path: '/p', token: 't' } as ConnectionOptions); + + await backend.openSession({ queryTags: { team: 'eng', env: 'prod' } }); + + // Session-level tags land in the reserved QUERY_TAGS session conf (the + // kernel allowlists it → SEA CreateSession session_confs), mirroring Thrift. + const conf = (binding.openSessionStub.firstCall.args[0] as { sessionConf?: Record }).sessionConf; + expect(conf?.QUERY_TAGS).to.be.a('string'); + expect(conf?.QUERY_TAGS).to.contain('team:eng').and.to.contain('env:prod'); + }); + + it('openSession() queryTags takes precedence over an explicit configuration.QUERY_TAGS', async () => { + const connection = new FakeNativeConnection(); + const binding = makeBinding(connection); + const backend = new KernelBackend({ context: makeContext(), nativeBinding: binding }); + await backend.connect({ host: 'h', path: '/p', token: 't' } as ConnectionOptions); + + await backend.openSession({ + configuration: { QUERY_TAGS: 'manual-raw-value' }, + queryTags: { team: 'eng' }, + }); + + const conf = (binding.openSessionStub.firstCall.args[0] as { sessionConf?: Record }).sessionConf; + expect(conf?.QUERY_TAGS).to.contain('team:eng').and.to.not.equal('manual-raw-value'); + }); + it('openSession() returns a KernelSessionBackend wrapping the napi Connection', async () => { const connection = new FakeNativeConnection(); const binding = makeBinding(connection); @@ -798,25 +828,22 @@ describe('KernelSessionBackend', () => { } }); - // Genuinely unsupported on kernel — rejected (rather than silently ignored) so - // a caller/agent gets signal instead of a no-op. queryTags / queryTimeout / - // rowLimit are NOT here — they are forwarded (asserted above). - for (const { name, options, re } of [ - { name: 'useCloudFetch', options: { useCloudFetch: true }, re: /useCloudFetch/ }, - { name: 'useLZ4Compression', options: { useLZ4Compression: true }, re: /useLZ4Compression/ }, - { name: 'stagingAllowedLocalPath', options: { stagingAllowedLocalPath: '/tmp' }, re: /stagingAllowedLocalPath/ }, + // Unsupported-on-kernel per-statement HINTS are NO-OPs (not errors) so Thrift- + // targeted call sites are drop-in on the kernel path. They are kernel-governed + // perf/format hints (useCloudFetch / useLZ4Compression) or an unexposed feature + // (staging); ignoring them can't change results. (Parameter binding is NOT here + // — a dropped param would change results, so it still throws; asserted above.) + for (const { name, options } of [ + { name: 'useCloudFetch', options: { useCloudFetch: true } }, + { name: 'useLZ4Compression', options: { useLZ4Compression: true } }, + { name: 'stagingAllowedLocalPath', options: { stagingAllowedLocalPath: '/tmp' } }, ] as const) { - it(`executeStatement rejects ${name} rather than silently ignoring it`, async () => { + it(`executeStatement ignores unsupported ${name} (no-op, does not throw)`, async () => { const connection = new FakeNativeConnection(); const session = makeSession(connection); - let thrown: unknown; - try { - await session.executeStatement('SELECT 1', options); - } catch (err) { - thrown = err; - } - expect(thrown).to.be.instanceOf(HiveDriverError); - expect((thrown as Error).message).to.match(re); + // Must resolve to an operation backend — the option is silently ignored. + const op = await session.executeStatement('SELECT 1', options); + expect(op, `${name} should be a no-op`).to.exist; }); }