From 1499ffc7384a375808af38784e4bd22ac3ebe28c Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 11 May 2026 03:47:36 +0000 Subject: [PATCH 1/4] feat(clickhouse): expose LowCardinality/FixedString/CODEC/SAMPLE BY via Feature\OLAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four OLAP-shaped column and table modifiers to the ClickHouse dialect, exposed through a new `Feature\OLAP` marker interface so the methods are reachable only from the dialect's typed Column/Table subclasses — not from `MySQL`, `PostgreSQL`, `SQLite`, or `MongoDB` builders. - `Column::lowCardinality()` wraps the column type in `LowCardinality(...)`. `Nullable` is applied outside to keep ClickHouse's required wrapping order. - `Table::fixedString($name, $length)` (with a Column-chain forwarder) adds a `FixedString(N)` column for fixed-length values like ISO codes and hash digests. - `Column::codec($spec)` accumulates one or more `CODEC(...)` entries on the column. Multiple calls produce `CODEC(c1, c2, ...)`. - `Table::sampleBy($expression)` (with a Column-chain forwarder) registers a `SAMPLE BY` clause emitted between `ORDER BY` and `TTL` / `SETTINGS`. Rejected on engines that don't take an `ORDER BY` clause. State for `isLowCardinality`, `codecs`, and `sampleBy` lives on `Column\ClickHouse` / `Table\ClickHouse`, so non-OLAP dialects don't expose the methods at all and don't carry the state. The `FixedString` `ColumnType` case is only produced via `Table\ClickHouse::fixedString()`; other dialects' `compileColumnType()` declare a defensive `UnsupportedException` branch to satisfy match exhaustiveness even though the case is unreachable from their builders. 
--- src/Query/Schema/ClickHouse.php | 21 ++- src/Query/Schema/Column/ClickHouse.php | 49 +++++ src/Query/Schema/ColumnType.php | 1 + src/Query/Schema/Feature/OLAP.php | 18 ++ src/Query/Schema/Forwarder/ClickHouse.php | 9 + src/Query/Schema/MongoDB.php | 1 + src/Query/Schema/MySQL.php | 1 + src/Query/Schema/PostgreSQL.php | 1 + src/Query/Schema/SQLite.php | 1 + src/Query/Schema/Table/ClickHouse.php | 49 +++++ tests/Query/Schema/ClickHouseTest.php | 218 ++++++++++++++++++++++ 11 files changed, 368 insertions(+), 1 deletion(-) create mode 100644 src/Query/Schema/Feature/OLAP.php diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index ced5544..1e5a082 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -11,10 +11,11 @@ use Utopia\Query\Schema\Feature\ColumnComments; use Utopia\Query\Schema\Feature\Databases; use Utopia\Query\Schema\Feature\DropPartition; +use Utopia\Query\Schema\Feature\OLAP; use Utopia\Query\Schema\Feature\TableComments; use Utopia\Query\Schema\Feature\Views; -class ClickHouse extends Schema implements TableComments, ColumnComments, DropPartition, Views, Databases +class ClickHouse extends Schema implements TableComments, ColumnComments, DropPartition, Views, Databases, OLAP { use QuotesIdentifiers; use Trait\Databases; @@ -34,6 +35,7 @@ protected function compileColumnType(Column $column): string $type = match ($column->type) { ColumnType::String, ColumnType::Varchar, ColumnType::Relationship => 'String', + ColumnType::FixedString => 'FixedString(' . ($column->length ?? throw new ValidationException('FixedString requires a length.')) . ')', ColumnType::Text => 'String', ColumnType::MediumText, ColumnType::LongText => 'String', ColumnType::Integer => $column->isUnsigned ? 
'UInt32' : 'Int32', @@ -53,6 +55,10 @@ protected function compileColumnType(Column $column): string ColumnType::Serial, ColumnType::BigSerial, ColumnType::SmallSerial => throw new UnsupportedException('SERIAL types are not supported in ClickHouse.'), }; + if ($column instanceof Column\ClickHouse && $column->isLowCardinality) { + $type = 'LowCardinality(' . $type . ')'; + } + if ($column->isNullable) { $type = 'Nullable(' . $type . ')'; } @@ -89,6 +95,10 @@ protected function compileColumnDefinition(Column $column): string $parts[] = 'DEFAULT ' . $this->compileDefaultValue($column->default); } + if ($column instanceof Column\ClickHouse && $column->codecs !== []) { + $parts[] = 'CODEC(' . \implode(', ', $column->codecs) . ')'; + } + if ($column->ttl !== null) { $parts[] = 'TTL ' . $column->ttl; } @@ -226,6 +236,15 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen : ' ORDER BY tuple()'; } + if ($table instanceof Table\ClickHouse && $table->sampleBy !== null) { + if (! $engine->requiresOrderBy()) { + throw new UnsupportedException( + 'SAMPLE BY is only supported on engines that take an ORDER BY clause.' + ); + } + $sql .= ' SAMPLE BY ' . $table->sampleBy; + } + if ($table->ttl !== null) { $sql .= ' TTL ' . $table->ttl; } diff --git a/src/Query/Schema/Column/ClickHouse.php b/src/Query/Schema/Column/ClickHouse.php index 14528a9..2b08e3a 100644 --- a/src/Query/Schema/Column/ClickHouse.php +++ b/src/Query/Schema/Column/ClickHouse.php @@ -2,6 +2,7 @@ namespace Utopia\Query\Schema\Column; +use Utopia\Query\Exception\ValidationException; use Utopia\Query\Schema\Column; use Utopia\Query\Schema\Forwarder; use Utopia\Query\Schema\Table; @@ -13,6 +14,11 @@ class ClickHouse extends Column { use Forwarder\ClickHouse; + public protected(set) bool $isLowCardinality = false; + + /** @var list Column-level CODEC clauses, e.g. 
['Delta(4)', 'LZ4'] */ + public protected(set) array $codecs = []; + /** * @param list $columns * @@ -28,4 +34,47 @@ public function primary(array $columns = []): static|Table return $this->table->primary($columns); } + + /** + * Wrap the column type in `LowCardinality(...)`. + * + * Suitable for string columns with a small number of distinct values + * (status enums, type discriminators, country codes). `Nullable` is + * applied outside `LowCardinality` to match ClickHouse's required + * wrapping order: `Nullable(LowCardinality(String))`. + */ + public function lowCardinality(): static + { + $this->isLowCardinality = true; + + return $this; + } + + /** + * Append a column-level CODEC clause. + * + * Multiple calls accumulate and emit `CODEC(c1, c2, ...)`. Pass either + * a bare codec name (`->codec('LZ4')`) or one with arguments + * (`->codec('Delta(4)')`, `->codec('ZSTD(3)')`). The codec string is + * emitted verbatim and must come from a trusted source. + * + * @throws ValidationException if the codec string is empty or contains + * a semicolon. 
+ */ + public function codec(string $codec): static + { + $trimmed = \trim($codec); + + if ($trimmed === '') { + throw new ValidationException('CODEC expression must not be empty.'); + } + + if (\str_contains($trimmed, ';')) { + throw new ValidationException('CODEC expression must not contain ";".'); + } + + $this->codecs[] = $trimmed; + + return $this; + } } diff --git a/src/Query/Schema/ColumnType.php b/src/Query/Schema/ColumnType.php index a7ff7a1..173bb51 100644 --- a/src/Query/Schema/ColumnType.php +++ b/src/Query/Schema/ColumnType.php @@ -5,6 +5,7 @@ enum ColumnType: string { case String = 'string'; + case FixedString = 'fixedstring'; case Varchar = 'varchar'; case Text = 'text'; case MediumText = 'mediumtext'; diff --git a/src/Query/Schema/Feature/OLAP.php b/src/Query/Schema/Feature/OLAP.php new file mode 100644 index 0000000..3468b87 --- /dev/null +++ b/src/Query/Schema/Feature/OLAP.php @@ -0,0 +1,18 @@ +table->vector($name, $dimensions); } + public function fixedString(string $name, int $length): Column\ClickHouse + { + return $this->table->fixedString($name, $length); + } + public function engine(Engine $engine, string ...$args): Table\ClickHouse { return $this->table->engine($engine, ...$args); @@ -44,4 +49,8 @@ public function partitionBy(string $expression): Table\ClickHouse return $this->table->partitionBy($expression); } + public function sampleBy(string $expression): Table\ClickHouse + { + return $this->table->sampleBy($expression); + } } diff --git a/src/Query/Schema/MongoDB.php b/src/Query/Schema/MongoDB.php index fc504ee..2ba9577 100644 --- a/src/Query/Schema/MongoDB.php +++ b/src/Query/Schema/MongoDB.php @@ -45,6 +45,7 @@ protected function compileColumnType(Column $column): string ColumnType::Linestring, ColumnType::Polygon => 'object', ColumnType::Uuid7 => 'string', ColumnType::Vector => 'array', + ColumnType::FixedString => throw new UnsupportedException('FixedString type is not supported in MongoDB.'), }; } diff --git 
a/src/Query/Schema/MySQL.php b/src/Query/Schema/MySQL.php index 4ea2d66..e6063d2 100644 --- a/src/Query/Schema/MySQL.php +++ b/src/Query/Schema/MySQL.php @@ -73,6 +73,7 @@ protected function compileColumnType(Column $column): string ColumnType::Polygon => 'POLYGON' . ($column->srid !== null ? ' SRID ' . $column->srid : ''), ColumnType::Uuid7 => 'VARCHAR(36)', ColumnType::Vector => throw new UnsupportedException('Vector type is not supported in MySQL.'), + ColumnType::FixedString => throw new UnsupportedException('FixedString type is not supported in MySQL.'), }; } diff --git a/src/Query/Schema/PostgreSQL.php b/src/Query/Schema/PostgreSQL.php index 5bdf04e..94fe682 100644 --- a/src/Query/Schema/PostgreSQL.php +++ b/src/Query/Schema/PostgreSQL.php @@ -79,6 +79,7 @@ protected function compileColumnType(Column $column): string ColumnType::Serial => 'SERIAL', ColumnType::BigSerial => 'BIGSERIAL', ColumnType::SmallSerial => 'SMALLSERIAL', + ColumnType::FixedString => throw new UnsupportedException('FixedString type is not supported in PostgreSQL.'), }; } diff --git a/src/Query/Schema/SQLite.php b/src/Query/Schema/SQLite.php index 868c66a..ae7741a 100644 --- a/src/Query/Schema/SQLite.php +++ b/src/Query/Schema/SQLite.php @@ -36,6 +36,7 @@ protected function compileColumnType(Column $column): string ColumnType::Point, ColumnType::Linestring, ColumnType::Polygon => 'TEXT', ColumnType::Uuid7 => 'VARCHAR(36)', ColumnType::Vector => throw new UnsupportedException('Vector type is not supported in SQLite.'), + ColumnType::FixedString => throw new UnsupportedException('FixedString type is not supported in SQLite.'), }; } diff --git a/src/Query/Schema/Table/ClickHouse.php b/src/Query/Schema/Table/ClickHouse.php index da9b392..d9f3d5e 100644 --- a/src/Query/Schema/Table/ClickHouse.php +++ b/src/Query/Schema/Table/ClickHouse.php @@ -16,6 +16,9 @@ class ClickHouse extends Table { use Trait\CompositePrimary; + /** ClickHouse SAMPLE BY expression. Emitted after ORDER BY when set. 
*/ + public protected(set) ?string $sampleBy = null; + #[\Override] protected function newColumn(string $name, ColumnType $type, ?int $length = null, ?int $precision = null): Column\ClickHouse { @@ -31,6 +34,28 @@ public function vector(string $name, int $dimensions): Column\ClickHouse return $col; } + /** + * Add a `FixedString(N)` column. + * + * Used for fixed-length string values whose byte length is known and + * constant — ISO 3166 country codes, ISO 4217 currency codes, hash + * digests, and similar values that benefit from ClickHouse's columnar + * storage of fixed-width data. + * + * @throws ValidationException if $length is less than 1. + */ + public function fixedString(string $name, int $length): Column\ClickHouse + { + if ($length < 1) { + throw new ValidationException('FixedString length must be at least 1.'); + } + + $col = $this->newColumn($name, ColumnType::FixedString, $length); + $this->columns[] = $col; + + return $col; + } + /** * Select the table engine. Engine-specific arguments are validated against * the engine variant: @@ -158,4 +183,28 @@ public function partitionBy(string $expression): static return $this; } + + /** + * Set the SAMPLE BY expression. Emitted after ORDER BY at table creation + * time. Required to model tables that need approximate-query support via + * `SELECT ... SAMPLE k` on MergeTree-family engines. + * + * @throws ValidationException if the expression is empty or contains a semicolon. 
+ */ + public function sampleBy(string $expression): static + { + $trimmed = \trim($expression); + + if ($trimmed === '') { + throw new ValidationException('SAMPLE BY expression must not be empty.'); + } + + if (\str_contains($trimmed, ';')) { + throw new ValidationException('SAMPLE BY expression must not contain ";".'); + } + + $this->sampleBy = $trimmed; + + return $this; + } } diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 468b3ce..80a1a55 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -945,4 +945,222 @@ public function testAlterRejectsSettings(): void ->settings(['index_granularity' => 4096]) ->alter(); } + + public function testCreateTableFixedStringColumn(): void + { + $schema = new Schema(); + $result = $schema->table('locations') + ->bigInteger('id')->primary() + ->fixedString('country_code', 2) + ->fixedString('currency_code', 3) + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `locations` (`id` Int64, `country_code` FixedString(2), `currency_code` FixedString(3)) ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testCreateTableFixedStringNullable(): void + { + $schema = new Schema(); + $result = $schema->table('t') + ->fixedString('hash', 32)->nullable() + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `t` (`hash` Nullable(FixedString(32))) ENGINE = MergeTree() ORDER BY tuple()', + $result->query, + ); + } + + public function testFixedStringRejectsZeroLength(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->table('t')->fixedString('bad', 0); + } + + public function testCreateTableLowCardinalityColumn(): void + { + $schema = new Schema(); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->string('status')->lowCardinality() + ->create(); + 
$this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `status` LowCardinality(String)) ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testCreateTableLowCardinalityNullableWrapsInBothOrder(): void + { + $schema = new Schema(); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->string('status')->lowCardinality()->nullable() + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `status` Nullable(LowCardinality(String))) ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testAlterAddLowCardinalityColumn(): void + { + $schema = new Schema(); + $result = $schema->table('events') + ->addColumn('country', ColumnType::String)->lowCardinality() + ->alter(); + $this->assertBindingCount($result); + + $this->assertSame( + 'ALTER TABLE `events` ADD COLUMN `country` LowCardinality(String)', + $result->query, + ); + } + + public function testCreateTableColumnWithSingleCodec(): void + { + $schema = new Schema(); + $result = $schema->table('metrics') + ->bigInteger('id')->primary() + ->datetime('ts', 3)->codec('LZ4') + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `metrics` (`id` Int64, `ts` DateTime64(3) CODEC(LZ4)) ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testCreateTableColumnWithMultipleCodecs(): void + { + $schema = new Schema(); + $result = $schema->table('metrics') + ->bigInteger('id')->primary() + ->datetime('ts', 3)->codec('Delta(4)')->codec('LZ4') + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `metrics` (`id` Int64, `ts` DateTime64(3) CODEC(Delta(4), LZ4)) ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testCodecOrderingRelativeToTtlAndComment(): void + { + $schema = new Schema(); + $result = 
$schema->table('events') + ->bigInteger('id')->primary() + ->string('payload') + ->codec('ZSTD(3)') + ->ttl('ts + INTERVAL 30 DAY') + ->comment('Compressed payload') + ->datetime('ts') + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64,' + . ' `payload` String CODEC(ZSTD(3)) TTL ts + INTERVAL 30 DAY COMMENT \'Compressed payload\',' + . ' `ts` DateTime) ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testCodecRejectsEmpty(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->table('t') + ->integer('id')->codec(''); + } + + public function testCodecRejectsSemicolon(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->table('t') + ->integer('id')->codec('LZ4;'); + } + + public function testCreateTableWithSampleBy(): void + { + $schema = new Schema(); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->bigInteger('user_id')->unsigned() + ->sampleBy('user_id') + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `user_id` UInt64) ENGINE = MergeTree() ORDER BY (`id`) SAMPLE BY user_id', + $result->query, + ); + } + + public function testCreateTableSampleByOrderingWithTtlAndSettings(): void + { + $schema = new Schema(); + $table = $schema->table('events'); + $table->bigInteger('id')->primary(); + $table->datetime('created_at'); + $result = $table + ->sampleBy('id') + ->ttl('`created_at` + INTERVAL 30 DAY') + ->settings(['index_granularity' => 4096]) + ->create(); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `created_at` DateTime) ENGINE = MergeTree() ORDER BY (`id`)' + . ' SAMPLE BY id' + . ' TTL `created_at` + INTERVAL 30 DAY' + . 
' SETTINGS index_granularity = 4096', + $result->query, + ); + } + + public function testSampleByRejectsEmpty(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->table('events')->sampleBy(''); + } + + public function testSampleByRejectsSemicolon(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->table('events')->sampleBy('id;'); + } + + public function testSampleByRejectedOnEnginesWithoutOrderBy(): void + { + $this->expectException(UnsupportedException::class); + $this->expectExceptionMessage('SAMPLE BY'); + + $schema = new Schema(); + $schema->table('cache') + ->integer('id')->primary() + ->engine(Engine::Memory) + ->sampleBy('id') + ->create(); + } } From 825c507f1a78a3322b4f78641237762b720e93f5 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 11 May 2026 03:47:42 +0000 Subject: [PATCH 2/4] docs(readme): document OLAP-scoped LowCardinality/FixedString/CODEC/SAMPLE BY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four sections to the ClickHouse Schema chapter covering the new `Feature\OLAP` modifiers. The narrative makes clear that the methods are dialect-scoped at the type level — calling them on `MySQL`, `PostgreSQL`, `SQLite`, or `MongoDB` builders is a compile-time error, not a runtime throw. Also extends the ClickHouse "Supports the ... interfaces" line to list `Views`, `Databases`, and `OLAP` alongside the existing entries. --- README.md | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 72e2640..051a90e 100644 --- a/README.md +++ b/README.md @@ -2033,7 +2033,7 @@ $result = $schema->table('events') ClickHouse uses `Nullable(type)` wrapping for nullable columns, `Enum8(...)` for enums, `Tuple(Float64, Float64)` for points, and `TYPE minmax GRANULARITY 3` for indexes. 
Foreign keys, stored procedures, triggers, generated columns, and CHECK constraints throw `UnsupportedException`. -Supports the `TableComments`, `ColumnComments`, and `DropPartition` interfaces. +Supports the `TableComments`, `ColumnComments`, `DropPartition`, `Views`, `Databases`, and `OLAP` interfaces. **Engine selection** — choose from 10 variants of the `Engine` enum: @@ -2130,6 +2130,73 @@ $schema->table('events') Setting names must match `[A-Za-z_][A-Za-z0-9_]*`; string values are restricted to `[A-Za-z0-9_.\-+/]*`. Use ints / floats / booleans for everything else. Other dialects ignore the call. +**LowCardinality** — wrap a column type in `LowCardinality(...)` for compact dictionary-encoded storage on string columns with a small number of distinct values (status enums, type discriminators, country codes, category labels): + +```php +$schema->table('events') + ->bigInteger('id')->primary() + ->string('status')->lowCardinality() + ->string('country')->lowCardinality()->nullable() + ->create(); + +// CREATE TABLE `events` (`id` Int64, `status` LowCardinality(String), +// `country` Nullable(LowCardinality(String))) ENGINE = MergeTree() ORDER BY (`id`) +``` + +`Nullable` is applied outside `LowCardinality` to match ClickHouse's required wrapping order. The `lowCardinality()` method is only available on the ClickHouse builder — callers on other dialects (`MySQL`, `PostgreSQL`, `SQLite`, `MongoDB`) cannot reach this method at all. + +**FixedString(N)** — fixed-length string column. 
Use for ISO codes, hash digests, and other values whose byte length is known and constant: + +```php +$schema->table('locations') + ->bigInteger('id')->primary() + ->fixedString('country_code', 2) // ISO 3166-1 alpha-2 + ->fixedString('currency_code', 3) // ISO 4217 + ->fixedString('digest', 32) // hex-encoded MD5 + ->create(); + +// CREATE TABLE `locations` (`id` Int64, `country_code` FixedString(2), +// `currency_code` FixedString(3), `digest` FixedString(32)) +// ENGINE = MergeTree() ORDER BY (`id`) +``` + +Length must be at least 1. The `fixedString()` method is only available on the ClickHouse builder — the type has no portable mapping. + +**Column-level CODEC** — append one or more compression codecs to a column. Multiple `codec()` calls accumulate and emit `CODEC(c1, c2, ...)`: + +```php +$schema->table('metrics') + ->bigInteger('id')->primary() + ->datetime('ts', 3)->codec('Delta(4)')->codec('LZ4') // monotonic timestamps + ->bigInteger('value')->codec('T64')->codec('LZ4') // integer column + ->string('payload')->codec('ZSTD(3)') // text column + ->create(); + +// CREATE TABLE `metrics` (`id` Int64, +// `ts` DateTime64(3) CODEC(Delta(4), LZ4), +// `value` Int64 CODEC(T64, LZ4), +// `payload` String CODEC(ZSTD(3))) ENGINE = MergeTree() ORDER BY (`id`) +``` + +Each codec string is emitted verbatim; supply codec arguments inline (`'Delta(4)'`, `'ZSTD(3)'`). Codec strings must not be empty or contain a semicolon. The `codec()` method is only available on the ClickHouse builder. + +**SAMPLE BY** — declare a sampling expression for approximate-query support (`SELECT ... SAMPLE k`). 
Emitted after `ORDER BY` and before `TTL` / `SETTINGS`: + +```php +$schema->table('events') + ->bigInteger('id')->primary() + ->bigInteger('user_id')->unsigned() + ->sampleBy('user_id') + ->create(); + +// CREATE TABLE `events` (`id` Int64, `user_id` UInt64) ENGINE = MergeTree() +// ORDER BY (`id`) SAMPLE BY user_id +``` + +The expression is emitted verbatim and must not be empty or contain a semicolon. `SAMPLE BY` only applies to engines that take an `ORDER BY` clause (the MergeTree family); using it with `Memory`, `Log`, `TinyLog`, or `StripeLog` throws `UnsupportedException`. The `sampleBy()` method is only available on the ClickHouse builder. + +These OLAP-shaped modifiers are exposed on the ClickHouse dialect via the `Feature\OLAP` marker interface. Dialect-specific Column/Table subclasses surface the methods only when the underlying dialect implements the feature — so calling `->lowCardinality()` or `->sampleBy()` on a `MySQL`, `PostgreSQL`, `SQLite`, or `MongoDB` builder fails at the type level, with no runtime branch needed. + ### SQLite Schema ```php From 951783cbf7d7de8da36c4927cb7ef48f977ccbb2 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 11 May 2026 05:49:15 +0000 Subject: [PATCH 3/4] refactor(clickhouse): scope FixedString to ClickHouse dialect, drop from global ColumnType MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes `ColumnType::FixedString` from the cross-dialect enum. FixedString state now lives on `Column\ClickHouse` (via `asFixedString()` / `isFixedString()` / `$fixedStringLength`), and `Schema\ClickHouse::compileColumnType()` reads that state to emit `FixedString(N)` DDL. 
`Table\ClickHouse::fixedString()` now registers a `ColumnType::String` column and tags it with the FixedString state, so the global enum carries no ClickHouse-only cases and the other dialects (`MySQL`, `PostgreSQL`, `SQLite`, `MongoDB`) no longer need `UnsupportedException` match branches — their `compileColumnType()` methods are byte-identical to `main`. `Feature\OLAP` remains a marker interface matching the dialect-shape pattern (OLAP modifiers live on the column/table builder, not on `Schema`, so they cannot be expressed as a Schema-level method contract); docblock updated to explain why and to confirm the non-OLAP dialects are unchanged by construction. Compiled DDL bytes for ClickHouse are unchanged; all 5175 tests pass; lint and PHPStan max are clean. --- src/Query/Schema/ClickHouse.php | 15 ++++++++++++- src/Query/Schema/Column/ClickHouse.php | 29 ++++++++++++++++++++++++++ src/Query/Schema/ColumnType.php | 1 - src/Query/Schema/Feature/OLAP.php | 12 +++++++++-- src/Query/Schema/MongoDB.php | 1 - src/Query/Schema/MySQL.php | 1 - src/Query/Schema/PostgreSQL.php | 1 - src/Query/Schema/SQLite.php | 1 - src/Query/Schema/Table/ClickHouse.php | 12 ++++++----- 9 files changed, 60 insertions(+), 13 deletions(-) diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index 1e5a082..59aaea6 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -33,9 +33,22 @@ protected function compileColumnType(Column $column): string throw new UnsupportedException('User-defined types are not supported in ClickHouse.'); } + if ($column instanceof Column\ClickHouse && $column->isFixedString()) { + $type = 'FixedString(' . $column->fixedStringLength . ')'; + + if ($column->isLowCardinality) { + $type = 'LowCardinality(' . $type . ')'; + } + + if ($column->isNullable) { + $type = 'Nullable(' . $type . 
')'; + } + + return $type; + } + $type = match ($column->type) { ColumnType::String, ColumnType::Varchar, ColumnType::Relationship => 'String', - ColumnType::FixedString => 'FixedString(' . ($column->length ?? throw new ValidationException('FixedString requires a length.')) . ')', ColumnType::Text => 'String', ColumnType::MediumText, ColumnType::LongText => 'String', ColumnType::Integer => $column->isUnsigned ? 'UInt32' : 'Int32', diff --git a/src/Query/Schema/Column/ClickHouse.php b/src/Query/Schema/Column/ClickHouse.php index 2b08e3a..a056f42 100644 --- a/src/Query/Schema/Column/ClickHouse.php +++ b/src/Query/Schema/Column/ClickHouse.php @@ -16,9 +16,38 @@ class ClickHouse extends Column public protected(set) bool $isLowCardinality = false; + /** Length when the column should be emitted as `FixedString(N)`; null otherwise. */ + public protected(set) ?int $fixedStringLength = null; + /** @var list Column-level CODEC clauses, e.g. ['Delta(4)', 'LZ4'] */ public protected(set) array $codecs = []; + /** + * Mark the column as `FixedString(N)`. + * + * Used by {@see Table\ClickHouse::fixedString()} to attach the + * ClickHouse-specific FixedString width to a column whose generic + * {@see \Utopia\Query\Schema\ColumnType} is `String`. The compiler reads + * this state when emitting DDL. + * + * @throws ValidationException if $length is less than 1. 
+ */ + public function asFixedString(int $length): static + { + if ($length < 1) { + throw new ValidationException('FixedString length must be at least 1.'); + } + + $this->fixedStringLength = $length; + + return $this; + } + + public function isFixedString(): bool + { + return $this->fixedStringLength !== null; + } + /** * @param list $columns * diff --git a/src/Query/Schema/ColumnType.php b/src/Query/Schema/ColumnType.php index 173bb51..a7ff7a1 100644 --- a/src/Query/Schema/ColumnType.php +++ b/src/Query/Schema/ColumnType.php @@ -5,7 +5,6 @@ enum ColumnType: string { case String = 'string'; - case FixedString = 'fixedstring'; case Varchar = 'varchar'; case Text = 'text'; case MediumText = 'mediumtext'; diff --git a/src/Query/Schema/Feature/OLAP.php b/src/Query/Schema/Feature/OLAP.php index 3468b87..9535ade 100644 --- a/src/Query/Schema/Feature/OLAP.php +++ b/src/Query/Schema/Feature/OLAP.php @@ -6,12 +6,20 @@ * Marker for dialects that expose OLAP-shaped column and table modifiers * (`LowCardinality`, `FixedString`, column-level `CODEC`, `SAMPLE BY`). * - * The modifier methods live on the dialect's `Table` / `Column` subclasses - * (e.g. {@see \Utopia\Query\Schema\Table\ClickHouse}, + * Unlike sibling `Feature/*` interfaces — which declare schema-level method + * signatures because their operations are emitted as standalone statements — + * OLAP modifiers are intrinsic to a dialect's column/table builder shape and + * cannot be expressed as `Schema` methods. They live on the dialect's + * `Table` / `Column` subclasses (e.g. {@see \Utopia\Query\Schema\Table\ClickHouse}, * {@see \Utopia\Query\Schema\Column\ClickHouse}) and the corresponding * `Forwarder` trait, so callers can only chain them when the underlying * dialect supports them — the methods aren't reachable from non-OLAP * dialects at the type level. 
+ * + * Non-OLAP dialects therefore have nothing to handle or throw from: the + * cross-dialect `ColumnType` enum carries no OLAP-only cases, and the + * `compileColumnType()` implementations on `MySQL` / `PostgreSQL` / `SQLite` / + * `MongoDB` are byte-identical to their pre-OLAP form. */ interface OLAP { diff --git a/src/Query/Schema/MongoDB.php b/src/Query/Schema/MongoDB.php index 2ba9577..fc504ee 100644 --- a/src/Query/Schema/MongoDB.php +++ b/src/Query/Schema/MongoDB.php @@ -45,7 +45,6 @@ protected function compileColumnType(Column $column): string ColumnType::Linestring, ColumnType::Polygon => 'object', ColumnType::Uuid7 => 'string', ColumnType::Vector => 'array', - ColumnType::FixedString => throw new UnsupportedException('FixedString type is not supported in MongoDB.'), }; } diff --git a/src/Query/Schema/MySQL.php b/src/Query/Schema/MySQL.php index e6063d2..4ea2d66 100644 --- a/src/Query/Schema/MySQL.php +++ b/src/Query/Schema/MySQL.php @@ -73,7 +73,6 @@ protected function compileColumnType(Column $column): string ColumnType::Polygon => 'POLYGON' . ($column->srid !== null ? ' SRID ' . 
$column->srid : ''), ColumnType::Uuid7 => 'VARCHAR(36)', ColumnType::Vector => throw new UnsupportedException('Vector type is not supported in MySQL.'), - ColumnType::FixedString => throw new UnsupportedException('FixedString type is not supported in MySQL.'), }; } diff --git a/src/Query/Schema/PostgreSQL.php b/src/Query/Schema/PostgreSQL.php index 94fe682..5bdf04e 100644 --- a/src/Query/Schema/PostgreSQL.php +++ b/src/Query/Schema/PostgreSQL.php @@ -79,7 +79,6 @@ protected function compileColumnType(Column $column): string ColumnType::Serial => 'SERIAL', ColumnType::BigSerial => 'BIGSERIAL', ColumnType::SmallSerial => 'SMALLSERIAL', - ColumnType::FixedString => throw new UnsupportedException('FixedString type is not supported in PostgreSQL.'), }; } diff --git a/src/Query/Schema/SQLite.php b/src/Query/Schema/SQLite.php index ae7741a..868c66a 100644 --- a/src/Query/Schema/SQLite.php +++ b/src/Query/Schema/SQLite.php @@ -36,7 +36,6 @@ protected function compileColumnType(Column $column): string ColumnType::Point, ColumnType::Linestring, ColumnType::Polygon => 'TEXT', ColumnType::Uuid7 => 'VARCHAR(36)', ColumnType::Vector => throw new UnsupportedException('Vector type is not supported in SQLite.'), - ColumnType::FixedString => throw new UnsupportedException('FixedString type is not supported in SQLite.'), }; } diff --git a/src/Query/Schema/Table/ClickHouse.php b/src/Query/Schema/Table/ClickHouse.php index d9f3d5e..0ca5a91 100644 --- a/src/Query/Schema/Table/ClickHouse.php +++ b/src/Query/Schema/Table/ClickHouse.php @@ -42,15 +42,17 @@ public function vector(string $name, int $dimensions): Column\ClickHouse * digests, and similar values that benefit from ClickHouse's columnar * storage of fixed-width data. * + * The column is registered with the generic `ColumnType::String` type and + * tagged with FixedString state on {@see Column\ClickHouse}; the compiler + * reads that state when emitting DDL, so the global `ColumnType` enum + * stays free of ClickHouse-only cases. 
+ * * @throws ValidationException if $length is less than 1. */ public function fixedString(string $name, int $length): Column\ClickHouse { - if ($length < 1) { - throw new ValidationException('FixedString length must be at least 1.'); - } - - $col = $this->newColumn($name, ColumnType::FixedString, $length); + $col = $this->newColumn($name, ColumnType::String, $length); + $col->asFixedString($length); $this->columns[] = $col; return $col; From d408b3e40f0dc11f7739df0268bb26993c5c1ff3 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Mon, 11 May 2026 06:05:19 +0000 Subject: [PATCH 4/4] refactor(clickhouse): drop empty Feature\OLAP marker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The interface declared no methods, was inspected nowhere, and pulled no weight at runtime or in the type system. Every sibling in `Feature/*` declares Statement-returning method signatures, but OLAP modifiers are intrinsic to the column/table builder shape and can't be expressed at the Schema level. Dialect-scoping is fully preserved by `Column\ClickHouse` / `Table\ClickHouse` / `Forwarder\ClickHouse` carrying the modifier methods natively — calling them on a non-ClickHouse builder is a clean type-system error, not a runtime exception. --- README.md | 4 ++-- src/Query/Schema/ClickHouse.php | 3 +-- src/Query/Schema/Feature/OLAP.php | 26 -------------------------- 3 files changed, 3 insertions(+), 30 deletions(-) delete mode 100644 src/Query/Schema/Feature/OLAP.php diff --git a/README.md b/README.md index 051a90e..1793450 100644 --- a/README.md +++ b/README.md @@ -2033,7 +2033,7 @@ $result = $schema->table('events') ClickHouse uses `Nullable(type)` wrapping for nullable columns, `Enum8(...)` for enums, `Tuple(Float64, Float64)` for points, and `TYPE minmax GRANULARITY 3` for indexes. Foreign keys, stored procedures, triggers, generated columns, and CHECK constraints throw `UnsupportedException`. 
-Supports the `TableComments`, `ColumnComments`, `DropPartition`, `Views`, `Databases`, and `OLAP` interfaces. +Supports the `TableComments`, `ColumnComments`, `DropPartition`, `Views`, and `Databases` interfaces. **Engine selection** — choose from 10 variants of the `Engine` enum: @@ -2195,7 +2195,7 @@ $schema->table('events') The expression is emitted verbatim and must not be empty or contain a semicolon. `SAMPLE BY` only applies to engines that take an `ORDER BY` clause (the MergeTree family); using it with `Memory`, `Log`, `TinyLog`, or `StripeLog` throws `UnsupportedException`. The `sampleBy()` method is only available on the ClickHouse builder. -These OLAP-shaped modifiers are exposed on the ClickHouse dialect via the `Feature\OLAP` marker interface. Dialect-specific Column/Table subclasses surface the methods only when the underlying dialect implements the feature — so calling `->lowCardinality()` or `->sampleBy()` on a `MySQL`, `PostgreSQL`, `SQLite`, or `MongoDB` builder fails at the type level, with no runtime branch needed. +These OLAP-shaped modifiers live on the ClickHouse-specific `Column\ClickHouse` and `Table\ClickHouse` builders. Because the methods only exist on the dialect's own builder subclasses, calling `->lowCardinality()` or `->sampleBy()` on a `MySQL`, `PostgreSQL`, `SQLite`, or `MongoDB` builder fails at the type level, with no runtime branch needed. 
### SQLite Schema diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index 59aaea6..6cf2318 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -11,11 +11,10 @@ use Utopia\Query\Schema\Feature\ColumnComments; use Utopia\Query\Schema\Feature\Databases; use Utopia\Query\Schema\Feature\DropPartition; -use Utopia\Query\Schema\Feature\OLAP; use Utopia\Query\Schema\Feature\TableComments; use Utopia\Query\Schema\Feature\Views; -class ClickHouse extends Schema implements TableComments, ColumnComments, DropPartition, Views, Databases, OLAP +class ClickHouse extends Schema implements TableComments, ColumnComments, DropPartition, Views, Databases { use QuotesIdentifiers; use Trait\Databases; diff --git a/src/Query/Schema/Feature/OLAP.php b/src/Query/Schema/Feature/OLAP.php deleted file mode 100644 index 9535ade..0000000 --- a/src/Query/Schema/Feature/OLAP.php +++ /dev/null @@ -1,26 +0,0 @@ -