diff --git a/README.md b/README.md
index 72e2640..1793450 100644
--- a/README.md
+++ b/README.md
@@ -2033,7 +2033,7 @@ $result = $schema->table('events')
 
 ClickHouse uses `Nullable(type)` wrapping for nullable columns, `Enum8(...)` for enums, `Tuple(Float64, Float64)` for points, and `TYPE minmax GRANULARITY 3` for indexes. Foreign keys, stored procedures, triggers, generated columns, and CHECK constraints throw `UnsupportedException`.
 
-Supports the `TableComments`, `ColumnComments`, and `DropPartition` interfaces.
+Supports the `TableComments`, `ColumnComments`, `DropPartition`, `Views`, and `Databases` interfaces.
 
 **Engine selection** — choose from 10 variants of the `Engine` enum:
 
@@ -2130,6 +2130,73 @@ $schema->table('events')
 
 Setting names must match `[A-Za-z_][A-Za-z0-9_]*`; string values are restricted to `[A-Za-z0-9_.\-+/]*`. Use ints / floats / booleans for everything else. Other dialects ignore the call.
 
+**LowCardinality** — wrap a column type in `LowCardinality(...)` for compact dictionary-encoded storage on string columns with a small number of distinct values (status enums, type discriminators, country codes, category labels):
+
+```php
+$schema->table('events')
+    ->bigInteger('id')->primary()
+    ->string('status')->lowCardinality()
+    ->string('country')->lowCardinality()->nullable()
+    ->create();
+
+// CREATE TABLE `events` (`id` Int64, `status` LowCardinality(String),
+//   `country` LowCardinality(Nullable(String))) ENGINE = MergeTree() ORDER BY (`id`)
+```
+
+`Nullable` is applied inside `LowCardinality` (`LowCardinality(Nullable(String))`) because ClickHouse rejects `Nullable(LowCardinality(...))`. The `lowCardinality()` method is only available on the ClickHouse builder — callers on other dialects (`MySQL`, `PostgreSQL`, `SQLite`, `MongoDB`) cannot reach this method at all.
+
+**FixedString(N)** — fixed-length string column. Use for ISO codes, hash digests, and other values whose byte length is known and constant:
+
+```php
+$schema->table('locations')
+    ->bigInteger('id')->primary()
+    ->fixedString('country_code', 2) // ISO 3166-1 alpha-2
+    ->fixedString('currency_code', 3) // ISO 4217
+    ->fixedString('digest', 32) // hex-encoded MD5
+    ->create();
+
+// CREATE TABLE `locations` (`id` Int64, `country_code` FixedString(2),
+//   `currency_code` FixedString(3), `digest` FixedString(32))
+//   ENGINE = MergeTree() ORDER BY (`id`)
+```
+
+Length must be at least 1. The `fixedString()` method is only available on the ClickHouse builder — the type has no portable mapping.
+
+**Column-level CODEC** — append one or more compression codecs to a column. Multiple `codec()` calls accumulate and emit `CODEC(c1, c2, ...)`:
+
+```php
+$schema->table('metrics')
+    ->bigInteger('id')->primary()
+    ->datetime('ts', 3)->codec('Delta(4)')->codec('LZ4') // monotonic timestamps
+    ->bigInteger('value')->codec('T64')->codec('LZ4') // integer column
+    ->string('payload')->codec('ZSTD(3)') // text column
+    ->create();
+
+// CREATE TABLE `metrics` (`id` Int64,
+//   `ts` DateTime64(3) CODEC(Delta(4), LZ4),
+//   `value` Int64 CODEC(T64, LZ4),
+//   `payload` String CODEC(ZSTD(3))) ENGINE = MergeTree() ORDER BY (`id`)
+```
+
+Each codec string is emitted verbatim; supply codec arguments inline (`'Delta(4)'`, `'ZSTD(3)'`). Codec strings must not be empty or contain a semicolon. The `codec()` method is only available on the ClickHouse builder.
+
+**SAMPLE BY** — declare a sampling expression for approximate-query support (`SELECT ... SAMPLE k`). Emitted after `ORDER BY` and before `TTL` / `SETTINGS`:
+
+```php
+$schema->table('events')
+    ->bigInteger('id')->primary()
+    ->bigInteger('user_id')->unsigned()
+    ->sampleBy('user_id')
+    ->create();
+
+// CREATE TABLE `events` (`id` Int64, `user_id` UInt64) ENGINE = MergeTree()
+//   ORDER BY (`id`) SAMPLE BY user_id
+```
+
+The expression is emitted verbatim and must not be empty or contain a semicolon. `SAMPLE BY` only applies to engines that take an `ORDER BY` clause (the MergeTree family); using it with `Memory`, `Log`, `TinyLog`, or `StripeLog` throws `UnsupportedException`. The `sampleBy()` method is only available on the ClickHouse builder.
+
+These OLAP-shaped modifiers live on the ClickHouse-specific `Column\ClickHouse` and `Table\ClickHouse` builders. Because the methods only exist on the dialect's own builder subclasses, calling `->lowCardinality()` or `->sampleBy()` on a `MySQL`, `PostgreSQL`, `SQLite`, or `MongoDB` builder fails at the type level, with no runtime branch needed.
+
 ### SQLite Schema
 
 ```php
diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php
index ced5544..6cf2318 100644
--- a/src/Query/Schema/ClickHouse.php
+++ b/src/Query/Schema/ClickHouse.php
@@ -32,6 +32,20 @@ protected function compileColumnType(Column $column): string
             throw new UnsupportedException('User-defined types are not supported in ClickHouse.');
         }
 
+        if ($column instanceof Column\ClickHouse && $column->isFixedString()) {
+            $type = 'FixedString(' . $column->fixedStringLength . ')';
+
+            // ClickHouse only accepts LowCardinality(Nullable(T)); Nullable must be the inner wrapper.
+            if ($column->isNullable) {
+                $type = 'Nullable(' . $type . ')';
+            }
+            if ($column->isLowCardinality) {
+                $type = 'LowCardinality(' . $type . ')';
+            }
+
+            return $type;
+        }
+
         $type = match ($column->type) {
             ColumnType::String, ColumnType::Varchar, ColumnType::Relationship => 'String',
             ColumnType::Text => 'String',
@@ -53,6 +67,11 @@ protected function compileColumnType(Column $column): string
             ColumnType::Serial, ColumnType::BigSerial, ColumnType::SmallSerial => throw new UnsupportedException('SERIAL types are not supported in ClickHouse.'),
         };
 
+        if ($column instanceof Column\ClickHouse && $column->isLowCardinality) {
+            // ClickHouse only accepts LowCardinality(Nullable(T)), so both wrappers are built here.
+            return 'LowCardinality(' . ($column->isNullable ? 'Nullable(' . $type . ')' : $type) . ')';
+        }
+
         if ($column->isNullable) {
             $type = 'Nullable(' . $type . ')';
         }
@@ -89,6 +108,10 @@ protected function compileColumnDefinition(Column $column): string
             $parts[] = 'DEFAULT ' . $this->compileDefaultValue($column->default);
         }
 
+        if ($column instanceof Column\ClickHouse && $column->codecs !== []) {
+            $parts[] = 'CODEC(' . \implode(', ', $column->codecs) . ')';
+        }
+
         if ($column->ttl !== null) {
             $parts[] = 'TTL ' . $column->ttl;
         }
@@ -226,6 +249,15 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen
                 : ' ORDER BY tuple()';
         }
 
+        if ($table instanceof Table\ClickHouse && $table->sampleBy !== null) {
+            if (! $engine->requiresOrderBy()) {
+                throw new UnsupportedException(
+                    'SAMPLE BY is only supported on engines that take an ORDER BY clause.'
+                );
+            }
+            $sql .= ' SAMPLE BY ' . $table->sampleBy;
+        }
+
         if ($table->ttl !== null) {
             $sql .= ' TTL ' . $table->ttl;
         }
diff --git a/src/Query/Schema/Column/ClickHouse.php b/src/Query/Schema/Column/ClickHouse.php
index 14528a9..a056f42 100644
--- a/src/Query/Schema/Column/ClickHouse.php
+++ b/src/Query/Schema/Column/ClickHouse.php
@@ -2,6 +2,7 @@
 
 namespace Utopia\Query\Schema\Column;
 
+use Utopia\Query\Exception\ValidationException;
 use Utopia\Query\Schema\Column;
 use Utopia\Query\Schema\Forwarder;
 use Utopia\Query\Schema\Table;
@@ -13,6 +14,40 @@ class ClickHouse extends Column
 {
     use Forwarder\ClickHouse;
 
+    public protected(set) bool $isLowCardinality = false;
+
+    /** Length when the column should be emitted as `FixedString(N)`; null otherwise. */
+    public protected(set) ?int $fixedStringLength = null;
+
+    /** @var list<string> Column-level CODEC clauses, e.g. ['Delta(4)', 'LZ4'] */
+    public protected(set) array $codecs = [];
+
+    /**
+     * Mark the column as `FixedString(N)`.
+     *
+     * Used by {@see Table\ClickHouse::fixedString()} to attach the
+     * ClickHouse-specific FixedString width to a column whose generic
+     * {@see \Utopia\Query\Schema\ColumnType} is `String`. The compiler reads
+     * this state when emitting DDL.
+     *
+     * @throws ValidationException if $length is less than 1.
+     */
+    public function asFixedString(int $length): static
+    {
+        if ($length < 1) {
+            throw new ValidationException('FixedString length must be at least 1.');
+        }
+
+        $this->fixedStringLength = $length;
+
+        return $this;
+    }
+
+    public function isFixedString(): bool
+    {
+        return $this->fixedStringLength !== null;
+    }
+
     /**
      * @param list $columns
      *
@@ -28,4 +63,47 @@ public function primary(array $columns = []): static|Table
 
         return $this->table->primary($columns);
     }
+
+    /**
+     * Wrap the column type in `LowCardinality(...)`.
+     *
+     * Suitable for string columns with a small number of distinct values
+     * (status enums, type discriminators, country codes). On a nullable
+     * column `Nullable` is applied inside `LowCardinality`, the only order
+     * ClickHouse accepts: `LowCardinality(Nullable(String))`.
+     */
+    public function lowCardinality(): static
+    {
+        $this->isLowCardinality = true;
+
+        return $this;
+    }
+
+    /**
+     * Append a column-level CODEC clause.
+     *
+     * Multiple calls accumulate and emit `CODEC(c1, c2, ...)`. Pass either
+     * a bare codec name (`->codec('LZ4')`) or one with arguments
+     * (`->codec('Delta(4)')`, `->codec('ZSTD(3)')`). The codec string is
+     * emitted verbatim and must come from a trusted source.
+     *
+     * @throws ValidationException if the codec string is empty or contains
+     *         a semicolon.
+     */
+    public function codec(string $codec): static
+    {
+        $trimmed = \trim($codec);
+
+        if ($trimmed === '') {
+            throw new ValidationException('CODEC expression must not be empty.');
+        }
+
+        if (\str_contains($trimmed, ';')) {
+            throw new ValidationException('CODEC expression must not contain ";".');
+        }
+
+        $this->codecs[] = $trimmed;
+
+        return $this;
+    }
 }
diff --git a/src/Query/Schema/Forwarder/ClickHouse.php b/src/Query/Schema/Forwarder/ClickHouse.php
index bc72453..ce54e07 100644
--- a/src/Query/Schema/Forwarder/ClickHouse.php
+++ b/src/Query/Schema/Forwarder/ClickHouse.php
@@ -18,6 +18,11 @@ public function vector(string $name, int $dimensions): Column\ClickHouse
         return $this->table->vector($name, $dimensions);
     }
 
+    public function fixedString(string $name, int $length): Column\ClickHouse
+    {
+        return $this->table->fixedString($name, $length);
+    }
+
     public function engine(Engine $engine, string ...$args): Table\ClickHouse
     {
         return $this->table->engine($engine, ...$args);
@@ -44,4 +49,8 @@ public function partitionBy(string $expression): Table\ClickHouse
         return $this->table->partitionBy($expression);
     }
 
+    public function sampleBy(string $expression): Table\ClickHouse
+    {
+        return $this->table->sampleBy($expression);
+    }
 }
diff --git a/src/Query/Schema/Table/ClickHouse.php b/src/Query/Schema/Table/ClickHouse.php
index da9b392..0ca5a91 100644
--- a/src/Query/Schema/Table/ClickHouse.php
+++ b/src/Query/Schema/Table/ClickHouse.php
@@ -16,6 +16,9 @@ class ClickHouse extends Table
 {
     use Trait\CompositePrimary;
 
+    /** ClickHouse SAMPLE BY expression. Emitted after ORDER BY when set. */
+    public protected(set) ?string $sampleBy = null;
+
     #[\Override]
     protected function newColumn(string $name, ColumnType $type, ?int $length = null, ?int $precision = null): Column\ClickHouse
     {
@@ -31,6 +34,30 @@ public function vector(string $name, int $dimensions): Column\ClickHouse
         return $col;
     }
 
+    /**
+     * Add a `FixedString(N)` column.
+     *
+     * Used for fixed-length string values whose byte length is known and
+     * constant — ISO 3166 country codes, ISO 4217 currency codes, hash
+     * digests, and similar values that benefit from ClickHouse's columnar
+     * storage of fixed-width data.
+     *
+     * The column is registered with the generic `ColumnType::String` type and
+     * tagged with FixedString state on {@see Column\ClickHouse}; the compiler
+     * reads that state when emitting DDL, so the global `ColumnType` enum
+     * stays free of ClickHouse-only cases.
+     *
+     * @throws ValidationException if $length is less than 1.
+     */
+    public function fixedString(string $name, int $length): Column\ClickHouse
+    {
+        $col = $this->newColumn($name, ColumnType::String, $length);
+        $col->asFixedString($length);
+        $this->columns[] = $col;
+
+        return $col;
+    }
+
     /**
      * Select the table engine. Engine-specific arguments are validated against
      * the engine variant:
@@ -158,4 +185,28 @@ public function partitionBy(string $expression): static
 
         return $this;
     }
+
+    /**
+     * Set the SAMPLE BY expression. Emitted after ORDER BY at table creation
+     * time. Required to model tables that need approximate-query support via
+     * `SELECT ... SAMPLE k` on MergeTree-family engines.
+     *
+     * @throws ValidationException if the expression is empty or contains a semicolon.
+     */
+    public function sampleBy(string $expression): static
+    {
+        $trimmed = \trim($expression);
+
+        if ($trimmed === '') {
+            throw new ValidationException('SAMPLE BY expression must not be empty.');
+        }
+
+        if (\str_contains($trimmed, ';')) {
+            throw new ValidationException('SAMPLE BY expression must not contain ";".');
+        }
+
+        $this->sampleBy = $trimmed;
+
+        return $this;
+    }
 }
diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php
index 468b3ce..80a1a55 100644
--- a/tests/Query/Schema/ClickHouseTest.php
+++ b/tests/Query/Schema/ClickHouseTest.php
@@ -945,4 +945,222 @@ public function testAlterRejectsSettings(): void
             ->settings(['index_granularity' => 4096])
             ->alter();
     }
+
+    public function testCreateTableFixedStringColumn(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('locations')
+            ->bigInteger('id')->primary()
+            ->fixedString('country_code', 2)
+            ->fixedString('currency_code', 3)
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `locations` (`id` Int64, `country_code` FixedString(2), `currency_code` FixedString(3)) ENGINE = MergeTree() ORDER BY (`id`)',
+            $result->query,
+        );
+    }
+
+    public function testCreateTableFixedStringNullable(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('t')
+            ->fixedString('hash', 32)->nullable()
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `t` (`hash` Nullable(FixedString(32))) ENGINE = MergeTree() ORDER BY tuple()',
+            $result->query,
+        );
+    }
+
+    public function testFixedStringRejectsZeroLength(): void
+    {
+        $this->expectException(ValidationException::class);
+
+        $schema = new Schema();
+        $schema->table('t')->fixedString('bad', 0);
+    }
+
+    public function testCreateTableLowCardinalityColumn(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('events')
+            ->bigInteger('id')->primary()
+            ->string('status')->lowCardinality()
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `events` (`id` Int64, `status` LowCardinality(String)) ENGINE = MergeTree() ORDER BY (`id`)',
+            $result->query,
+        );
+    }
+
+    public function testCreateTableLowCardinalityNullableWrapsInBothOrder(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('events')
+            ->bigInteger('id')->primary()
+            ->string('status')->lowCardinality()->nullable()
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `events` (`id` Int64, `status` LowCardinality(Nullable(String))) ENGINE = MergeTree() ORDER BY (`id`)',
+            $result->query,
+        );
+    }
+
+    public function testAlterAddLowCardinalityColumn(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('events')
+            ->addColumn('country', ColumnType::String)->lowCardinality()
+            ->alter();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'ALTER TABLE `events` ADD COLUMN `country` LowCardinality(String)',
+            $result->query,
+        );
+    }
+
+    public function testCreateTableColumnWithSingleCodec(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('metrics')
+            ->bigInteger('id')->primary()
+            ->datetime('ts', 3)->codec('LZ4')
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `metrics` (`id` Int64, `ts` DateTime64(3) CODEC(LZ4)) ENGINE = MergeTree() ORDER BY (`id`)',
+            $result->query,
+        );
+    }
+
+    public function testCreateTableColumnWithMultipleCodecs(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('metrics')
+            ->bigInteger('id')->primary()
+            ->datetime('ts', 3)->codec('Delta(4)')->codec('LZ4')
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `metrics` (`id` Int64, `ts` DateTime64(3) CODEC(Delta(4), LZ4)) ENGINE = MergeTree() ORDER BY (`id`)',
+            $result->query,
+        );
+    }
+
+    public function testCodecOrderingRelativeToTtlAndComment(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('events')
+            ->bigInteger('id')->primary()
+            ->string('payload')
+            ->codec('ZSTD(3)')
+            ->ttl('ts + INTERVAL 30 DAY')
+            ->comment('Compressed payload')
+            ->datetime('ts')
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `events` (`id` Int64,'
+            . ' `payload` String CODEC(ZSTD(3)) TTL ts + INTERVAL 30 DAY COMMENT \'Compressed payload\','
+            . ' `ts` DateTime) ENGINE = MergeTree() ORDER BY (`id`)',
+            $result->query,
+        );
+    }
+
+    public function testCodecRejectsEmpty(): void
+    {
+        $this->expectException(ValidationException::class);
+
+        $schema = new Schema();
+        $schema->table('t')
+            ->integer('id')->codec('');
+    }
+
+    public function testCodecRejectsSemicolon(): void
+    {
+        $this->expectException(ValidationException::class);
+
+        $schema = new Schema();
+        $schema->table('t')
+            ->integer('id')->codec('LZ4;');
+    }
+
+    public function testCreateTableWithSampleBy(): void
+    {
+        $schema = new Schema();
+        $result = $schema->table('events')
+            ->bigInteger('id')->primary()
+            ->bigInteger('user_id')->unsigned()
+            ->sampleBy('user_id')
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `events` (`id` Int64, `user_id` UInt64) ENGINE = MergeTree() ORDER BY (`id`) SAMPLE BY user_id',
+            $result->query,
+        );
+    }
+
+    public function testCreateTableSampleByOrderingWithTtlAndSettings(): void
+    {
+        $schema = new Schema();
+        $table = $schema->table('events');
+        $table->bigInteger('id')->primary();
+        $table->datetime('created_at');
+        $result = $table
+            ->sampleBy('id')
+            ->ttl('`created_at` + INTERVAL 30 DAY')
+            ->settings(['index_granularity' => 4096])
+            ->create();
+        $this->assertBindingCount($result);
+
+        $this->assertSame(
+            'CREATE TABLE `events` (`id` Int64, `created_at` DateTime) ENGINE = MergeTree() ORDER BY (`id`)'
+            . ' SAMPLE BY id'
+            . ' TTL `created_at` + INTERVAL 30 DAY'
+            . ' SETTINGS index_granularity = 4096',
+            $result->query,
+        );
+    }
+
+    public function testSampleByRejectsEmpty(): void
+    {
+        $this->expectException(ValidationException::class);
+
+        $schema = new Schema();
+        $schema->table('events')->sampleBy('');
+    }
+
+    public function testSampleByRejectsSemicolon(): void
+    {
+        $this->expectException(ValidationException::class);
+
+        $schema = new Schema();
+        $schema->table('events')->sampleBy('id;');
+    }
+
+    public function testSampleByRejectedOnEnginesWithoutOrderBy(): void
+    {
+        $this->expectException(UnsupportedException::class);
+        $this->expectExceptionMessage('SAMPLE BY');
+
+        $schema = new Schema();
+        $schema->table('cache')
+            ->integer('id')->primary()
+            ->engine(Engine::Memory)
+            ->sampleBy('id')
+            ->create();
+    }
 }