diff --git a/README.md b/README.md index cb5fb6d..a6fb73d 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ API key priority (lowest to highest): config file → `HOTDATA_API_KEY` env var | `connections` | `list`, `create`, `refresh`, `new` | Manage connections | | `databases` | `list`, `create`, `delete`, `tables` | Managed databases (create and load tables via parquet) | | `tables` | `list` | List tables and columns | -| `datasets` | `list`, `create`, `update` | Manage uploaded datasets | +| `views` | `list`, `create`, `update`, `refresh` | Manage SQL-derived views | | `context` | `list`, `show`, `pull`, `push` | Workspace Markdown context (e.g. data model `DATAMODEL`) via the context API | | `query` | | Execute a SQL query | | `queries` | `list` | Inspect query run history | @@ -154,7 +154,7 @@ hotdata databases tables delete [--schema public] Example: ```sh -hotdata databases create --name sales --table orders +hotdata databases create --name sales hotdata databases tables load sales orders --file ./orders.parquet hotdata query "SELECT count(*) FROM sales.public.orders" ``` @@ -170,24 +170,19 @@ hotdata tables list [--workspace-id ] [--connection-id ] [--schema ..
` — use this format in SQL queries. -## Datasets +## Views ```sh -hotdata datasets list [--workspace-id ] [--limit ] [--offset ] [--format table|json|yaml] -hotdata datasets [--workspace-id ] [--format table|json|yaml] -hotdata datasets create --file data.csv [--label "My Dataset"] [--table-name my_dataset] -hotdata datasets create --sql "SELECT ..." --label "My Dataset" -hotdata datasets create --url "https://example.com/data.parquet" --label "My Dataset" -hotdata datasets update [--label "New Label"] [--table-name new_table] -hotdata datasets refresh [--workspace-id ] [--async] +hotdata views list [--workspace-id ] [--limit ] [--offset ] [--output table|json|yaml] +hotdata views [--workspace-id ] [--output table|json|yaml] +hotdata views create --name my_view [--description "My View"] (--sql "SELECT ..." | --query-id ) +hotdata views update [--description "New Label"] [--name new_table] +hotdata views refresh [--workspace-id ] [--async] ``` -- Datasets are queryable as `datasets.main.`. -- `--file`, `--sql`, `--query-id`, and `--url` are mutually exclusive. -- `--url` imports data directly from a URL (supports csv, json, parquet). -- Format is auto-detected from file extension or content. -- Piped stdin is supported: `cat data.csv | hotdata datasets create --label "My Dataset"` -- `refresh` re-runs the dataset's source (URL fetch or saved query) and creates a new version. Not supported for upload-source datasets. +- Views are queryable as `views.main.`. +- `--sql` and `--query-id` are mutually exclusive; exactly one is required for `create`. +- `refresh` re-runs the view's source query and creates a new version. - `--async` submits the refresh as a background job and returns a job ID; poll with `hotdata jobs `. 
## Workspace context @@ -237,10 +232,10 @@ hotdata queries [-o table|json|yaml] ```sh # BM25 full-text search (requires a BM25 index on the column) -hotdata search "" --type bm25 --table --column [--select ] [--limit ] [-o table|json|csv] +hotdata search "" --type bm25 --catalog --table
--column [--schema ] [--select ] [--limit ] [-o table|json|csv] # Vector search (requires a vector index with auto-embedding on the column) -hotdata search "" --type vector --table
--column [--limit ] +hotdata search "" --type vector --catalog --table
--column [--schema ] [--limit ] ``` - **`--type vector`** — pass your query as **plain text**, name the **source text column** (e.g. `title`). The server embeds the query at the same time, using the same provider that auto-embedded the column when the index was built — so distance metric, model, and dimensions all match automatically. No `OPENAI_API_KEY`, no client-side embedding, no need to know about the auto-generated `_embedding` column. Generated SQL: `vector_distance(col, 'query')` server-side. @@ -252,16 +247,16 @@ hotdata search "" --type vector --table
--column --schema --table
[-o table|json|yaml] -hotdata indexes create --connection-id --schema --table
\ - --name --columns --type sorted|bm25|vector \ - [--metric l2|cosine|dot] [--async] \ +# Catalog-table scope +hotdata indexes list --catalog --table
[--schema ] [-o table|json|yaml] +hotdata indexes create --catalog --table
[--schema ] \ + --column --type sorted|bm25|vector \ + [--name ] [--metric l2|cosine|dot] [--async] \ [--embedding-provider-id ] [--dimensions ] [--output-column ] [--description ] -hotdata indexes delete --connection-id --schema --table
--name +hotdata indexes delete --catalog --table
[--schema ] --name # Dataset scope hotdata indexes list --dataset-id [-o table|json|yaml] diff --git a/skills/hotdata-analytics/SKILL.md b/skills/hotdata-analytics/SKILL.md index 5717240..d6e8600 100644 --- a/skills/hotdata-analytics/SKILL.md +++ b/skills/hotdata-analytics/SKILL.md @@ -8,7 +8,7 @@ version: 0.3.3 **OLAP-style analytics** in Hotdata: PostgreSQL-dialect SQL, query execution, run history, stored results, **Chain** materializations, and **sorted** indexes for filters and joins. -**Prerequisites:** Authenticate, workspace, and catalog discovery via the **`hotdata`** skill (`connections`, `tables`, `datasets`, `databases`). +**Prerequisites:** Authenticate, workspace, and catalog discovery via the **`hotdata`** skill (`connections`, `tables`, `views`, `databases`). **Related skills:** **`hotdata-search`** (BM25, vector, retrieval indexes), **`hotdata-geospatial`** (spatial SQL). @@ -23,7 +23,7 @@ hotdata query status [--output table|json|csv] - **PostgreSQL dialect.** Quote mixed-case identifiers: `"CustomerName"`. - Use **`hotdata tables list`** for schema discovery — not `information_schema` via `query`. -- Fully qualified names: `..
`, `datasets..
`, `..
`. +- Fully qualified names: `..
`, `views..
`, `..
`. - Long-running queries may return `query_run_id` → poll with **`query status`** (exit `2` = still running). Do not re-run identical heavy SQL while polling. - For **workspace-wide** joins and naming, load **context:DATAMODEL** when listed (`hotdata context list` → `show DATAMODEL`) — see **`hotdata`** skill. @@ -82,8 +82,8 @@ hotdata results [--workspace-id ] [--output table|json 2. **Materialize** (pick one) ```bash - hotdata datasets create --name chain_slice [--description "chain slice"] --sql "SELECT ..." - hotdata datasets create --name chain_from_saved [--description "from saved"] --query-id + hotdata views create --name chain_slice --description "chain slice" --sql "SELECT ..." + hotdata views create --name chain_from_saved --description "from saved" --query-id ``` Or managed parquet: @@ -94,10 +94,10 @@ hotdata results [--workspace-id ] [--output table|json hotdata databases tables load slice --file ./slice.parquet ``` -3. **Chain query** — use printed **`full_name`** or `datasets list` **FULL NAME** column: +3. **Chain query** — use printed **`full_name`** or `views list` **FULL NAME** column: ```bash - hotdata query "SELECT * FROM datasets.main.chain_slice WHERE ..." + hotdata query "SELECT * FROM views.main.chain_slice WHERE ..." hotdata query "SELECT * FROM analytics.public.slice WHERE ..." ``` @@ -122,4 +122,4 @@ List and delete use the same `hotdata indexes` commands as in the search skill; ## Sandboxes and chains -Sandbox datasets use **`datasets..
`**, not `datasets.main`. Run queries with active sandbox config or `hotdata sandbox run hotdata query "..."`. See **`hotdata`** skill **Sandboxes**. +Sandbox views use **`views..
`**, not `views.main`. Run queries with active sandbox config or `hotdata sandbox run hotdata query "..."`. See **`hotdata`** skill **Sandboxes**. diff --git a/skills/hotdata-analytics/references/WORKFLOWS.md b/skills/hotdata-analytics/references/WORKFLOWS.md index 0a11385..affeffe 100644 --- a/skills/hotdata-analytics/references/WORKFLOWS.md +++ b/skills/hotdata-analytics/references/WORKFLOWS.md @@ -2,7 +2,7 @@ OLAP-style SQL, **History** (query runs and stored results), and **Chain** (materialized follow-ups). Requires **`hotdata`** for auth, workspaces, and catalog commands. -**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for datasets vs managed databases. +**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for views vs managed databases. --- @@ -66,11 +66,11 @@ hotdata query "SELECT ..." Land a smaller table — pick one: -**Datasets** (CSV/JSON/URL/SQL snapshot → `datasets..
`): +**Views** (SQL snapshot → `views..
`): ```bash -hotdata datasets create --label "chain revenue slice" --sql "SELECT ..." [--table-name chain_revenue_slice] -hotdata datasets create --label "from saved" --query-id [--table-name ...] +hotdata views create --name chain_revenue_slice --description "chain revenue slice" --sql "SELECT ..." +hotdata views create --name chain_from_saved --description "from saved" --query-id ``` **Managed database** (parquet → `..
`): @@ -80,17 +80,17 @@ hotdata databases create --name chain_db --table revenue_slice hotdata databases tables load chain_db revenue_slice --file ./revenue_slice.parquet ``` -Note the printed **`full_name`** (e.g. `datasets.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For datasets, **`FULL NAME`** from `datasets list` is authoritative. +Note the printed **`full_name`** (e.g. `views.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For views, **`FULL NAME`** from `views list` is authoritative. ### 3. Chain query -Query using that name — do not hardcode `datasets.main` if the schema segment is a sandbox id: +Query using that name — do not hardcode `views.main` if the schema segment is a sandbox id: ```bash -hotdata datasets list -hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..." +hotdata views list +hotdata query "SELECT * FROM views.main.chain_revenue_slice WHERE ..." # Sandbox example (use actual full_name from create or list): -# hotdata query "SELECT * FROM datasets.s_ufmblmvq.chain_revenue_slice WHERE ..." +# hotdata query "SELECT * FROM views.s_ufmblmvq.chain_revenue_slice WHERE ..." # Managed database: # hotdata query "SELECT * FROM chain_db.public.revenue_slice WHERE ..." ``` @@ -99,18 +99,18 @@ hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..." For **sandbox-scoped** chain tables: -- Qualified name is **`datasets..
`**, not `datasets.main`. +- Qualified name is **`views..
`**, not `views.main`. - Run queries with **active sandbox** in config (`hotdata sandbox set`) **or** inside **`hotdata sandbox run hotdata query "…"`**. - Without sandbox context, you may get **access denied** on sandbox-only tables. ### Naming and documentation - Prefer predictable `--table-name` values: `chain__`. -- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`datasets.…` or `database.schema.table`). +- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`views.…` or `database.schema.table`). - Promote join/grain findings to **context:DATAMODEL** when they should outlive the sandbox (**`hotdata`** skill). ### Guardrails - Materialize when the base scan is large and the follow-up runs many times. - Keep Chain tables focused; avoid wide `SELECT *` materializations when a narrow projection suffices. -- For upload format choice (datasets vs databases), see **`hotdata`** WORKFLOWS — [Datasets vs managed databases](../../hotdata/references/WORKFLOWS.md#datasets-vs-managed-databases). +- For source format choice (views vs databases), see **`hotdata`** WORKFLOWS — [Views vs managed databases](../../hotdata/references/WORKFLOWS.md#views-vs-managed-databases). diff --git a/skills/hotdata-search/SKILL.md b/skills/hotdata-search/SKILL.md index a2adbeb..e30eb45 100644 --- a/skills/hotdata-search/SKILL.md +++ b/skills/hotdata-search/SKILL.md @@ -20,12 +20,12 @@ Retrieval workloads in Hotdata: **BM25 full-text**, **vector similarity**, and t ```bash # BM25 (requires a BM25 index on the column) -hotdata search "" --type bm25 --table --column \ - [--select ] [--limit ] [--workspace-id ] [--output table|json|csv] +hotdata search "" --type bm25 --catalog --table
--column \ + [--schema ] [--select ] [--limit ] [--workspace-id ] [--output table|json|csv] # Vector (requires a vector index; server auto-embeds the query text) -hotdata search "" --type vector --table --column \ - [--select ] [--limit ] [--workspace-id ] [--output table|json|csv] +hotdata search "" --type vector --catalog --table
--column \ + [--schema ] [--select ] [--limit ] [--workspace-id ] [--output table|json|csv] ``` | Type | Behavior | @@ -41,22 +41,24 @@ hotdata search "" --type vector --table --colum ## Indexes (BM25 and vector) -Indexes attach to a **connection table** (`--connection-id` + `--schema` + `--table`) or a **dataset** (`--dataset-id`). Scopes are mutually exclusive for create/delete. +Indexes attach to a **catalog table** (`--catalog` + `--table`) or a **dataset** (`--dataset-id`). Scopes are mutually exclusive for create/delete. + +**Note:** `indexes create` uses `--catalog`/`--table`; `indexes list` and `indexes delete` still use `--connection-id`/`--schema`/`--table`. ```bash -# List — workspace scan on connection tables (filter with -c / --schema / --table) +# List — workspace scan (filter with --connection-id / --schema / --table) hotdata indexes list [--connection-id ] [--schema ] [--table
] [--workspace-id ] [--output table|json|yaml] hotdata indexes list --dataset-id [--workspace-id ] [--output table|json|yaml] -# Connection table -hotdata indexes create --connection-id --schema --table
\ - --name --columns --type bm25|vector \ - [--metric l2|cosine|dot] [--async] \ +# Catalog table (create uses --catalog; list/delete use --connection-id) +hotdata indexes create --catalog --table
--column --type bm25|vector \ + [--schema ] [--name ] [--metric l2|cosine|dot] [--async] \ [--embedding-provider-id ] [--dimensions ] [--output-column ] [--description ] hotdata indexes delete --connection-id --schema --table
--name # Dataset -hotdata indexes create --dataset-id --name --columns --type bm25|vector ... +hotdata indexes create --dataset-id --columns --type bm25|vector \ + [--name ] [--metric l2|cosine|dot] [--async] ... hotdata indexes delete --dataset-id --name ``` @@ -89,6 +91,6 @@ hotdata embedding-providers delete [--workspace-id ] 1. `hotdata tables list --connection-id ` — confirm column types. 2. `hotdata indexes list` — avoid duplicate indexes. -3. `hotdata indexes create ... --type bm25|vector` (add `--async` if large). -4. `hotdata search "..." --type bm25|vector --table ... --column ...` +3. `hotdata indexes create --catalog --table
--column --type bm25|vector` (add `--async` if large). +4. `hotdata search "..." --type bm25|vector --catalog --table
--column ` 5. Record what exists in **context:DATAMODEL** (core skill) when the workspace should remember index choices. diff --git a/skills/hotdata/SKILL.md b/skills/hotdata/SKILL.md index 43684a8..24624ed 100644 --- a/skills/hotdata/SKILL.md +++ b/skills/hotdata/SKILL.md @@ -20,7 +20,7 @@ Install all skills with **`hotdata skills install`**. Load specialized skills on | Skill | Use for | |-------|---------| -| **`hotdata`** (this file) | Auth, workspaces, connections, databases, datasets, tables, basic `query`, context, sandboxes, jobs | +| **`hotdata`** (this file) | Auth, workspaces, connections, databases, views, tables, basic `query`, context, sandboxes, jobs | | **`hotdata-search`** | BM25, vector search, `hotdata search`, bm25/vector indexes, embedding providers | | **`hotdata-analytics`** | OLAP SQL, aggregations, query/results history, Chain materializations, sorted indexes | | **`hotdata-geospatial`** | PostGIS-style `ST_*`, WKB, spatial joins | @@ -82,15 +82,15 @@ Use [references/DATA_MODEL.template.md](references/DATA_MODEL.template.md) and [ These are **patterns** built from the commands below—not separate CLI subcommands: -- **Model (`context:DATAMODEL`)** — The **shared** Markdown semantic map of the active database (entities, keys, joins across connections). **Store and read it only via database context** (`hotdata context list`, then `hotdata context show DATAMODEL` **only when listed**, `context push DATAMODEL`); refresh using `connections`, `connections refresh`, `tables list`, and `datasets list`. For a **deep** pass (connector enrichment, indexes, per-table detail), see [references/MODEL_BUILD.md](references/MODEL_BUILD.md). Contrast **analysis modeling** in sandboxes or chat (see [Analysis modeling vs context:DATAMODEL](#analysis-modeling-vs-contextdatamodel)). +- **Model (`context:DATAMODEL`)** — The **shared** Markdown semantic map of the active database (entities, keys, joins across connections). 
**Store and read it only via database context** (`hotdata context list`, then `hotdata context show DATAMODEL` **only when listed**, `context push DATAMODEL`); refresh using `connections`, `connections refresh`, `tables list`, and `views list`. For a **deep** pass (connector enrichment, indexes, per-table detail), see [references/MODEL_BUILD.md](references/MODEL_BUILD.md). Contrast **analysis modeling** in sandboxes or chat (see [Analysis modeling vs context:DATAMODEL](#analysis-modeling-vs-contextdatamodel)). - **History / Chain / OLAP SQL** — See **`hotdata-analytics`** and [references/WORKFLOWS.md](references/WORKFLOWS.md). - **Search / retrieval indexes** — See **`hotdata-search`**. -Catalog, skill decision tree, epic flows (onboard, chain, retrieval), datasets vs databases, and sandbox procedures: [references/WORKFLOWS.md](references/WORKFLOWS.md). +Catalog, skill decision tree, epic flows (onboard, chain, retrieval), views vs databases, and sandbox procedures: [references/WORKFLOWS.md](references/WORKFLOWS.md). ## Available Commands -Top-level subcommands (each detailed below): **`auth`**, **`datasets`**, **`query`**, **`workspaces`**, **`connections`**, **`databases`**, **`tables`**, **`skills`**, **`results`**, **`jobs`**, **`indexes`**, **`embedding-providers`**, **`search`**, **`queries`**, **`sandbox`**, **`context`**, **`completions`**. Search, indexes (bm25/vector), and embedding providers are documented in **`hotdata-search`**; query history, results, Chain, and OLAP patterns in **`hotdata-analytics`**. +Top-level subcommands (each detailed below): **`auth`**, **`views`**, **`query`**, **`workspaces`**, **`connections`**, **`databases`**, **`tables`**, **`skills`**, **`results`**, **`jobs`**, **`indexes`**, **`embedding-providers`**, **`search`**, **`queries`**, **`sandbox`**, **`context`**, **`completions`**. 
Search, indexes (bm25/vector), and embedding providers are documented in **`hotdata-search`**; query history, results, Chain, and OLAP patterns in **`hotdata-analytics`**. Global CLI options: **`--api-key`**, **`-v` / `--version`**, **`-h` / `--help`**. Hidden developer flag: **`--debug`** (verbose HTTP logs). @@ -187,27 +187,26 @@ hotdata connections create \ ``` hotdata databases list [--workspace-id ] [--output table|json|yaml] -hotdata databases create [--description
...] [--schema public] [--expires-at ] [--workspace-id ] [--output table|json|yaml] -hotdata databases set -hotdata databases [--workspace-id ] [--output table|json|yaml] -hotdata databases delete [--workspace-id ] +hotdata databases create [--name ] [--description
...] [--schema public] [--expires-at ] [--workspace-id ] [--output table|json|yaml] +hotdata databases set +hotdata databases [--workspace-id ] [--output table|json|yaml] +hotdata databases delete [--workspace-id ] hotdata databases run [--database ] [--description
...] [--expires-at ] [--workspace-id ] [args...] hotdata databases run [args...] -# Dot-notation shorthand for load: database.table or database.schema.table -hotdata databases load [--file ./data.parquet] [--url ] [--upload-id ] [--workspace-id ] +hotdata databases load --table
[--catalog ] [--schema public] [--file ./data.parquet] [--url ] [--upload-id ] [--workspace-id ] -hotdata databases tables list [--database ] [--schema ] [--workspace-id ] [--output table|json|yaml] -hotdata databases tables load
[--database ] [--schema public] [--file ./data.parquet] [--url ] [--upload-id ] [--workspace-id ] -hotdata databases tables delete
[--database ] [--schema public] [--workspace-id ] +hotdata databases tables list [--database ] [--schema ] [--workspace-id ] [--output table|json|yaml] +hotdata databases tables load
[--database ] [--schema public] [--file ./data.parquet] [--url ] [--upload-id ] [--workspace-id ] +hotdata databases tables delete
[--database ] [--schema public] [--workspace-id ] ``` - `list` — all managed databases in the workspace. -- `create` — creates a new managed database. `--description` is an optional human-readable label (databases are addressed by id, not description). `--expires-at` accepts relative durations (`24h`, `7d`, `90m`) or an RFC 3339 timestamp; defaults to `24h` when omitted. Repeat `--table` to declare tables up front. -- `set` — saves `` as the active database. Subsequent `databases tables` and `context` commands use it automatically. -- `` — inspect one database (id, description, expires_at). +- `create` — creates a new managed database. `--name` sets the SQL catalog alias used in queries (`SELECT … FROM .public.
`); must match `[a-z_][a-z0-9_]*` and be globally unique; when `--name` is set, no default expiry is applied. `--description` is an optional display label. `--expires-at` accepts relative durations (`24h`, `7d`, `90m`) or an RFC 3339 timestamp; defaults to `24h` when `--name` is omitted. Repeat `--table` to declare tables up front. +- `set` — saves `` as the active database. Subsequent `databases tables` and `context` commands use it automatically. +- `` — inspect one database (id, name, expires_at). - `delete` — removes the managed database; clears the active-database config if it matched. -- `load` — shorthand with dot notation (`database.table` or `database.schema.table`). Schema defaults to `public`. +- `load` — loads a parquet file into a table. `--catalog` selects the database by name; defaults to the current database set via `databases set`. Schema defaults to `public`. - `tables list` — lists tables with `TABLE` (`..
`), `SYNCED`, `LAST_SYNC`. Uses active database when `--database` is omitted. - `tables load` — uploads a local parquet file (`--file`), a remote parquet URL (`--url`), or a pre-staged upload (`--upload-id`) and publishes with **replace** mode. - `tables delete` — drops a table from the managed database. @@ -216,10 +215,9 @@ hotdata databases tables delete
[--database ] [--schema publ Example: ``` -hotdata databases create --description "sales" --table orders -hotdata databases set -hotdata databases tables load orders --file ./orders.parquet -hotdata query "SELECT count(*) FROM .public.orders" +hotdata databases create --name sales +hotdata databases load --catalog sales --table orders --file ./orders.parquet +hotdata query "SELECT count(*) FROM sales.public.orders" ``` ### List Tables and Columns @@ -234,63 +232,62 @@ hotdata tables list [--workspace-id ] [--connection-id ] [--limit ] [--offset ] [--output table|json|yaml] +hotdata views list [--workspace-id ] [--limit ] [--offset ] [--output table|json|yaml] ``` - Default format is `table`. -- Returns `id`, `label`, and `created_at`; table output includes a **`FULL NAME`** column (`datasets..
`). +- Returns `id`, `label`, and `created_at`; table output includes a **`FULL NAME`** column (`views..
`). - Results are paginated (default 100). Use `--offset` to fetch further pages. -- **There is no filter for “this sandbox only.”** `datasets list` always returns **all** datasets in the workspace. To tell sandbox-scoped datasets from workspace-wide ones, read **`FULL NAME`**: the middle segment is the sandbox id (e.g. `datasets.s_ufmblmvq.tac_csat`) for sandbox data, and usually **`main`** (e.g. `datasets.main.my_table`) for ordinary uploads. +- **There is no filter for “this sandbox only.”** `views list` always returns **all** views in the workspace. To tell sandbox-scoped views from workspace-wide ones, read **`FULL NAME`**: the middle segment is the sandbox id (e.g. `views.s_ufmblmvq.tac_csat`) for sandbox data, and usually **`main`** (e.g. `views.main.my_table`) for ordinary views. -#### Get dataset details +#### Get view details ``` -hotdata datasets [--workspace-id ] [--output table|json|yaml] +hotdata views [--workspace-id ] [--output table|json|yaml] ``` -- Shows dataset metadata and a full column listing with `name`, `data_type`, `nullable`. +- Shows view metadata and a full column listing with `name`, `data_type`, `nullable`. - Use this to inspect schema before querying. -- For the **qualified SQL name**, prefer **`FULL NAME` from `datasets list`** or the **`full_name` printed by `datasets create`**—especially for sandbox datasets, where the schema is **`datasets.`**, not `datasets.main`. +- For the **qualified SQL name**, prefer **`FULL NAME` from `views list`** or the **`full_name` printed by `views create`**—especially for sandbox views, where the schema is **`views.`**, not `views.main`. -#### Update a dataset +#### Update a view ``` -hotdata datasets update [--description
` and active sandbox or `hotdata sandbox run …` +5. [ ] (Sandbox) Use `views..
` and active sandbox or `hotdata sandbox run …` 6. [ ] Record stable chains in **context:DATAMODEL** when they should outlive the session **Detail:** [hotdata-analytics WORKFLOWS — Chain](../../hotdata-analytics/references/WORKFLOWS.md#chain) @@ -80,38 +80,37 @@ End-to-end checklists. Use the linked sections for command detail and guardrails --- -## Datasets vs managed databases +## Views vs managed databases -Both land queryable tables in the workspace; the path depends on **format** and **how you want to name tables in SQL**. +Both land queryable tables in the workspace; the path depends on **source** and **how you want to name tables in SQL**. -| | **Datasets** | **Managed databases** | -|---|-------------|------------------------| -| **Best for** | CSV, JSON, URL import, stdin, SQL/query snapshot | Parquet files you own; catalog-style `name.schema.table` | -| **SQL prefix** | `datasets..
` (often `datasets.main.*`) | `..
` (database = connection name) | -| **CLI** | `hotdata datasets create` | `hotdata databases create` + `databases tables load` | +| | **Views** | **Managed databases** | +|---|-----------|------------------------| +| **Best for** | SQL/query snapshot | Parquet files you own; catalog-style `name.schema.table` | +| **SQL prefix** | `views..
` (often `views.main.*`) | `..
` (database = connection name) | +| **CLI** | `hotdata views create` | `hotdata databases create` + `databases tables load` | | **Declare schema up front** | No | Yes — `--table` on create (required before load on current API) | -| **Parquet** | Yes (`--file`, `--url`, `--upload-id`) | **Only** parquet on `tables load` | -| **Refresh upstream** | `datasets refresh` (URL/query sources) | Replace via `tables load` again | +| **Parquet** | No | **Only** parquet on `tables load` | +| **Refresh upstream** | `views refresh` (query sources) | Replace via `tables load` again | -**Rule of thumb:** CSV/JSON or “upload a file from a URL” → **datasets**. Parquet catalog you control as **`mydb.public.orders`** → **databases**. +**Rule of thumb:** SQL-query snapshot → **views**. Parquet catalog you control as **`mydb.public.orders`** → **databases**. -### Workflow: dataset upload and query +### Workflow: view creation and query 1. Authenticate and set workspace (`hotdata auth`, `hotdata workspaces set` if needed). -2. Create the dataset (one source): +2. Create the view: ```bash - hotdata datasets create --label "Orders" --file ./orders.csv - # or: --url "https://example.com/orders.parquet" - # or: --sql "SELECT ..." # materialize from a query + hotdata views create --name orders --sql "SELECT ..." + # or: --query-id # materialize from a saved query ``` -3. Note the printed **`full_name`** (e.g. `datasets.main.orders`) — do not assume `datasets.main`. -4. Inspect if needed: `hotdata datasets list`, `hotdata datasets `. +3. Note the printed **`full_name`** (e.g. `views.main.orders`) — do not assume `views.main`. +4. Inspect if needed: `hotdata views list`, `hotdata views `. 5. 
Query: ```bash - hotdata query "SELECT count(*) FROM datasets.main.orders" + hotdata query "SELECT count(*) FROM views.main.orders" ``` ### Workflow: managed database (parquet) @@ -137,7 +136,7 @@ Both land queryable tables in the workspace; the path depends on **format** and hotdata query "SELECT count(*) FROM sales.public.orders" ``` -For **Chain** materializations into datasets or databases, see **`hotdata-analytics`**. +For **Chain** materializations into views or databases, see **`hotdata-analytics`**. --- @@ -165,8 +164,8 @@ hotdata connections list hotdata connections refresh # after DDL / stale remote metadata hotdata tables list hotdata tables list --connection-id -hotdata datasets list -hotdata datasets +hotdata views list +hotdata views hotdata databases list ``` @@ -174,24 +173,24 @@ Use `hotdata tables list` for discovery; do not query `information_schema` for t --- -## Sandboxes and datasets +## Sandboxes and views -Use this when work is isolated in a **sandbox** (exploratory runs, ephemeral datasets). +Use this when work is isolated in a **sandbox** (exploratory runs, ephemeral views). -**Active sandbox vs `sandbox run`:** After `sandbox new` or `sandbox set`, run **`datasets create`**, **`query`**, etc. **directly**. **`sandbox run `** (no id before `run`) **always creates a new sandbox**. +**Active sandbox vs `sandbox run`:** After `sandbox new` or `sandbox set`, run **`views create`**, **`query`**, etc. **directly**. **`sandbox run `** (no id before `run`) **always creates a new sandbox**. -**Qualified names:** Workspace datasets → **`datasets.main.
`**. Sandbox datasets → **`datasets..
`**. Use **`full_name`** from create or **FULL NAME** from `datasets list`. +**Qualified names:** Workspace views → **`views.main.
`**. Sandbox views → **`views..
`**. Use **`full_name`** from create or **FULL NAME** from `views list`. **Access:** Sandbox-only tables need active sandbox config or **`hotdata sandbox run …`**. **SQL:** Quote mixed-case columns with double quotes. -**Listing:** `datasets list` returns all workspace datasets; use **FULL NAME** to spot sandbox vs `main` rows. +**Listing:** `views list` returns all workspace views; use **FULL NAME** to spot sandbox vs `main` rows. --- ## Cross-cutting - **Workspace:** Active workspace or `--workspace-id`. **`hotdata queries`** uses the active workspace only (no `--workspace-id`). -- **Jobs:** `hotdata jobs list` / `jobs ` for async refreshes, dataset refresh, and index builds. +- **Jobs:** `hotdata jobs list` / `jobs ` for async refreshes, view refresh, and index builds. - **Discovery:** `hotdata tables list` — not `query` on `information_schema`. diff --git a/src/command.rs b/src/command.rs index 82d15ae..2440689 100644 --- a/src/command.rs +++ b/src/command.rs @@ -8,23 +8,6 @@ pub enum Commands { command: Option, }, - /// Derived views — virtual SQL tables built from queries over your data - Datasets { - /// Dataset ID to show details - id: Option, - - /// Workspace ID (defaults to first workspace from login) - #[arg(long, short = 'w', global = true)] - workspace_id: Option, - - /// Output format (used with dataset ID) - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - - #[command(subcommand)] - command: Option, - }, - /// Execute a SQL query, or check status of a running query Query { /// SQL query string (omit when using a subcommand) @@ -73,6 +56,23 @@ pub enum Commands { command: Option, }, + /// SQL-derived views materialized from queries or saved queries + Views { + /// View ID to show details + id: Option, + + /// Workspace ID (defaults to first workspace from login) + #[arg(long, short = 'w', global = true)] + workspace_id: Option, + + /// Output format (used with view ID) + 
#[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + + #[command(subcommand)] + command: Option, + }, + /// Managed databases you create and populate with tables (parquet uploads) Databases { /// Database id or name (omit to use a subcommand) @@ -173,8 +173,15 @@ pub enum Commands { #[arg(long, value_parser = ["vector", "bm25"])] r#type: Option, - /// Table to search (`connection.table` or `connection.schema.table`). - /// Schema defaults to `public` when omitted. + /// Catalog (database id or name) to search in. + #[arg(long)] + catalog: String, + + /// Schema to search in (default: public) + #[arg(long)] + schema: Option, + + /// Table to search #[arg(long)] table: String, @@ -328,28 +335,29 @@ pub enum IndexesCommands { output: String, }, - /// Create an index on a table or dataset. - /// - /// For connection-scoped indexes, pass the table and columns using bracket notation: - /// `connection.table[col1,col2]` or `connection.schema.table[col1,col2]` - /// (schema defaults to `public` when omitted) - /// - /// For dataset-scoped indexes, use `--dataset-id` with `--columns`. + /// Create an index on a table Create { - /// Table and columns to index: `connection.table[col1,col2]` - /// or `connection.schema.table[col1,col2]`. Schema defaults to `public`. - /// - /// Quote the argument to prevent shell glob expansion: - /// `hotdata indexes create 'airbnb.listings[description]' --type bm25` - #[arg(conflicts_with = "dataset_id")] - target: Option, + /// Catalog (database id or name) for the table to index. 
+ #[arg(long, conflicts_with = "dataset_id", required_unless_present = "dataset_id")] + catalog: Option, + + /// Schema for the table to index (default: public) + #[arg(long, conflicts_with = "dataset_id")] + schema: Option, + + /// Table name to index + #[arg(long = "table", conflicts_with = "dataset_id")] + table_name: Option, + + /// Column to index + #[arg(long)] + column: Option, - /// Dataset ID (alternative scope to the positional target) - #[arg(long, conflicts_with = "target")] + /// Dataset ID (alternative scope — use with --columns) + #[arg(long, conflicts_with_all = ["catalog", "table_name"])] dataset_id: Option, - /// Columns to index (comma-separated). Required with --dataset-id; - /// for connection scope use bracket notation in the target instead. + /// Columns to index (comma-separated). Required with --dataset-id. #[arg(long)] columns: Option, @@ -445,8 +453,37 @@ pub enum JobsCommands { } #[derive(Subcommand)] -pub enum DatasetsCommands { - /// List all datasets in a workspace +pub enum WorkspaceCommands { + /// List all workspaces + List { + /// Output format + #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + }, + + /// Set the default workspace + Set { + /// Workspace ID to set as default (omit for interactive selection) + workspace_id: Option, + }, +} + +#[derive(Subcommand)] +pub enum ConnectionsCreateCommands { + /// List available connection types, or get details for a specific type + List { + /// Connection type name (e.g. 
postgres, mysql); omit to list all + name: Option, + + /// Output format + #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + }, +} + +#[derive(Subcommand)] +pub enum ViewsCommands { + /// List all views in a workspace List { /// Maximum number of results (default: 100, max: 1000) #[arg(long)] @@ -461,9 +498,9 @@ pub enum DatasetsCommands { output: String, }, - /// Create a derived view from a SQL query or saved query + /// Create a view from a SQL query or saved query Create { - /// SQL table name the dataset is addressable as (e.g. my_view) + /// SQL table name the view is addressable as (e.g. my_view) #[arg(long)] name: String, @@ -471,11 +508,11 @@ pub enum DatasetsCommands { #[arg(long)] description: Option, - /// SQL query to create the dataset from + /// SQL query to create the view from #[arg(long, conflicts_with = "query_id", required_unless_present = "query_id")] sql: Option, - /// Saved query ID to create the dataset from + /// Saved query ID to create the view from #[arg(long, conflicts_with = "sql", required_unless_present = "sql")] query_id: Option, @@ -484,9 +521,9 @@ pub enum DatasetsCommands { output: String, }, - /// Update a dataset's description and/or name + /// Update a view's description and/or name Update { - /// Dataset ID + /// View ID id: String, /// New display label @@ -502,9 +539,9 @@ pub enum DatasetsCommands { output: String, }, - /// Refresh a dataset by re-running its source (URL fetch or saved query) and creating a new version + /// Refresh a view by re-running its source query and creating a new version Refresh { - /// Dataset ID + /// View ID id: String, /// Submit as a background job @@ -513,35 +550,6 @@ pub enum DatasetsCommands { }, } -#[derive(Subcommand)] -pub enum WorkspaceCommands { - /// List all workspaces - List { - /// Output format - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: 
String, - }, - - /// Set the default workspace - Set { - /// Workspace ID to set as default (omit for interactive selection) - workspace_id: Option, - }, -} - -#[derive(Subcommand)] -pub enum ConnectionsCreateCommands { - /// List available connection types, or get details for a specific type - List { - /// Connection type name (e.g. postgres, mysql); omit to list all - name: Option, - - /// Output format - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - }, -} - #[derive(Subcommand)] pub enum DatabasesCommands { /// List managed databases in the workspace @@ -601,15 +609,24 @@ pub enum DatabasesCommands { /// Delete a managed database and its tables Delete { - /// Database name or connection ID + /// Database name or ID name_or_id: String, }, - /// Load a parquet file into a table using dot notation: `database.table` or `database.schema.table` + /// Load a parquet file into a managed database table Load { - /// Table to load into: `database.table` or `database.schema.table`. - /// Schema defaults to `public` when omitted. - target: String, + /// Table name to load into + #[arg(long, required = true)] + table: String, + + /// Catalog (database name) to load into. Defaults to the current database set via + /// `databases set`. Required when no current database is configured. + #[arg(long)] + catalog: Option, + + /// Schema to load into (default: public) + #[arg(long)] + schema: Option, /// Path to a local parquet file to upload and load #[arg(long, conflicts_with_all = ["upload_id", "url"])] diff --git a/src/connections.rs b/src/connections.rs index 135663f..3a5be6d 100644 --- a/src/connections.rs +++ b/src/connections.rs @@ -157,37 +157,6 @@ struct ListResponse { connections: Vec, } -/// Resolve a connection name or ID to a connection ID, exiting on failure. 
-/// -/// If `name_or_id` looks like a raw connection ID (starts with "conn"), tries -/// `GET /connections/{id}` directly first to avoid listing the full workspace. -/// Falls back to listing and matching by name on a 404 or when given a plain name. -pub fn resolve_connection_id(api: &ApiClient, name_or_id: &str) -> String { - use crossterm::style::Stylize; - - if name_or_id.starts_with("conn") { - let (status, _) = api.get_raw(&format!("/connections/{name_or_id}")); - if status.is_success() { - return name_or_id.to_string(); - } - } - - let body: ListResponse = api.get("/connections"); - match body - .connections - .iter() - .find(|c| c.id == name_or_id || c.name == name_or_id) - { - Some(conn) => conn.id.clone(), - None => { - eprintln!( - "{}", - format!("error: no connection named or with id '{name_or_id}'").red() - ); - std::process::exit(1); - } - } -} pub fn get(workspace_id: &str, connection_id: &str, format: &str) { let api = ApiClient::new(Some(workspace_id)); diff --git a/src/databases.rs b/src/databases.rs index 8fe6c07..6f50a20 100644 --- a/src/databases.rs +++ b/src/databases.rs @@ -25,6 +25,7 @@ pub struct Database { #[serde(default)] pub name: Option, pub default_connection_id: String, + pub default_catalog: String, #[serde(default)] attachments: Vec, } @@ -215,11 +216,11 @@ pub fn is_parquet_path(path: &str) -> bool { || Path::new(path).extension().and_then(|e| e.to_str()) == Some("parquet") } -fn table_rows(tables: Vec) -> Vec { +fn table_rows(catalog: &str, tables: Vec) -> Vec { tables .into_iter() .map(|t| TableRow { - full_name: format!("default.{}.{}", t.schema, t.table), + full_name: format!("{catalog}.{}.{}", t.schema, t.table), schema: t.schema, table: t.table, synced: t.synced, @@ -263,7 +264,7 @@ fn upload_parquet_file(api: &ApiClient, path: &str) -> String { if !is_parquet_path(path) { eprintln!( "error: managed table loads require a parquet file (got '{}'). 
\ - Convert your data to parquet or use `hotdata datasets create` for CSV/JSON.", + Convert your data to parquet before loading.", path ); std::process::exit(1); @@ -607,12 +608,12 @@ pub fn create( format!( concat!( "Load a table:\n", - " hotdata databases load --file {}.\n", + " hotdata databases load --catalog {} --table --file \n", "\nQuery with:\n", " hotdata query --database {} \"SELECT * FROM {}.public.
LIMIT 10\"\n", "\n Tip: column names are case-sensitive — wrap uppercase names in double quotes", ), - result.id, result.id, catalog + catalog, result.id, catalog ) .dark_grey() ); @@ -624,16 +625,12 @@ pub fn create( pub fn set(workspace_id: &str, id: &str) { use crossterm::style::Stylize; let api = ApiClient::new(Some(workspace_id)); - let encoded = urlencoding::encode(id); - if api.get_none_if_not_found::(&format!("/databases/{encoded}")).is_none() { - eprintln!("{}", format!("error: no database with id '{id}'").red()); - std::process::exit(1); - } - if let Err(e) = crate::config::save_current_database("default", workspace_id, id) { + let db = resolve_database(&api, id); + if let Err(e) = crate::config::save_current_database("default", workspace_id, &db.id) { eprintln!("{}", format!("error saving current database: {e}").red()); std::process::exit(1); } - println!("{}", format!("Current database set to {id}").green()); + println!("{}", format!("Current database set to {}", db.id).green()); } fn resolve_current_database(provided: Option<&str>, workspace_id: &str) -> String { @@ -680,7 +677,7 @@ pub fn tables_list(workspace_id: &str, database: Option<&str>, schema: Option<&s let db = resolve_database(&api, &database); let tables = collect_tables(&api, &db.default_connection_id, schema); - let rows = table_rows(tables); + let rows = table_rows(&db.default_catalog, tables); match format { "json" => println!("{}", serde_json::to_string_pretty(&rows).unwrap()), @@ -769,7 +766,7 @@ pub fn tables_load( } }; - let full_name = format!("default.{}.{}", result.schema_name, result.table_name); + let full_name = format!("{}.{}.{}", db.default_catalog, result.schema_name, result.table_name); println!("{}", "Table loaded".green()); println!("full_name: {}", full_name.clone().green()); println!("rows: {}", result.row_count); @@ -889,7 +886,7 @@ mod tests { fn full_detail(id: &str, name: &str, conn_id: &str) -> String { format!( - 
r#"{{"id":"{id}","name":"{name}","default_connection_id":"{conn_id}","attachments":[]}}"# + r#"{{"id":"{id}","name":"{name}","default_connection_id":"{conn_id}","default_catalog":"default","attachments":[]}}"# ) } @@ -1005,8 +1002,8 @@ mod tests { } #[test] - fn table_rows_uses_default_prefix() { - let rows = table_rows(vec![InfoTable { + fn table_rows_uses_catalog_prefix() { + let rows = table_rows("mydb", vec![InfoTable { connection: "ignored".into(), schema: "public".into(), table: "orders".into(), @@ -1014,7 +1011,7 @@ mod tests { last_sync: Some("2026-05-19T00:00:00Z".into()), }]); assert_eq!(rows.len(), 1); - assert_eq!(rows[0].full_name, "default.public.orders"); + assert_eq!(rows[0].full_name, "mydb.public.orders"); assert!(rows[0].synced); } diff --git a/src/indexes.rs b/src/indexes.rs index 2465b2b..37ce35f 100644 --- a/src/indexes.rs +++ b/src/indexes.rs @@ -219,18 +219,12 @@ pub fn infer_for_search( let api = ApiClient::new(Some(workspace_id)); - // Resolve connection name → ID + // Resolve connection name → ID, or treat as a raw ID when name lookup fails. 
let conn_map = connection_lookup(&api); - let connection_id = match conn_map.get(connection_name) { - Some(id) => id.clone(), - None => { - eprintln!( - "{}", - format!("Connection '{}' not found.", connection_name).red() - ); - std::process::exit(1); - } - }; + let connection_id = conn_map + .get(connection_name) + .cloned() + .unwrap_or_else(|| connection_name.to_string()); // Fetch indexes for this table let indexes = list_one_table(&api, &connection_id, schema, table); diff --git a/src/main.rs b/src/main.rs index 4c3c5c8..e998dbf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,7 +7,6 @@ mod connections_new; mod context; mod database_session; mod databases; -mod datasets; mod embedding_providers; mod indexes; mod jobs; @@ -22,15 +21,16 @@ mod table; mod tables; mod update; mod util; +mod views; mod workspace; use anstyle::AnsiColor; use clap::{Parser, builder::Styles}; use command::{ AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, ContextCommands, - DatabaseTablesCommands, DatabasesCommands, DatasetsCommands, EmbeddingProvidersCommands, + DatabaseTablesCommands, DatabasesCommands, EmbeddingProvidersCommands, IndexesCommands, JobsCommands, QueriesCommands, QueryCommands, ResultsCommands, - SandboxCommands, SkillCommands, TablesCommands, WorkspaceCommands, + SandboxCommands, SkillCommands, TablesCommands, ViewsCommands, WorkspaceCommands, }; #[derive(Parser)] @@ -201,74 +201,6 @@ fn main() { Some(AuthCommands::Status) => auth::status("default"), Some(AuthCommands::Logout) => auth::logout("default"), }, - Commands::Datasets { - id, - workspace_id, - output, - command, - } => { - let workspace_id = resolve_workspace(workspace_id); - if let Some(id) = id { - datasets::get(&id, &workspace_id, &output) - } else { - match command { - Some(DatasetsCommands::List { - limit, - offset, - output, - }) => datasets::list(&workspace_id, limit, offset, &output), - Some(DatasetsCommands::Create { - name, - description, - sql, - query_id, - output, - }) => 
{ - if let Some(sql) = sql { - datasets::create_from_query( - &workspace_id, - &sql, - description.as_deref(), - &name, - &output, - ) - } else { - datasets::create_from_saved_query( - &workspace_id, - query_id.as_deref().unwrap_or_else(|| unreachable!("clap enforces --sql or --query-id")), - description.as_deref(), - &name, - &output, - ) - } - } - Some(DatasetsCommands::Update { - id, - description, - name, - output, - }) => datasets::update( - &id, - &workspace_id, - description.as_deref(), - name.as_deref(), - &output, - ), - Some(DatasetsCommands::Refresh { id, r#async }) => { - datasets::refresh(&workspace_id, &id, r#async) - } - None => { - use clap::CommandFactory; - let mut cmd = Cli::command(); - cmd.build(); - cmd.find_subcommand_mut("datasets") - .unwrap() - .print_help() - .unwrap(); - } - } - } - } Commands::Query { sql, workspace_id, @@ -282,11 +214,15 @@ fn main() { Some(QueryCommands::Status { id }) => query::poll(&id, &workspace_id, &output), None => match sql { Some(sql) => { + let resolved_db = database.as_deref().map(|d| { + let api = api::ApiClient::new(Some(&workspace_id)); + databases::resolve_database(&api, d).id + }); query::execute( &sql, &workspace_id, connection.as_deref(), - database.as_deref(), + resolved_db.as_deref(), &output, ) } @@ -389,6 +325,74 @@ fn main() { } } } + Commands::Views { + id, + workspace_id, + output, + command, + } => { + let workspace_id = resolve_workspace(workspace_id); + if let Some(id) = id { + views::get(&id, &workspace_id, &output) + } else { + match command { + Some(ViewsCommands::List { + limit, + offset, + output, + }) => views::list(&workspace_id, limit, offset, &output), + Some(ViewsCommands::Create { + name, + description, + sql, + query_id, + output, + }) => { + if let Some(sql) = sql { + views::create_from_query( + &workspace_id, + &sql, + description.as_deref(), + &name, + &output, + ) + } else { + views::create_from_saved_query( + &workspace_id, + query_id.as_deref().unwrap_or_else(|| 
unreachable!("clap enforces --sql or --query-id")), + description.as_deref(), + &name, + &output, + ) + } + } + Some(ViewsCommands::Update { + id, + description, + name, + output, + }) => views::update( + &id, + &workspace_id, + description.as_deref(), + name.as_deref(), + &output, + ), + Some(ViewsCommands::Refresh { id, r#async }) => { + views::refresh(&workspace_id, &id, r#async) + } + None => { + use clap::CommandFactory; + let mut cmd = Cli::command(); + cmd.build(); + cmd.find_subcommand_mut("views") + .unwrap() + .print_help() + .unwrap(); + } + } + } + } Commands::Databases { name_or_id, workspace_id, @@ -451,17 +455,20 @@ fn main() { databases::delete(&workspace_id, &name_or_id) } Some(DatabasesCommands::Load { - target, + table, + catalog, + schema, file, url, upload_id, }) => { - let (database, schema, table) = parse_db_target(&target); + let resolved_schema = + schema.unwrap_or_else(|| "public".to_string()); databases::tables_load( &workspace_id, - Some(database.as_str()), + catalog.as_deref(), &table, - Some(schema.as_str()), + Some(resolved_schema.as_str()), file.as_deref(), url.as_deref(), upload_id.as_deref(), @@ -660,7 +667,10 @@ fn main() { &output, ), IndexesCommands::Create { - target, + catalog, + schema, + table_name, + column, dataset_id, columns, name, @@ -674,31 +684,14 @@ fn main() { } => { let api = api::ApiClient::new(Some(&workspace_id)); let (scope, resolved_columns, auto_name) = - match (target.as_deref(), dataset_id.as_deref()) { - (Some(tgt), None) => { - let (conn_name, schema, table, cols) = - parse_index_target(tgt); - let conn_id = - connections::resolve_connection_id(&api, &conn_name); - let auto = format!( - "{table}_{cols}_{type}", - cols = cols.join("_"), - type = r#type - ); - ( - (conn_id, schema, table), - cols.join(","), - auto, - ) - } - (None, Some(did)) => { - let cols = - columns.as_deref().unwrap_or_else(|| { - eprintln!( - "error: --columns is required with --dataset-id" - ); - std::process::exit(1); - }); + match 
dataset_id.as_deref() { + Some(did) => { + let cols = columns.as_deref().unwrap_or_else(|| { + eprintln!( + "error: --columns is required with --dataset-id" + ); + std::process::exit(1); + }); let auto = format!( "dataset_{cols}_{type}", cols = cols.replace(',', "_"), @@ -710,23 +703,34 @@ fn main() { auto, ) } - _ => { - eprintln!( - "error: provide either (e.g. airbnb.listings[col1,col2]) or --dataset-id with --columns" - ); - std::process::exit(1); + None => { + let tbl = table_name.unwrap_or_else(|| { + eprintln!("error: --table is required"); + std::process::exit(1); + }); + let col = column.or(columns).unwrap_or_else(|| { + eprintln!("error: --column is required"); + std::process::exit(1); + }); + let sch = schema.unwrap_or_else(|| "public".to_string()); + let cat = catalog.unwrap(); + let db = databases::resolve_database(&api, &cat); + let conn_id = db.default_connection_id; + let auto = + format!("{tbl}_{col}_{type}", type = r#type); + ((conn_id, sch, tbl), col, auto) } }; let index_name = name.unwrap_or(auto_name); let is_dataset = dataset_id.is_some(); - let (conn_id, schema, table) = scope; + let (conn_id, idx_schema, idx_table) = scope; let resolved_scope = if is_dataset { indexes::IndexScope::Dataset { dataset_id: &conn_id } } else { indexes::IndexScope::Connection { connection_id: &conn_id, - schema: &schema, - table: &table, + schema: &idx_schema, + table: &idx_table, } }; indexes::create( @@ -827,6 +831,8 @@ fn main() { Commands::Search { query, r#type, + catalog, + schema, table, column, select, @@ -836,22 +842,16 @@ fn main() { } => { let workspace_id = resolve_workspace(workspace_id); - // Parse `connection.table` or `connection.schema.table`. - // Schema defaults to `public` when omitted. 
- let parts: Vec<&str> = table.splitn(4, '.').collect(); - let (conn_name, schema, table_name) = match parts.as_slice() { - [conn, schema, tbl] => { - (conn.to_string(), schema.to_string(), tbl.to_string()) - } - [conn, tbl] => (conn.to_string(), "public".to_string(), tbl.to_string()), - _ => { - eprintln!( - "error: --table must be 'connection.table' or 'connection.schema.table'" - ); - std::process::exit(1); - } - }; - let normalized_table = format!("{}.{}.{}", conn_name, schema, table_name); + let api = api::ApiClient::new(Some(&workspace_id)); + let db = databases::resolve_database(&api, &catalog); + let resolved_schema = schema.unwrap_or_else(|| "public".to_string()); + let db_id = db.id.clone(); + let conn_id = db.default_connection_id; + + // Both search types run as SQL with X-Database-Id; the server rewrites + // catalog aliases (like "default") to the real connection name before execution. + let bm25_table = format!("{}.{}.{}", db.default_catalog, resolved_schema, table); + let vector_table = format!("{}.{}.{}", db.default_catalog, resolved_schema, table); // Infer --type and --column from the table's indexes when either is omitted. 
let (resolved_type, resolved_column) = @@ -860,9 +860,9 @@ fn main() { } else { let (inferred_type, inferred_column) = indexes::infer_for_search( &workspace_id, - &conn_name, - &schema, - &table_name, + &conn_id, + &resolved_schema, + &table, r#type.as_deref(), column.as_deref(), ); @@ -883,7 +883,7 @@ fn main() { format!( "SELECT {} FROM bm25_search('{}', '{}', '{}') ORDER BY score DESC LIMIT {}", bm25_columns, - normalized_table.replace('\'', "''"), + bm25_table.replace('\'', "''"), resolved_column.replace('\'', "''"), query.replace('\'', "''"), limit, @@ -896,12 +896,12 @@ fn main() { select_cols, resolved_column, query.replace('\'', "''"), - normalized_table, + vector_table, limit, ), _ => unreachable!(), }; - query::execute(&sql, &workspace_id, None, None, &output) + query::execute(&sql, &workspace_id, None, Some(db_id.as_str()), &output) } Commands::Queries { id, @@ -1057,122 +1057,6 @@ fn main() { update::maybe_print_update_notice(update_handle); } -/// Parse a database target like `airbnb.listings` or `airbnb.public.listings` -/// into `(database, schema, table)`. Schema defaults to `public`. -fn parse_db_target(target: &str) -> (String, String, String) { - let parts: Vec<&str> = target.splitn(4, '.').collect(); - match parts.as_slice() { - [db, tbl] => (db.to_string(), "public".to_string(), tbl.to_string()), - [db, schema, tbl] => (db.to_string(), schema.to_string(), tbl.to_string()), - _ => { - eprintln!( - "error: target must be 'database.table' or 'database.schema.table'" - ); - std::process::exit(1); - } - } -} - -/// Parse an index target like `airbnb.listings[col1,col2]` or -/// `airbnb.public.listings[col1,col2]` into `(conn_name, schema, table, columns)`. -/// Schema defaults to `public` when only two dot-parts are given. -fn parse_index_target(target: &str) -> (String, String, String, Vec) { - let Some(bracket_pos) = target.find('[') else { - eprintln!( - "error: target must include columns in brackets, e.g. 
airbnb.listings[col1,col2]" - ); - std::process::exit(1); - }; - if !target.ends_with(']') { - eprintln!( - "error: target bracket is not closed — use e.g. 'airbnb.listings[col1,col2]'" - ); - std::process::exit(1); - } - let table_part = &target[..bracket_pos]; - let cols_raw = &target[bracket_pos + 1..target.len() - 1]; - - let parts: Vec<&str> = table_part.splitn(4, '.').collect(); - let (conn, schema, table) = match parts.as_slice() { - [c, t] => (c.to_string(), "public".to_string(), t.to_string()), - [c, s, t] => (c.to_string(), s.to_string(), t.to_string()), - _ => { - eprintln!( - "error: target must be 'connection.table[cols]' or 'connection.schema.table[cols]'" - ); - std::process::exit(1); - } - }; - - let columns: Vec = cols_raw - .split(',') - .map(|s| s.trim().to_string()) - .filter(|s| !s.is_empty()) - .collect(); - - if columns.is_empty() { - eprintln!("error: no columns specified in brackets"); - std::process::exit(1); - } - - (conn, schema, table, columns) -} - -#[cfg(test)] -mod tests { - use super::*; - - // --- parse_db_target --- - - #[test] - fn db_target_two_parts_defaults_schema_to_public() { - let (db, schema, table) = parse_db_target("airbnb.listings"); - assert_eq!(db, "airbnb"); - assert_eq!(schema, "public"); - assert_eq!(table, "listings"); - } - - #[test] - fn db_target_three_parts_uses_explicit_schema() { - let (db, schema, table) = parse_db_target("airbnb.staging.listings"); - assert_eq!(db, "airbnb"); - assert_eq!(schema, "staging"); - assert_eq!(table, "listings"); - } - - // --- parse_index_target --- - - #[test] - fn index_target_two_parts_defaults_schema_to_public() { - let (conn, schema, table, cols) = parse_index_target("airbnb.listings[description]"); - assert_eq!(conn, "airbnb"); - assert_eq!(schema, "public"); - assert_eq!(table, "listings"); - assert_eq!(cols, vec!["description"]); - } - - #[test] - fn index_target_three_parts_uses_explicit_schema() { - let (conn, schema, table, cols) = - 
parse_index_target("airbnb.public.listings[name,description]"); - assert_eq!(conn, "airbnb"); - assert_eq!(schema, "public"); - assert_eq!(table, "listings"); - assert_eq!(cols, vec!["name", "description"]); - } - - #[test] - fn index_target_multiple_columns() { - let (_, _, _, cols) = parse_index_target("db.tbl[a,b,c]"); - assert_eq!(cols, vec!["a", "b", "c"]); - } - - #[test] - fn index_target_trims_column_whitespace() { - let (_, _, _, cols) = parse_index_target("db.tbl[a, b]"); - assert_eq!(cols, vec!["a", "b"]); - } -} pub fn get_styles() -> clap::builder::Styles { Styles::styled() diff --git a/src/datasets.rs b/src/views.rs similarity index 72% rename from src/datasets.rs rename to src/views.rs index 735031e..9afbe62 100644 --- a/src/datasets.rs +++ b/src/views.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use serde_json::json; #[derive(Deserialize, Serialize)] -struct Dataset { +struct View { id: String, label: String, #[serde(default = "default_schema")] @@ -28,7 +28,8 @@ struct CreateResponse { #[derive(Deserialize)] struct ListResponse { - datasets: Vec, + #[serde(rename = "datasets")] + views: Vec, count: u64, has_more: bool, } @@ -41,7 +42,7 @@ struct Column { } #[derive(Deserialize, Serialize)] -struct DatasetDetail { +struct ViewDetail { id: String, label: String, schema_name: String, @@ -56,9 +57,9 @@ struct DatasetDetail { struct UpdateResponse { id: String, label: String, - // Not currently in runtimedb's UpdateDatasetResponse; kept Optional so we - // print `full_name` only when the server actually returns the schema. - // Synthesizing "main" is wrong for sandbox-scoped datasets where + // Not currently in runtimedb's UpdateDatasetResponse (see runtimedb/src/http/models.rs). + // Kept Optional so we print `full_name` only when the server actually returns the schema. + // Synthesizing "main" is wrong for sandbox-scoped views where // schema_name == sandbox_id. 
#[serde(default)] schema_name: Option, @@ -70,7 +71,7 @@ struct UpdateResponse { updated_at: String, } -fn create_dataset( +fn create_view( api: &ApiClient, description: Option<&str>, name: &str, @@ -88,7 +89,7 @@ fn create_dataset( std::process::exit(1); } - let dataset: CreateResponse = match serde_json::from_str(&resp_body) { + let view: CreateResponse = match serde_json::from_str(&resp_body) { Ok(v) => v, Err(e) => { eprintln!("error parsing response: {e}"); @@ -98,15 +99,15 @@ fn create_dataset( use crossterm::style::Stylize; match format { - "json" => println!("{}", serde_json::to_string_pretty(&dataset).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&dataset).unwrap()), + "json" => println!("{}", serde_json::to_string_pretty(&view).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&view).unwrap()), "table" => { - eprintln!("{}", "Dataset created".green()); - println!("id: {}", dataset.id); - println!("label: {}", dataset.label); + eprintln!("{}", "View created".green()); + println!("id: {}", view.id); + println!("label: {}", view.label); println!( - "full_name: datasets.{}.{}", - dataset.schema_name, dataset.table_name + "full_name: views.{}.{}", + view.schema_name, view.table_name ); } _ => unreachable!(), @@ -115,7 +116,7 @@ fn create_dataset( pub fn create_from_query(workspace_id: &str, sql: &str, description: Option<&str>, name: &str, format: &str) { let api = ApiClient::new(Some(workspace_id)); - create_dataset(&api, description, name, json!({ "type": "sql_query", "sql": sql }), format); + create_view(&api, description, name, json!({ "type": "sql_query", "sql": sql }), format); } pub fn create_from_saved_query( @@ -126,7 +127,7 @@ pub fn create_from_saved_query( format: &str, ) { let api = ApiClient::new(Some(workspace_id)); - create_dataset(&api, description, name, json!({ "type": "saved_query", "saved_query_id": query_id }), format); + create_view(&api, description, name, json!({ "type": "saved_query", "saved_query_id": query_id }), 
format); } pub fn list(workspace_id: &str, limit: Option, offset: Option, format: &str) { @@ -139,22 +140,22 @@ pub fn list(workspace_id: &str, limit: Option, offset: Option, format: let body: ListResponse = api.get_with_params("/datasets", ¶ms); match format { - "json" => println!("{}", serde_json::to_string_pretty(&body.datasets).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&body.datasets).unwrap()), + "json" => println!("{}", serde_json::to_string_pretty(&body.views).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&body.views).unwrap()), "table" => { - if body.datasets.is_empty() { + if body.views.is_empty() { use crossterm::style::Stylize; - eprintln!("{}", "No datasets found.".dark_grey()); + eprintln!("{}", "No views found.".dark_grey()); } else { let rows: Vec> = body - .datasets + .views .iter() - .map(|d| { + .map(|v| { vec![ - d.id.clone(), - d.label.clone(), - format!("datasets.{}.{}", d.schema_name, d.table_name), - crate::util::format_date(&d.created_at), + v.id.clone(), + v.label.clone(), + format!("views.{}.{}", v.schema_name, v.table_name), + crate::util::format_date(&v.created_at), ] }) .collect(); @@ -177,26 +178,26 @@ pub fn list(workspace_id: &str, limit: Option, offset: Option, format: } } -pub fn get(dataset_id: &str, workspace_id: &str, format: &str) { +pub fn get(view_id: &str, workspace_id: &str, format: &str) { let api = ApiClient::new(Some(workspace_id)); - let d: DatasetDetail = api.get(&format!("/datasets/{dataset_id}")); + let v: ViewDetail = api.get(&format!("/datasets/{view_id}")); match format { - "json" => println!("{}", serde_json::to_string_pretty(&d).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&d).unwrap()), + "json" => println!("{}", serde_json::to_string_pretty(&v).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&v).unwrap()), "table" => { - let created_at = crate::util::format_date(&d.created_at); - let updated_at = crate::util::format_date(&d.updated_at); - println!("id: {}", 
d.id); - println!("label: {}", d.label); - println!("full_name: datasets.main.{}", d.table_name); - println!("source_type: {}", d.source_type); + let created_at = crate::util::format_date(&v.created_at); + let updated_at = crate::util::format_date(&v.updated_at); + println!("id: {}", v.id); + println!("label: {}", v.label); + println!("full_name: views.{}.{}", v.schema_name, v.table_name); + println!("source_type: {}", v.source_type); println!("created_at: {created_at}"); println!("updated_at: {updated_at}"); - if !d.columns.is_empty() { + if !v.columns.is_empty() { println!(); - let rows: Vec> = d + let rows: Vec> = v .columns .iter() .map(|col| { @@ -215,7 +216,7 @@ pub fn get(dataset_id: &str, workspace_id: &str, format: &str) { } pub fn update( - dataset_id: &str, + view_id: &str, workspace_id: &str, description: Option<&str>, name: Option<&str>, @@ -236,43 +237,43 @@ pub fn update( body["table_name"] = json!(n); } - let d: UpdateResponse = api.put(&format!("/datasets/{dataset_id}"), &body); + let v: UpdateResponse = api.put(&format!("/datasets/{view_id}"), &body); use crossterm::style::Stylize; - eprintln!("{}", "Dataset updated".green()); + eprintln!("{}", "View updated".green()); match format { - "json" => println!("{}", serde_json::to_string_pretty(&d).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&d).unwrap()), + "json" => println!("{}", serde_json::to_string_pretty(&v).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&v).unwrap()), "table" => { - println!("id: {}", d.id); - println!("label: {}", d.label); - match &d.schema_name { + println!("id: {}", v.id); + println!("label: {}", v.label); + match &v.schema_name { Some(schema) => { - println!("full_name: datasets.{}.{}", schema, d.table_name); + println!("full_name: views.{}.{}", schema, v.table_name); } None => { - println!("table_name: {}", d.table_name); + println!("table_name: {}", v.table_name); eprintln!( "{}", format!( - "(run `hotdata datasets {}` to see the qualified name)", 
- d.id + "(run `hotdata views {}` to see the qualified name)", + v.id ) .dark_grey() ); } } - println!("updated_at: {}", crate::util::format_date(&d.updated_at)); + println!("updated_at: {}", crate::util::format_date(&v.updated_at)); } _ => unreachable!(), } } -pub fn refresh(workspace_id: &str, dataset_id: &str, async_mode: bool) { +pub fn refresh(workspace_id: &str, view_id: &str, async_mode: bool) { use crossterm::style::Stylize; let mut body = json!({ - "dataset_id": dataset_id, + "dataset_id": view_id, }); if async_mode { body["async"] = json!(true); @@ -290,7 +291,7 @@ pub fn refresh(workspace_id: &str, dataset_id: &str, async_mode: bool) { if async_mode { let job_id = parsed["id"].as_str().unwrap_or("unknown"); - println!("{}", "Dataset refresh submitted.".green()); + println!("{}", "View refresh submitted.".green()); println!("job_id: {}", job_id); println!( "{}", @@ -301,11 +302,11 @@ pub fn refresh(workspace_id: &str, dataset_id: &str, async_mode: bool) { let id = parsed["id"].as_str().unwrap_or("unknown"); let version = parsed["version"].as_i64().unwrap_or(0); - let dataset_status = parsed["status"].as_str().unwrap_or(""); - println!("{}", "Dataset refresh completed.".green()); + let view_status = parsed["status"].as_str().unwrap_or(""); + println!("{}", "View refresh completed.".green()); println!( "{}", - format!(" id: {id}, version: {version}, status: {dataset_status}").dark_grey() + format!(" id: {id}, version: {version}, status: {view_status}").dark_grey() ); } @@ -331,8 +332,8 @@ mod tests { assert_eq!(resp.label, "url_test"); assert_eq!(resp.table_name, "url_test"); // The server doesn't currently send schema_name, so we don't synthesize - // one — sandbox-scoped datasets live under datasets..
, - // not datasets.main.*, and a fabricated "main" would mislead users. + // one — sandbox-scoped views live under views..
, + // not views.main.*, and a fabricated "main" would mislead users. assert!(resp.schema_name.is_none()); assert_eq!(resp.latest_version, Some(3)); assert!(resp.pinned_version.is_none());