From ebda8ff238a051ef9c3489922c57bfd19a04480e Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 27 May 2026 16:08:11 -0700 Subject: [PATCH 1/9] chore: remove datasets command Removing the datasets CLI surface for now. Deletes datasets.rs entirely and strips the Datasets variant from Commands, DatasetsCommands enum, the dispatch block in main.rs, and the stale cross-reference in the databases error message. --- src/command.rs | 86 ----------- src/databases.rs | 2 +- src/datasets.rs | 381 ----------------------------------------------- src/main.rs | 71 +-------- 4 files changed, 2 insertions(+), 538 deletions(-) delete mode 100644 src/datasets.rs diff --git a/src/command.rs b/src/command.rs index a8c33ef..8afc01e 100644 --- a/src/command.rs +++ b/src/command.rs @@ -8,23 +8,6 @@ pub enum Commands { command: Option, }, - /// Derived views — virtual SQL tables built from queries over your data - Datasets { - /// Dataset ID to show details - id: Option, - - /// Workspace ID (defaults to first workspace from login) - #[arg(long, short = 'w', global = true)] - workspace_id: Option, - - /// Output format (used with dataset ID) - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - - #[command(subcommand)] - command: Option, - }, - /// Execute a SQL query, or check status of a running query Query { /// SQL query string (omit when using a subcommand) @@ -444,75 +427,6 @@ pub enum JobsCommands { }, } -#[derive(Subcommand)] -pub enum DatasetsCommands { - /// List all datasets in a workspace - List { - /// Maximum number of results (default: 100, max: 1000) - #[arg(long)] - limit: Option, - - /// Pagination offset - #[arg(long)] - offset: Option, - - /// Output format - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - }, - - /// Create a derived view from a SQL query or saved query - 
Create { - /// SQL table name the dataset is addressable as (e.g. my_view) - #[arg(long)] - name: String, - - /// Human-readable display label - #[arg(long)] - description: Option, - - /// SQL query to create the dataset from - #[arg(long, conflicts_with = "query_id", required_unless_present = "query_id")] - sql: Option, - - /// Saved query ID to create the dataset from - #[arg(long, conflicts_with = "sql", required_unless_present = "sql")] - query_id: Option, - - /// Output format - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - }, - - /// Update a dataset's description and/or name - Update { - /// Dataset ID - id: String, - - /// New display label - #[arg(long)] - description: Option, - - /// New SQL table name (must be a valid identifier) - #[arg(long)] - name: Option, - - /// Output format - #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] - output: String, - }, - - /// Refresh a dataset by re-running its source (URL fetch or saved query) and creating a new version - Refresh { - /// Dataset ID - id: String, - - /// Submit as a background job - #[arg(long)] - r#async: bool, - }, -} - #[derive(Subcommand)] pub enum WorkspaceCommands { /// List all workspaces diff --git a/src/databases.rs b/src/databases.rs index 15526dd..d43497b 100644 --- a/src/databases.rs +++ b/src/databases.rs @@ -231,7 +231,7 @@ fn upload_parquet_file(api: &ApiClient, path: &str) -> String { if !is_parquet_path(path) { eprintln!( "error: managed table loads require a parquet file (got '{}'). 
\ - Convert your data to parquet or use `hotdata datasets create` for CSV/JSON.", + Convert your data to parquet before loading.", path ); std::process::exit(1); diff --git a/src/datasets.rs b/src/datasets.rs deleted file mode 100644 index 735031e..0000000 --- a/src/datasets.rs +++ /dev/null @@ -1,381 +0,0 @@ -use crate::api::ApiClient; -use serde::{Deserialize, Serialize}; -use serde_json::json; - -#[derive(Deserialize, Serialize)] -struct Dataset { - id: String, - label: String, - #[serde(default = "default_schema")] - schema_name: String, - table_name: String, - created_at: String, - updated_at: String, -} - -fn default_schema() -> String { - "main".to_string() -} - -#[derive(Deserialize, Serialize)] -struct CreateResponse { - id: String, - label: String, - #[serde(default = "default_schema")] - schema_name: String, - table_name: String, -} - -#[derive(Deserialize)] -struct ListResponse { - datasets: Vec, - count: u64, - has_more: bool, -} - -#[derive(Deserialize, Serialize)] -struct Column { - name: String, - data_type: String, - nullable: bool, -} - -#[derive(Deserialize, Serialize)] -struct DatasetDetail { - id: String, - label: String, - schema_name: String, - table_name: String, - source_type: String, - created_at: String, - updated_at: String, - columns: Vec, -} - -#[derive(Deserialize, Serialize)] -struct UpdateResponse { - id: String, - label: String, - // Not currently in runtimedb's UpdateDatasetResponse; kept Optional so we - // print `full_name` only when the server actually returns the schema. - // Synthesizing "main" is wrong for sandbox-scoped datasets where - // schema_name == sandbox_id. 
- #[serde(default)] - schema_name: Option, - table_name: String, - #[serde(default)] - latest_version: Option, - #[serde(default)] - pinned_version: Option, - updated_at: String, -} - -fn create_dataset( - api: &ApiClient, - description: Option<&str>, - name: &str, - source: serde_json::Value, - format: &str, -) { - let label = description.unwrap_or(name); - let body = json!({ "table_name": name, "label": label, "source": source }); - - let (status, resp_body) = api.post_raw("/datasets", &body); - - if !status.is_success() { - use crossterm::style::Stylize; - eprintln!("{}", crate::util::api_error(resp_body).red()); - std::process::exit(1); - } - - let dataset: CreateResponse = match serde_json::from_str(&resp_body) { - Ok(v) => v, - Err(e) => { - eprintln!("error parsing response: {e}"); - std::process::exit(1); - } - }; - - use crossterm::style::Stylize; - match format { - "json" => println!("{}", serde_json::to_string_pretty(&dataset).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&dataset).unwrap()), - "table" => { - eprintln!("{}", "Dataset created".green()); - println!("id: {}", dataset.id); - println!("label: {}", dataset.label); - println!( - "full_name: datasets.{}.{}", - dataset.schema_name, dataset.table_name - ); - } - _ => unreachable!(), - } -} - -pub fn create_from_query(workspace_id: &str, sql: &str, description: Option<&str>, name: &str, format: &str) { - let api = ApiClient::new(Some(workspace_id)); - create_dataset(&api, description, name, json!({ "type": "sql_query", "sql": sql }), format); -} - -pub fn create_from_saved_query( - workspace_id: &str, - query_id: &str, - description: Option<&str>, - name: &str, - format: &str, -) { - let api = ApiClient::new(Some(workspace_id)); - create_dataset(&api, description, name, json!({ "type": "saved_query", "saved_query_id": query_id }), format); -} - -pub fn list(workspace_id: &str, limit: Option, offset: Option, format: &str) { - let api = ApiClient::new(Some(workspace_id)); - - let params = 
[ - ("limit", limit.map(|l| l.to_string())), - ("offset", offset.map(|o| o.to_string())), - ]; - let body: ListResponse = api.get_with_params("/datasets", &params); - - match format { - "json" => println!("{}", serde_json::to_string_pretty(&body.datasets).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&body.datasets).unwrap()), - "table" => { - if body.datasets.is_empty() { - use crossterm::style::Stylize; - eprintln!("{}", "No datasets found.".dark_grey()); - } else { - let rows: Vec> = body - .datasets - .iter() - .map(|d| { - vec![ - d.id.clone(), - d.label.clone(), - format!("datasets.{}.{}", d.schema_name, d.table_name), - crate::util::format_date(&d.created_at), - ] - }) - .collect(); - crate::table::print(&["ID", "LABEL", "FULL NAME", "CREATED AT"], &rows); - } - if body.has_more { - let next = offset.unwrap_or(0) + body.count as u32; - use crossterm::style::Stylize; - eprintln!( - "{}", - format!( - "showing {} results — use --offset {next} for more", - body.count - ) - .dark_grey() - ); - } - } - _ => unreachable!(), - } -} - -pub fn get(dataset_id: &str, workspace_id: &str, format: &str) { - let api = ApiClient::new(Some(workspace_id)); - - let d: DatasetDetail = api.get(&format!("/datasets/{dataset_id}")); - - match format { - "json" => println!("{}", serde_json::to_string_pretty(&d).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&d).unwrap()), - "table" => { - let created_at = crate::util::format_date(&d.created_at); - let updated_at = crate::util::format_date(&d.updated_at); - println!("id: {}", d.id); - println!("label: {}", d.label); - println!("full_name: datasets.main.{}", d.table_name); - println!("source_type: {}", d.source_type); - println!("created_at: {created_at}"); - println!("updated_at: {updated_at}"); - if !d.columns.is_empty() { - println!(); - let rows: Vec> = d - .columns - .iter() - .map(|col| { - vec![ - col.name.clone(), - col.data_type.clone(), - col.nullable.to_string(), - ] - }) - .collect(); - 
crate::table::print(&["COLUMN", "DATA TYPE", "NULLABLE"], &rows); - } - } - _ => unreachable!(), - } -} - -pub fn update( - dataset_id: &str, - workspace_id: &str, - description: Option<&str>, - name: Option<&str>, - format: &str, -) { - if description.is_none() && name.is_none() { - eprintln!("error: provide at least one of --description or --name."); - std::process::exit(1); - } - - let api = ApiClient::new(Some(workspace_id)); - - let mut body = json!({}); - if let Some(d) = description { - body["label"] = json!(d); - } - if let Some(n) = name { - body["table_name"] = json!(n); - } - - let d: UpdateResponse = api.put(&format!("/datasets/{dataset_id}"), &body); - - use crossterm::style::Stylize; - eprintln!("{}", "Dataset updated".green()); - match format { - "json" => println!("{}", serde_json::to_string_pretty(&d).unwrap()), - "yaml" => print!("{}", serde_yaml::to_string(&d).unwrap()), - "table" => { - println!("id: {}", d.id); - println!("label: {}", d.label); - match &d.schema_name { - Some(schema) => { - println!("full_name: datasets.{}.{}", schema, d.table_name); - } - None => { - println!("table_name: {}", d.table_name); - eprintln!( - "{}", - format!( - "(run `hotdata datasets {}` to see the qualified name)", - d.id - ) - .dark_grey() - ); - } - } - println!("updated_at: {}", crate::util::format_date(&d.updated_at)); - } - _ => unreachable!(), - } -} - -pub fn refresh(workspace_id: &str, dataset_id: &str, async_mode: bool) { - use crossterm::style::Stylize; - - let mut body = json!({ - "dataset_id": dataset_id, - }); - if async_mode { - body["async"] = json!(true); - } - - let api = ApiClient::new(Some(workspace_id)); - let (status, resp_body) = api.post_raw("/refresh", &body); - - if !status.is_success() { - eprintln!("{}", crate::util::api_error(resp_body).red()); - std::process::exit(1); - } - - let parsed: serde_json::Value = serde_json::from_str(&resp_body).unwrap_or_default(); - - if async_mode { - let job_id = 
parsed["id"].as_str().unwrap_or("unknown"); - println!("{}", "Dataset refresh submitted.".green()); - println!("job_id: {}", job_id); - println!( - "{}", - format!("Use 'hotdata jobs {}' to check status.", job_id).dark_grey() - ); - return; - } - - let id = parsed["id"].as_str().unwrap_or("unknown"); - let version = parsed["version"].as_i64().unwrap_or(0); - let dataset_status = parsed["status"].as_str().unwrap_or(""); - println!("{}", "Dataset refresh completed.".green()); - println!( - "{}", - format!(" id: {id}, version: {version}, status: {dataset_status}").dark_grey() - ); -} - -#[cfg(test)] -mod tests { - use super::*; - - /// Mirrors runtimedb's `UpdateDatasetResponse` (see runtimedb/src/http/models.rs). - /// The CLI must deserialize this exact shape — schema_name, source_type, - /// created_at, and columns are NOT in the response. If runtimedb's response - /// gains or loses fields, update this fixture in lockstep. - #[test] - fn update_response_deserializes_runtimedb_payload() { - let body = serde_json::json!({ - "id": "ds_abc123", - "label": "url_test", - "table_name": "url_test", - "latest_version": 3, - "updated_at": "2026-04-28T18:30:00Z", - }); - let resp: UpdateResponse = serde_json::from_value(body).unwrap(); - assert_eq!(resp.id, "ds_abc123"); - assert_eq!(resp.label, "url_test"); - assert_eq!(resp.table_name, "url_test"); - // The server doesn't currently send schema_name, so we don't synthesize - // one — sandbox-scoped datasets live under datasets.., - // not datasets.main.*, and a fabricated "main" would mislead users. - assert!(resp.schema_name.is_none()); - assert_eq!(resp.latest_version, Some(3)); - assert!(resp.pinned_version.is_none()); - } - - #[test] - fn update_response_uses_schema_name_when_server_supplies_it() { - // Forward-compat: if runtimedb later includes schema_name, we use it. 
- let body = serde_json::json!({ - "id": "ds_abc123", - "label": "x", - "schema_name": "sandbox_xyz", - "table_name": "x", - "updated_at": "2026-04-28T18:30:00Z", - }); - let resp: UpdateResponse = serde_json::from_value(body).unwrap(); - assert_eq!(resp.schema_name.as_deref(), Some("sandbox_xyz")); - } - - #[test] - fn update_response_handles_pinned_version() { - let body = serde_json::json!({ - "id": "ds_abc123", - "label": "x", - "table_name": "x", - "latest_version": 5, - "pinned_version": 2, - "updated_at": "2026-04-28T18:30:00Z", - }); - let resp: UpdateResponse = serde_json::from_value(body).unwrap(); - assert_eq!(resp.pinned_version, Some(2)); - } - - #[test] - fn update_response_tolerates_missing_latest_version() { - // Defensive: treat latest_version as optional in case the server omits it. - let body = serde_json::json!({ - "id": "ds_abc123", - "label": "x", - "table_name": "x", - "updated_at": "2026-04-28T18:30:00Z", - }); - let resp: UpdateResponse = serde_json::from_value(body).unwrap(); - assert!(resp.latest_version.is_none()); - } -} diff --git a/src/main.rs b/src/main.rs index e5cb8dc..669deb9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,6 @@ mod connections; mod connections_new; mod context; mod databases; -mod datasets; mod embedding_providers; mod indexes; mod jobs; @@ -27,7 +26,7 @@ use anstyle::AnsiColor; use clap::{Parser, builder::Styles}; use command::{ AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, ContextCommands, - DatabaseTablesCommands, DatabasesCommands, DatasetsCommands, EmbeddingProvidersCommands, + DatabaseTablesCommands, DatabasesCommands, EmbeddingProvidersCommands, IndexesCommands, JobsCommands, QueriesCommands, QueryCommands, ResultsCommands, SandboxCommands, SkillCommands, TablesCommands, WorkspaceCommands, }; @@ -195,74 +194,6 @@ fn main() { Some(AuthCommands::Status) => auth::status("default"), Some(AuthCommands::Logout) => auth::logout("default"), }, - Commands::Datasets { - id, - 
workspace_id, - output, - command, - } => { - let workspace_id = resolve_workspace(workspace_id); - if let Some(id) = id { - datasets::get(&id, &workspace_id, &output) - } else { - match command { - Some(DatasetsCommands::List { - limit, - offset, - output, - }) => datasets::list(&workspace_id, limit, offset, &output), - Some(DatasetsCommands::Create { - name, - description, - sql, - query_id, - output, - }) => { - if let Some(sql) = sql { - datasets::create_from_query( - &workspace_id, - &sql, - description.as_deref(), - &name, - &output, - ) - } else { - datasets::create_from_saved_query( - &workspace_id, - query_id.as_deref().unwrap_or_else(|| unreachable!("clap enforces --sql or --query-id")), - description.as_deref(), - &name, - &output, - ) - } - } - Some(DatasetsCommands::Update { - id, - description, - name, - output, - }) => datasets::update( - &id, - &workspace_id, - description.as_deref(), - name.as_deref(), - &output, - ), - Some(DatasetsCommands::Refresh { id, r#async }) => { - datasets::refresh(&workspace_id, &id, r#async) - } - None => { - use clap::CommandFactory; - let mut cmd = Cli::command(); - cmd.build(); - cmd.find_subcommand_mut("datasets") - .unwrap() - .print_help() - .unwrap(); - } - } - } - } Commands::Query { sql, workspace_id, From 4a45f5df6be3b040aad69654611cccc8c953d1ce Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 27 May 2026 16:28:40 -0700 Subject: [PATCH 2/9] feat: rename datasets command to views Renames the `hotdata datasets` CLI command to `hotdata views` with a new `src/views.rs` module. The command and all user-facing terminology (help text, output messages, SQL prefix `views.`, skill docs) now use "view" / "views". Server-side API paths remain unchanged (`/datasets`). 
- Add `src/views.rs` (renamed from deleted `datasets.rs`) - Add `Views` / `ViewsCommands` to `command.rs` - Wire dispatch in `main.rs` - Update README, SKILL.md, WORKFLOWS.md, DATA_MODEL.template.md, MODEL_BUILD.md across hotdata and hotdata-analytics skills --- README.md | 27 +- skills/hotdata-analytics/SKILL.md | 16 +- .../hotdata-analytics/references/WORKFLOWS.md | 24 +- skills/hotdata/SKILL.md | 77 ++-- .../hotdata/references/DATA_MODEL.template.md | 10 +- skills/hotdata/references/MODEL_BUILD.md | 10 +- skills/hotdata/references/WORKFLOWS.md | 69 ++-- src/command.rs | 86 ++++ src/main.rs | 71 +++- src/views.rs | 382 ++++++++++++++++++ 10 files changed, 651 insertions(+), 121 deletions(-) create mode 100644 src/views.rs diff --git a/README.md b/README.md index af968d5..5525399 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ API key priority (lowest to highest): config file → `HOTDATA_API_KEY` env var | `connections` | `list`, `create`, `refresh`, `new` | Manage connections | | `databases` | `list`, `create`, `delete`, `tables` | Managed databases (create and load tables via parquet) | | `tables` | `list` | List tables and columns | -| `datasets` | `list`, `create`, `update` | Manage uploaded datasets | +| `views` | `list`, `create`, `update`, `refresh` | Manage SQL-derived views | | `context` | `list`, `show`, `pull`, `push` | Workspace Markdown context (e.g. data model `DATAMODEL`) via the context API | | `query` | | Execute a SQL query | | `queries` | `list` | Inspect query run history | @@ -146,7 +146,7 @@ hotdata databases tables delete
[--schema public] - `create` registers a managed connection (`source_type: managed`) with no external credentials. Use `--table` to declare tables up front (required before `tables load` on the current API). - `tables load` uploads a **parquet** file (or uses a staged `upload_id` from `POST /v1/files`) and publishes it as the table generation (`replace` mode). -- For CSV/JSON uploads without a managed database, use `hotdata datasets create` instead (`datasets.main.*`). +- For SQL-query materializations without a managed database, use `hotdata views create` instead (`views.main.*`). Example: @@ -167,24 +167,19 @@ hotdata tables list [--workspace-id ] [--connection-id ] [--schema ..
` — use this format in SQL queries. -## Datasets +## Views ```sh -hotdata datasets list [--workspace-id ] [--limit ] [--offset ] [--format table|json|yaml] -hotdata datasets [--workspace-id ] [--format table|json|yaml] -hotdata datasets create --file data.csv [--label "My Dataset"] [--table-name my_dataset] -hotdata datasets create --sql "SELECT ..." --label "My Dataset" -hotdata datasets create --url "https://example.com/data.parquet" --label "My Dataset" -hotdata datasets update [--label "New Label"] [--table-name new_table] -hotdata datasets refresh [--workspace-id ] [--async] +hotdata views list [--workspace-id ] [--limit ] [--offset ] [--output table|json|yaml] +hotdata views [--workspace-id ] [--output table|json|yaml] +hotdata views create --name my_view [--description "My View"] (--sql "SELECT ..." | --query-id ) +hotdata views update [--description "New Label"] [--name new_table] +hotdata views refresh [--workspace-id ] [--async] ``` -- Datasets are queryable as `datasets.main.`. -- `--file`, `--sql`, `--query-id`, and `--url` are mutually exclusive. -- `--url` imports data directly from a URL (supports csv, json, parquet). -- Format is auto-detected from file extension or content. -- Piped stdin is supported: `cat data.csv | hotdata datasets create --label "My Dataset"` -- `refresh` re-runs the dataset's source (URL fetch or saved query) and creates a new version. Not supported for upload-source datasets. +- Views are queryable as `views.main.`. +- `--sql` and `--query-id` are mutually exclusive; exactly one is required for `create`. +- `refresh` re-runs the view's source query and creates a new version. - `--async` submits the refresh as a background job and returns a job ID; poll with `hotdata jobs `. 
## Workspace context diff --git a/skills/hotdata-analytics/SKILL.md b/skills/hotdata-analytics/SKILL.md index 66a22a3..b98a0fb 100644 --- a/skills/hotdata-analytics/SKILL.md +++ b/skills/hotdata-analytics/SKILL.md @@ -1,6 +1,6 @@ --- name: hotdata-analytics -description: Use this skill when the user wants OLAP-style SQL analytics in Hotdata — aggregations, GROUP BY, JOINs, reporting, exploratory queries, query run history, stored results, or materialized follow-up tables (Chain via datasets or managed databases). Activate for "analyze", "aggregate", "rollup", "pivot", "report", "metrics", "GROUP BY", "query history", "past queries", "query runs", "stored results", "materialize", "chain", "intermediate table", or sorted indexes for filters/range scans. Do not load for BM25/vector search or geospatial SQL — use hotdata-search or hotdata-geospatial. Requires the core hotdata skill for connections, tables, datasets, and auth. +description: Use this skill when the user wants OLAP-style SQL analytics in Hotdata — aggregations, GROUP BY, JOINs, reporting, exploratory queries, query run history, stored results, or materialized follow-up tables (Chain via views or managed databases). Activate for "analyze", "aggregate", "rollup", "pivot", "report", "metrics", "GROUP BY", "query history", "past queries", "query runs", "stored results", "materialize", "chain", "intermediate table", or sorted indexes for filters/range scans. Do not load for BM25/vector search or geospatial SQL — use hotdata-search or hotdata-geospatial. Requires the core hotdata skill for connections, tables, views, and auth. version: 0.3.2 --- @@ -8,7 +8,7 @@ version: 0.3.2 **OLAP-style analytics** in Hotdata: PostgreSQL-dialect SQL, query execution, run history, stored results, **Chain** materializations, and **sorted** indexes for filters and joins. -**Prerequisites:** Authenticate, workspace, and catalog discovery via the **`hotdata`** skill (`connections`, `tables`, `datasets`, `databases`). 
+**Prerequisites:** Authenticate, workspace, and catalog discovery via the **`hotdata`** skill (`connections`, `tables`, `views`, `databases`). **Related skills:** **`hotdata-search`** (BM25, vector, retrieval indexes), **`hotdata-geospatial`** (spatial SQL). @@ -23,7 +23,7 @@ hotdata query status [--output table|json|csv] - **PostgreSQL dialect.** Quote mixed-case identifiers: `"CustomerName"`. - Use **`hotdata tables list`** for schema discovery — not `information_schema` via `query`. -- Fully qualified names: `..
`, `datasets..
`, `..
`. +- Fully qualified names: `..
`, `views..
`, `..
`. - Long-running queries may return `query_run_id` → poll with **`query status`** (exit `2` = still running). Do not re-run identical heavy SQL while polling. - For **workspace-wide** joins and naming, load **context:DATAMODEL** when listed (`hotdata context list` → `show DATAMODEL`) — see **`hotdata`** skill. @@ -82,8 +82,8 @@ hotdata results [--workspace-id ] [--output table|json 2. **Materialize** (pick one) ```bash - hotdata datasets create --name chain_slice [--description "chain slice"] --sql "SELECT ..." - hotdata datasets create --name chain_from_saved [--description "from saved"] --query-id + hotdata views create --name chain_slice --description "chain slice" --sql "SELECT ..." + hotdata views create --name chain_from_saved --description "from saved" --query-id ``` Or managed parquet: @@ -94,10 +94,10 @@ hotdata results [--workspace-id ] [--output table|json hotdata databases tables load slice --file ./slice.parquet ``` -3. **Chain query** — use printed **`full_name`** or `datasets list` **FULL NAME** column: +3. **Chain query** — use printed **`full_name`** or `views list` **FULL NAME** column: ```bash - hotdata query "SELECT * FROM datasets.main.chain_slice WHERE ..." + hotdata query "SELECT * FROM views.main.chain_slice WHERE ..." hotdata query "SELECT * FROM analytics.public.slice WHERE ..." ``` @@ -122,4 +122,4 @@ List and delete use the same `hotdata indexes` commands as in the search skill; ## Sandboxes and chains -Sandbox datasets use **`datasets..
`**, not `datasets.main`. Run queries with active sandbox config or `hotdata sandbox run hotdata query "..."`. See **`hotdata`** skill **Sandboxes**. +Sandbox views use **`views..
`**, not `views.main`. Run queries with active sandbox config or `hotdata sandbox run hotdata query "..."`. See **`hotdata`** skill **Sandboxes**. diff --git a/skills/hotdata-analytics/references/WORKFLOWS.md b/skills/hotdata-analytics/references/WORKFLOWS.md index 0a11385..affeffe 100644 --- a/skills/hotdata-analytics/references/WORKFLOWS.md +++ b/skills/hotdata-analytics/references/WORKFLOWS.md @@ -2,7 +2,7 @@ OLAP-style SQL, **History** (query runs and stored results), and **Chain** (materialized follow-ups). Requires **`hotdata`** for auth, workspaces, and catalog commands. -**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for datasets vs managed databases. +**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for views vs managed databases. --- @@ -66,11 +66,11 @@ hotdata query "SELECT ..." Land a smaller table — pick one: -**Datasets** (CSV/JSON/URL/SQL snapshot → `datasets..
`): +**Views** (SQL snapshot → `views..
`): ```bash -hotdata datasets create --label "chain revenue slice" --sql "SELECT ..." [--table-name chain_revenue_slice] -hotdata datasets create --label "from saved" --query-id [--table-name ...] +hotdata views create --name chain_revenue_slice --description "chain revenue slice" --sql "SELECT ..." +hotdata views create --name chain_from_saved --description "from saved" --query-id ``` **Managed database** (parquet → `..
`): @@ -80,17 +80,17 @@ hotdata databases create --name chain_db --table revenue_slice hotdata databases tables load chain_db revenue_slice --file ./revenue_slice.parquet ``` -Note the printed **`full_name`** (e.g. `datasets.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For datasets, **`FULL NAME`** from `datasets list` is authoritative. +Note the printed **`full_name`** (e.g. `views.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For views, **`FULL NAME`** from `views list` is authoritative. ### 3. Chain query -Query using that name — do not hardcode `datasets.main` if the schema segment is a sandbox id: +Query using that name — do not hardcode `views.main` if the schema segment is a sandbox id: ```bash -hotdata datasets list -hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..." +hotdata views list +hotdata query "SELECT * FROM views.main.chain_revenue_slice WHERE ..." # Sandbox example (use actual full_name from create or list): -# hotdata query "SELECT * FROM datasets.s_ufmblmvq.chain_revenue_slice WHERE ..." +# hotdata query "SELECT * FROM views.s_ufmblmvq.chain_revenue_slice WHERE ..." # Managed database: # hotdata query "SELECT * FROM chain_db.public.revenue_slice WHERE ..." ``` @@ -99,18 +99,18 @@ hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..." For **sandbox-scoped** chain tables: -- Qualified name is **`datasets..
`**, not `datasets.main`. +- Qualified name is **`views..
`**, not `views.main`. - Run queries with **active sandbox** in config (`hotdata sandbox set`) **or** inside **`hotdata sandbox run hotdata query "…"`**. - Without sandbox context, you may get **access denied** on sandbox-only tables. ### Naming and documentation - Prefer predictable `--table-name` values: `chain__`. -- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`datasets.…` or `database.schema.table`). +- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`views.…` or `database.schema.table`). - Promote join/grain findings to **context:DATAMODEL** when they should outlive the sandbox (**`hotdata`** skill). ### Guardrails - Materialize when the base scan is large and the follow-up runs many times. - Keep Chain tables focused; avoid wide `SELECT *` materializations when a narrow projection suffices. -- For upload format choice (datasets vs databases), see **`hotdata`** WORKFLOWS — [Datasets vs managed databases](../../hotdata/references/WORKFLOWS.md#datasets-vs-managed-databases). +- For source format choice (views vs databases), see **`hotdata`** WORKFLOWS — [Views vs managed databases](../../hotdata/references/WORKFLOWS.md#views-vs-managed-databases). diff --git a/skills/hotdata/SKILL.md b/skills/hotdata/SKILL.md index ef45914..0bec6df 100644 --- a/skills/hotdata/SKILL.md +++ b/skills/hotdata/SKILL.md @@ -1,6 +1,6 @@ --- name: hotdata -description: Use this skill when the user wants to run core hotdata CLI commands — auth, workspaces, connections, managed databases, datasets, tables, basic SQL query, sandboxes, database context (context:DATAMODEL), jobs, and skill install. 
Activate for "run hotdata", "list workspaces", "list connections", "create a connection", "list databases", "managed database", "load parquet", "list tables", "list datasets", "create a dataset", "execute a query", "list sandboxes", "database context", "context:DATAMODEL", or general Hotdata CLI usage. For full-text/vector search and retrieval indexes use hotdata-search; for OLAP analytics, query history, stored results, and Chain materializations use hotdata-analytics; for geospatial/GIS use hotdata-geospatial. +description: Use this skill when the user wants to run core hotdata CLI commands — auth, workspaces, connections, managed databases, views, tables, basic SQL query, sandboxes, database context (context:DATAMODEL), jobs, and skill install. Activate for "run hotdata", "list workspaces", "list connections", "create a connection", "list databases", "managed database", "load parquet", "list tables", "list views", "create a view", "execute a query", "list sandboxes", "database context", "context:DATAMODEL", or general Hotdata CLI usage. For full-text/vector search and retrieval indexes use hotdata-search; for OLAP analytics, query history, stored results, and Chain materializations use hotdata-analytics; for geospatial/GIS use hotdata-geospatial. version: 0.3.2 --- @@ -20,7 +20,7 @@ Install all skills with **`hotdata skills install`**. 
Load specialized skills on | Skill | Use for | |-------|---------| -| **`hotdata`** (this file) | Auth, workspaces, connections, databases, datasets, tables, basic `query`, context, sandboxes, jobs | +| **`hotdata`** (this file) | Auth, workspaces, connections, databases, views, tables, basic `query`, context, sandboxes, jobs | | **`hotdata-search`** | BM25, vector search, `hotdata search`, bm25/vector indexes, embedding providers | | **`hotdata-analytics`** | OLAP SQL, aggregations, query/results history, Chain materializations, sorted indexes | | **`hotdata-geospatial`** | PostGIS-style `ST_*`, WKB, spatial joins | @@ -82,15 +82,15 @@ Use [references/DATA_MODEL.template.md](references/DATA_MODEL.template.md) and [ These are **patterns** built from the commands below—not separate CLI subcommands: -- **Model (`context:DATAMODEL`)** — The **shared** Markdown semantic map of the active database (entities, keys, joins across connections). **Store and read it only via database context** (`hotdata context list`, then `hotdata context show DATAMODEL` **only when listed**, `context push DATAMODEL`); refresh using `connections`, `connections refresh`, `tables list`, and `datasets list`. For a **deep** pass (connector enrichment, indexes, per-table detail), see [references/MODEL_BUILD.md](references/MODEL_BUILD.md). Contrast **analysis modeling** in sandboxes or chat (see [Analysis modeling vs context:DATAMODEL](#analysis-modeling-vs-contextdatamodel)). +- **Model (`context:DATAMODEL`)** — The **shared** Markdown semantic map of the active database (entities, keys, joins across connections). **Store and read it only via database context** (`hotdata context list`, then `hotdata context show DATAMODEL` **only when listed**, `context push DATAMODEL`); refresh using `connections`, `connections refresh`, `tables list`, and `views list`. For a **deep** pass (connector enrichment, indexes, per-table detail), see [references/MODEL_BUILD.md](references/MODEL_BUILD.md). 
Contrast **analysis modeling** in sandboxes or chat (see [Analysis modeling vs context:DATAMODEL](#analysis-modeling-vs-contextdatamodel)). - **History / Chain / OLAP SQL** — See **`hotdata-analytics`** and [references/WORKFLOWS.md](references/WORKFLOWS.md). - **Search / retrieval indexes** — See **`hotdata-search`**. -Catalog, skill decision tree, epic flows (onboard, chain, retrieval), datasets vs databases, and sandbox procedures: [references/WORKFLOWS.md](references/WORKFLOWS.md). +Catalog, skill decision tree, epic flows (onboard, chain, retrieval), views vs databases, and sandbox procedures: [references/WORKFLOWS.md](references/WORKFLOWS.md). ## Available Commands -Top-level subcommands (each detailed below): **`auth`**, **`datasets`**, **`query`**, **`workspaces`**, **`connections`**, **`databases`**, **`tables`**, **`skills`**, **`results`**, **`jobs`**, **`indexes`**, **`embedding-providers`**, **`search`**, **`queries`**, **`sandbox`**, **`context`**, **`completions`**. Search, indexes (bm25/vector), and embedding providers are documented in **`hotdata-search`**; query history, results, Chain, and OLAP patterns in **`hotdata-analytics`**. +Top-level subcommands (each detailed below): **`auth`**, **`views`**, **`query`**, **`workspaces`**, **`connections`**, **`databases`**, **`tables`**, **`skills`**, **`results`**, **`jobs`**, **`indexes`**, **`embedding-providers`**, **`search`**, **`queries`**, **`sandbox`**, **`context`**, **`completions`**. Search, indexes (bm25/vector), and embedding providers are documented in **`hotdata-search`**; query history, results, Chain, and OLAP patterns in **`hotdata-analytics`**. Global CLI options: **`--api-key`**, **`-v` / `--version`**, **`-h` / `--help`**. Hidden developer flag: **`--debug`** (verbose HTTP logs). @@ -181,7 +181,7 @@ hotdata connections create \ **Managed databases** are Hotdata-owned catalogs you create and populate yourself — no remote source to sync. Query them in SQL as **`..
`**. Prefer **`hotdata databases`** for this workflow. -**Parquet vs datasets:** `databases tables load` accepts **parquet only**. For SQL-query or saved-query materializations, use **`hotdata datasets create`**. +**Parquet vs views:** `databases tables load` accepts **parquet only**. For SQL-query or saved-query materializations, use **`hotdata views create`**. **Active database:** `hotdata databases set ` saves the active database to config. All `databases tables` subcommands and all `context` commands default to the active database; pass **`--database `** to override per-command. @@ -231,63 +231,62 @@ hotdata tables list [--workspace-id ] [--connection-id ] [--limit ] [--offset ] [--output table|json|yaml] +hotdata views list [--workspace-id ] [--limit ] [--offset ] [--output table|json|yaml] ``` - Default format is `table`. -- Returns `id`, `label`, and `created_at`; table output includes a **`FULL NAME`** column (`datasets..
`). +- Returns `id`, `label`, and `created_at`; table output includes a **`FULL NAME`** column (`views.<schema>.<table>
`). - Results are paginated (default 100). Use `--offset` to fetch further pages. -- **There is no filter for “this sandbox only.”** `datasets list` always returns **all** datasets in the workspace. To tell sandbox-scoped datasets from workspace-wide ones, read **`FULL NAME`**: the middle segment is the sandbox id (e.g. `datasets.s_ufmblmvq.tac_csat`) for sandbox data, and usually **`main`** (e.g. `datasets.main.my_table`) for ordinary uploads. +- **There is no filter for “this sandbox only.”** `views list` always returns **all** views in the workspace. To tell sandbox-scoped views from workspace-wide ones, read **`FULL NAME`**: the middle segment is the sandbox id (e.g. `views.s_ufmblmvq.tac_csat`) for sandbox data, and usually **`main`** (e.g. `views.main.my_table`) for ordinary views. -#### Get dataset details +#### Get view details ``` -hotdata datasets [--workspace-id ] [--output table|json|yaml] +hotdata views [--workspace-id ] [--output table|json|yaml] ``` -- Shows dataset metadata and a full column listing with `name`, `data_type`, `nullable`. +- Shows view metadata and a full column listing with `name`, `data_type`, `nullable`. - Use this to inspect schema before querying. -- For the **qualified SQL name**, prefer **`FULL NAME` from `datasets list`** or the **`full_name` printed by `datasets create`**—especially for sandbox datasets, where the schema is **`datasets.`**, not `datasets.main`. +- For the **qualified SQL name**, prefer **`FULL NAME` from `views list`** or the **`full_name` printed by `views create`**—especially for sandbox views, where the schema is **`views.`**, not `views.main`. -#### Update a dataset +#### Update a view ``` -hotdata datasets update [--description
` and active sandbox or `hotdata sandbox run …` +5. [ ] (Sandbox) Use `views.<sandbox_id>.<table>
` and active sandbox or `hotdata sandbox run …` 6. [ ] Record stable chains in **context:DATAMODEL** when they should outlive the session **Detail:** [hotdata-analytics WORKFLOWS — Chain](../../hotdata-analytics/references/WORKFLOWS.md#chain) @@ -80,38 +80,37 @@ End-to-end checklists. Use the linked sections for command detail and guardrails --- -## Datasets vs managed databases +## Views vs managed databases -Both land queryable tables in the workspace; the path depends on **format** and **how you want to name tables in SQL**. +Both land queryable tables in the workspace; the path depends on **source** and **how you want to name tables in SQL**. -| | **Datasets** | **Managed databases** | -|---|-------------|------------------------| -| **Best for** | CSV, JSON, URL import, stdin, SQL/query snapshot | Parquet files you own; catalog-style `name.schema.table` | -| **SQL prefix** | `datasets..
` (often `datasets.main.*`) | `<database>.<schema>.<table>
` (database = connection name) | -| **CLI** | `hotdata datasets create` | `hotdata databases create` + `databases tables load` | +| | **Views** | **Managed databases** | +|---|-----------|------------------------| +| **Best for** | SQL/query snapshot | Parquet files you own; catalog-style `name.schema.table` | +| **SQL prefix** | `views..
` (often `views.main.*`) | `<database>.<schema>.<table>
` (database = connection name) | +| **CLI** | `hotdata views create` | `hotdata databases create` + `databases tables load` | | **Declare schema up front** | No | Yes — `--table` on create (required before load on current API) | -| **Parquet** | Yes (`--file`, `--url`, `--upload-id`) | **Only** parquet on `tables load` | -| **Refresh upstream** | `datasets refresh` (URL/query sources) | Replace via `tables load` again | +| **Parquet** | No | **Only** parquet on `tables load` | +| **Refresh upstream** | `views refresh` (query sources) | Replace via `tables load` again | -**Rule of thumb:** CSV/JSON or “upload a file from a URL” → **datasets**. Parquet catalog you control as **`mydb.public.orders`** → **databases**. +**Rule of thumb:** SQL-query snapshot → **views**. Parquet catalog you control as **`mydb.public.orders`** → **databases**. -### Workflow: dataset upload and query +### Workflow: view creation and query 1. Authenticate and set workspace (`hotdata auth`, `hotdata workspaces set` if needed). -2. Create the dataset (one source): +2. Create the view: ```bash - hotdata datasets create --label "Orders" --file ./orders.csv - # or: --url "https://example.com/orders.parquet" - # or: --sql "SELECT ..." # materialize from a query + hotdata views create --name orders --sql "SELECT ..." + # or: --query-id # materialize from a saved query ``` -3. Note the printed **`full_name`** (e.g. `datasets.main.orders`) — do not assume `datasets.main`. -4. Inspect if needed: `hotdata datasets list`, `hotdata datasets `. +3. Note the printed **`full_name`** (e.g. `views.main.orders`) — do not assume `views.main`. +4. Inspect if needed: `hotdata views list`, `hotdata views `. 5. 
Query: ```bash - hotdata query "SELECT count(*) FROM datasets.main.orders" + hotdata query "SELECT count(*) FROM views.main.orders" ``` ### Workflow: managed database (parquet) @@ -137,7 +136,7 @@ Both land queryable tables in the workspace; the path depends on **format** and hotdata query "SELECT count(*) FROM sales.public.orders" ``` -For **Chain** materializations into datasets or databases, see **`hotdata-analytics`**. +For **Chain** materializations into views or databases, see **`hotdata-analytics`**. --- @@ -165,8 +164,8 @@ hotdata connections list hotdata connections refresh # after DDL / stale remote metadata hotdata tables list hotdata tables list --connection-id -hotdata datasets list -hotdata datasets +hotdata views list +hotdata views hotdata databases list ``` @@ -174,24 +173,24 @@ Use `hotdata tables list` for discovery; do not query `information_schema` for t --- -## Sandboxes and datasets +## Sandboxes and views -Use this when work is isolated in a **sandbox** (exploratory runs, ephemeral datasets). +Use this when work is isolated in a **sandbox** (exploratory runs, ephemeral views). -**Active sandbox vs `sandbox run`:** After `sandbox new` or `sandbox set`, run **`datasets create`**, **`query`**, etc. **directly**. **`sandbox run `** (no id before `run`) **always creates a new sandbox**. +**Active sandbox vs `sandbox run`:** After `sandbox new` or `sandbox set`, run **`views create`**, **`query`**, etc. **directly**. **`sandbox run `** (no id before `run`) **always creates a new sandbox**. -**Qualified names:** Workspace datasets → **`datasets.main.
`**. Sandbox datasets → **`datasets.<sandbox_id>.<table>
`**. Use **`full_name`** from create or **FULL NAME** from `datasets list`. +**Qualified names:** Workspace views → **`views.main.<table>
`**. Sandbox views → **`views.<sandbox_id>.<table>
`**. Use **`full_name`** from create or **FULL NAME** from `views list`. **Access:** Sandbox-only tables need active sandbox config or **`hotdata sandbox run …`**. **SQL:** Quote mixed-case columns with double quotes. -**Listing:** `datasets list` returns all workspace datasets; use **FULL NAME** to spot sandbox vs `main` rows. +**Listing:** `views list` returns all workspace views; use **FULL NAME** to spot sandbox vs `main` rows. --- ## Cross-cutting - **Workspace:** Active workspace or `--workspace-id`. **`hotdata queries`** uses the active workspace only (no `--workspace-id`). -- **Jobs:** `hotdata jobs list` / `jobs ` for async refreshes, dataset refresh, and index builds. +- **Jobs:** `hotdata jobs list` / `jobs ` for async refreshes, view refresh, and index builds. - **Discovery:** `hotdata tables list` — not `query` on `information_schema`. diff --git a/src/command.rs b/src/command.rs index 8afc01e..97b7971 100644 --- a/src/command.rs +++ b/src/command.rs @@ -56,6 +56,23 @@ pub enum Commands { command: Option, }, + /// SQL-derived views materialized from queries or saved queries + Views { + /// View ID to show details + id: Option, + + /// Workspace ID (defaults to first workspace from login) + #[arg(long, short = 'w', global = true)] + workspace_id: Option, + + /// Output format (used with view ID) + #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + + #[command(subcommand)] + command: Option, + }, + /// Managed databases you create and populate with tables (parquet uploads) Databases { /// Database id or description (omit to use a subcommand) @@ -456,6 +473,75 @@ pub enum ConnectionsCreateCommands { }, } +#[derive(Subcommand)] +pub enum ViewsCommands { + /// List all views in a workspace + List { + /// Maximum number of results (default: 100, max: 1000) + #[arg(long)] + limit: Option, + + /// Pagination offset + #[arg(long)] + offset: Option, + + /// Output format + #[arg(long = 
"output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + }, + + /// Create a view from a SQL query or saved query + Create { + /// SQL table name the view is addressable as (e.g. my_view) + #[arg(long)] + name: String, + + /// Human-readable display label + #[arg(long)] + description: Option, + + /// SQL query to create the view from + #[arg(long, conflicts_with = "query_id", required_unless_present = "query_id")] + sql: Option, + + /// Saved query ID to create the view from + #[arg(long, conflicts_with = "sql", required_unless_present = "sql")] + query_id: Option, + + /// Output format + #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + }, + + /// Update a view's description and/or name + Update { + /// View ID + id: String, + + /// New display label + #[arg(long)] + description: Option, + + /// New SQL table name (must be a valid identifier) + #[arg(long)] + name: Option, + + /// Output format + #[arg(long = "output", short = 'o', default_value = "table", value_parser = ["table", "json", "yaml"])] + output: String, + }, + + /// Refresh a view by re-running its source query and creating a new version + Refresh { + /// View ID + id: String, + + /// Submit as a background job + #[arg(long)] + r#async: bool, + }, +} + #[derive(Subcommand)] pub enum DatabasesCommands { /// List managed databases in the workspace diff --git a/src/main.rs b/src/main.rs index 669deb9..aca7472 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ mod table; mod tables; mod update; mod util; +mod views; mod workspace; use anstyle::AnsiColor; @@ -28,7 +29,7 @@ use command::{ AuthCommands, Commands, ConnectionsCommands, ConnectionsCreateCommands, ContextCommands, DatabaseTablesCommands, DatabasesCommands, EmbeddingProvidersCommands, IndexesCommands, JobsCommands, QueriesCommands, QueryCommands, ResultsCommands, - SandboxCommands, SkillCommands, 
TablesCommands, WorkspaceCommands, + SandboxCommands, SkillCommands, TablesCommands, ViewsCommands, WorkspaceCommands, }; #[derive(Parser)] @@ -314,6 +315,74 @@ fn main() { } } } + Commands::Views { + id, + workspace_id, + output, + command, + } => { + let workspace_id = resolve_workspace(workspace_id); + if let Some(id) = id { + views::get(&id, &workspace_id, &output) + } else { + match command { + Some(ViewsCommands::List { + limit, + offset, + output, + }) => views::list(&workspace_id, limit, offset, &output), + Some(ViewsCommands::Create { + name, + description, + sql, + query_id, + output, + }) => { + if let Some(sql) = sql { + views::create_from_query( + &workspace_id, + &sql, + description.as_deref(), + &name, + &output, + ) + } else { + views::create_from_saved_query( + &workspace_id, + query_id.as_deref().unwrap_or_else(|| unreachable!("clap enforces --sql or --query-id")), + description.as_deref(), + &name, + &output, + ) + } + } + Some(ViewsCommands::Update { + id, + description, + name, + output, + }) => views::update( + &id, + &workspace_id, + description.as_deref(), + name.as_deref(), + &output, + ), + Some(ViewsCommands::Refresh { id, r#async }) => { + views::refresh(&workspace_id, &id, r#async) + } + None => { + use clap::CommandFactory; + let mut cmd = Cli::command(); + cmd.build(); + cmd.find_subcommand_mut("views") + .unwrap() + .print_help() + .unwrap(); + } + } + } + } Commands::Databases { name_or_id, workspace_id, diff --git a/src/views.rs b/src/views.rs new file mode 100644 index 0000000..e1e28b0 --- /dev/null +++ b/src/views.rs @@ -0,0 +1,382 @@ +use crate::api::ApiClient; +use serde::{Deserialize, Serialize}; +use serde_json::json; + +#[derive(Deserialize, Serialize)] +struct View { + id: String, + label: String, + #[serde(default = "default_schema")] + schema_name: String, + table_name: String, + created_at: String, + updated_at: String, +} + +fn default_schema() -> String { + "main".to_string() +} + +#[derive(Deserialize, Serialize)] 
+struct CreateResponse { + id: String, + label: String, + #[serde(default = "default_schema")] + schema_name: String, + table_name: String, +} + +#[derive(Deserialize)] +struct ListResponse { + #[serde(rename = "datasets")] + views: Vec, + count: u64, + has_more: bool, +} + +#[derive(Deserialize, Serialize)] +struct Column { + name: String, + data_type: String, + nullable: bool, +} + +#[derive(Deserialize, Serialize)] +struct ViewDetail { + id: String, + label: String, + schema_name: String, + table_name: String, + source_type: String, + created_at: String, + updated_at: String, + columns: Vec, +} + +#[derive(Deserialize, Serialize)] +struct UpdateResponse { + id: String, + label: String, + // Not currently in runtimedb's UpdateDatasetResponse (see runtimedb/src/http/models.rs). + // Kept Optional so we print `full_name` only when the server actually returns the schema. + // Synthesizing "main" is wrong for sandbox-scoped views where + // schema_name == sandbox_id. + #[serde(default)] + schema_name: Option, + table_name: String, + #[serde(default)] + latest_version: Option, + #[serde(default)] + pinned_version: Option, + updated_at: String, +} + +fn create_view( + api: &ApiClient, + description: Option<&str>, + name: &str, + source: serde_json::Value, + format: &str, +) { + let label = description.unwrap_or(name); + let body = json!({ "table_name": name, "label": label, "source": source }); + + let (status, resp_body) = api.post_raw("/datasets", &body); + + if !status.is_success() { + use crossterm::style::Stylize; + eprintln!("{}", crate::util::api_error(resp_body).red()); + std::process::exit(1); + } + + let view: CreateResponse = match serde_json::from_str(&resp_body) { + Ok(v) => v, + Err(e) => { + eprintln!("error parsing response: {e}"); + std::process::exit(1); + } + }; + + use crossterm::style::Stylize; + match format { + "json" => println!("{}", serde_json::to_string_pretty(&view).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&view).unwrap()), 
+ "table" => { + eprintln!("{}", "View created".green()); + println!("id: {}", view.id); + println!("label: {}", view.label); + println!( + "full_name: views.{}.{}", + view.schema_name, view.table_name + ); + } + _ => unreachable!(), + } +} + +pub fn create_from_query(workspace_id: &str, sql: &str, description: Option<&str>, name: &str, format: &str) { + let api = ApiClient::new(Some(workspace_id)); + create_view(&api, description, name, json!({ "type": "sql_query", "sql": sql }), format); +} + +pub fn create_from_saved_query( + workspace_id: &str, + query_id: &str, + description: Option<&str>, + name: &str, + format: &str, +) { + let api = ApiClient::new(Some(workspace_id)); + create_view(&api, description, name, json!({ "type": "saved_query", "saved_query_id": query_id }), format); +} + +pub fn list(workspace_id: &str, limit: Option, offset: Option, format: &str) { + let api = ApiClient::new(Some(workspace_id)); + + let params = [ + ("limit", limit.map(|l| l.to_string())), + ("offset", offset.map(|o| o.to_string())), + ]; + let body: ListResponse = api.get_with_params("/datasets", ¶ms); + + match format { + "json" => println!("{}", serde_json::to_string_pretty(&body.views).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&body.views).unwrap()), + "table" => { + if body.views.is_empty() { + use crossterm::style::Stylize; + eprintln!("{}", "No views found.".dark_grey()); + } else { + let rows: Vec> = body + .views + .iter() + .map(|v| { + vec![ + v.id.clone(), + v.label.clone(), + format!("views.{}.{}", v.schema_name, v.table_name), + crate::util::format_date(&v.created_at), + ] + }) + .collect(); + crate::table::print(&["ID", "LABEL", "FULL NAME", "CREATED AT"], &rows); + } + if body.has_more { + let next = offset.unwrap_or(0) + body.count as u32; + use crossterm::style::Stylize; + eprintln!( + "{}", + format!( + "showing {} results — use --offset {next} for more", + body.count + ) + .dark_grey() + ); + } + } + _ => unreachable!(), + } +} + +pub fn 
get(view_id: &str, workspace_id: &str, format: &str) { + let api = ApiClient::new(Some(workspace_id)); + + let v: ViewDetail = api.get(&format!("/datasets/{view_id}")); + + match format { + "json" => println!("{}", serde_json::to_string_pretty(&v).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&v).unwrap()), + "table" => { + let created_at = crate::util::format_date(&v.created_at); + let updated_at = crate::util::format_date(&v.updated_at); + println!("id: {}", v.id); + println!("label: {}", v.label); + println!("full_name: views.main.{}", v.table_name); + println!("source_type: {}", v.source_type); + println!("created_at: {created_at}"); + println!("updated_at: {updated_at}"); + if !v.columns.is_empty() { + println!(); + let rows: Vec> = v + .columns + .iter() + .map(|col| { + vec![ + col.name.clone(), + col.data_type.clone(), + col.nullable.to_string(), + ] + }) + .collect(); + crate::table::print(&["COLUMN", "DATA TYPE", "NULLABLE"], &rows); + } + } + _ => unreachable!(), + } +} + +pub fn update( + view_id: &str, + workspace_id: &str, + description: Option<&str>, + name: Option<&str>, + format: &str, +) { + if description.is_none() && name.is_none() { + eprintln!("error: provide at least one of --description or --name."); + std::process::exit(1); + } + + let api = ApiClient::new(Some(workspace_id)); + + let mut body = json!({}); + if let Some(d) = description { + body["label"] = json!(d); + } + if let Some(n) = name { + body["table_name"] = json!(n); + } + + let v: UpdateResponse = api.put(&format!("/datasets/{view_id}"), &body); + + use crossterm::style::Stylize; + eprintln!("{}", "View updated".green()); + match format { + "json" => println!("{}", serde_json::to_string_pretty(&v).unwrap()), + "yaml" => print!("{}", serde_yaml::to_string(&v).unwrap()), + "table" => { + println!("id: {}", v.id); + println!("label: {}", v.label); + match &v.schema_name { + Some(schema) => { + println!("full_name: views.{}.{}", schema, v.table_name); + } + None => { + 
println!("table_name: {}", v.table_name); + eprintln!( + "{}", + format!( + "(run `hotdata views {}` to see the qualified name)", + v.id + ) + .dark_grey() + ); + } + } + println!("updated_at: {}", crate::util::format_date(&v.updated_at)); + } + _ => unreachable!(), + } +} + +pub fn refresh(workspace_id: &str, view_id: &str, async_mode: bool) { + use crossterm::style::Stylize; + + let mut body = json!({ + "dataset_id": view_id, + }); + if async_mode { + body["async"] = json!(true); + } + + let api = ApiClient::new(Some(workspace_id)); + let (status, resp_body) = api.post_raw("/refresh", &body); + + if !status.is_success() { + eprintln!("{}", crate::util::api_error(resp_body).red()); + std::process::exit(1); + } + + let parsed: serde_json::Value = serde_json::from_str(&resp_body).unwrap_or_default(); + + if async_mode { + let job_id = parsed["id"].as_str().unwrap_or("unknown"); + println!("{}", "View refresh submitted.".green()); + println!("job_id: {}", job_id); + println!( + "{}", + format!("Use 'hotdata jobs {}' to check status.", job_id).dark_grey() + ); + return; + } + + let id = parsed["id"].as_str().unwrap_or("unknown"); + let version = parsed["version"].as_i64().unwrap_or(0); + let view_status = parsed["status"].as_str().unwrap_or(""); + println!("{}", "View refresh completed.".green()); + println!( + "{}", + format!(" id: {id}, version: {version}, status: {view_status}").dark_grey() + ); +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Mirrors runtimedb's `UpdateDatasetResponse` (see runtimedb/src/http/models.rs). + /// The CLI must deserialize this exact shape — schema_name, source_type, + /// created_at, and columns are NOT in the response. If runtimedb's response + /// gains or loses fields, update this fixture in lockstep. 
+ #[test] + fn update_response_deserializes_runtimedb_payload() { + let body = serde_json::json!({ + "id": "ds_abc123", + "label": "url_test", + "table_name": "url_test", + "latest_version": 3, + "updated_at": "2026-04-28T18:30:00Z", + }); + let resp: UpdateResponse = serde_json::from_value(body).unwrap(); + assert_eq!(resp.id, "ds_abc123"); + assert_eq!(resp.label, "url_test"); + assert_eq!(resp.table_name, "url_test"); + // The server doesn't currently send schema_name, so we don't synthesize + // one — sandbox-scoped views live under views..
, + // not views.main.*, and a fabricated "main" would mislead users. + assert!(resp.schema_name.is_none()); + assert_eq!(resp.latest_version, Some(3)); + assert!(resp.pinned_version.is_none()); + } + + #[test] + fn update_response_uses_schema_name_when_server_supplies_it() { + // Forward-compat: if runtimedb later includes schema_name, we use it. + let body = serde_json::json!({ + "id": "ds_abc123", + "label": "x", + "schema_name": "sandbox_xyz", + "table_name": "x", + "updated_at": "2026-04-28T18:30:00Z", + }); + let resp: UpdateResponse = serde_json::from_value(body).unwrap(); + assert_eq!(resp.schema_name.as_deref(), Some("sandbox_xyz")); + } + + #[test] + fn update_response_handles_pinned_version() { + let body = serde_json::json!({ + "id": "ds_abc123", + "label": "x", + "table_name": "x", + "latest_version": 5, + "pinned_version": 2, + "updated_at": "2026-04-28T18:30:00Z", + }); + let resp: UpdateResponse = serde_json::from_value(body).unwrap(); + assert_eq!(resp.pinned_version, Some(2)); + } + + #[test] + fn update_response_tolerates_missing_latest_version() { + // Defensive: treat latest_version as optional in case the server omits it. + let body = serde_json::json!({ + "id": "ds_abc123", + "label": "x", + "table_name": "x", + "updated_at": "2026-04-28T18:30:00Z", + }); + let resp: UpdateResponse = serde_json::from_value(body).unwrap(); + assert!(resp.latest_version.is_none()); + } +} From 74e6bf7ccfc3e28e603bf765cfff7550d44e02c1 Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 27 May 2026 17:35:44 -0700 Subject: [PATCH 3/9] feat(ux): replace dot-notation args with explicit --catalog/--schema/--table flags Removes the `connection.schema.table` and `airbnb.listings[col1,col2]` positional/dot-notation patterns across load, search, and indexes create. All three commands now use named flags for discoverability and consistency. 
Also adds --name (SQL catalog alias) to databases create, resolves databases by name instead of description, and updates list/create output accordingly. Deletes parse_db_target, parse_index_target, and resolve_connection_id which were only needed to destructure the old dot-notation strings. --- src/command.rs | 76 +++++++++----- src/connections.rs | 31 ------ src/databases.rs | 81 ++++++++++----- src/indexes.rs | 16 +-- src/main.rs | 252 ++++++++++++++------------------------------- 5 files changed, 189 insertions(+), 267 deletions(-) diff --git a/src/command.rs b/src/command.rs index 97b7971..3790461 100644 --- a/src/command.rs +++ b/src/command.rs @@ -173,8 +173,15 @@ pub enum Commands { #[arg(long, value_parser = ["vector", "bm25"])] r#type: Option, - /// Table to search (`connection.table` or `connection.schema.table`). - /// Schema defaults to `public` when omitted. + /// Catalog (database name) to search in. Defaults to the current database. + #[arg(long)] + catalog: Option, + + /// Schema to search in (default: public) + #[arg(long)] + schema: Option, + + /// Table to search #[arg(long)] table: String, @@ -328,28 +335,29 @@ pub enum IndexesCommands { output: String, }, - /// Create an index on a table or dataset. - /// - /// For connection-scoped indexes, pass the table and columns using bracket notation: - /// `connection.table[col1,col2]` or `connection.schema.table[col1,col2]` - /// (schema defaults to `public` when omitted) - /// - /// For dataset-scoped indexes, use `--dataset-id` with `--columns`. + /// Create an index on a table Create { - /// Table and columns to index: `connection.table[col1,col2]` - /// or `connection.schema.table[col1,col2]`. Schema defaults to `public`. - /// - /// Quote the argument to prevent shell glob expansion: - /// `hotdata indexes create 'airbnb.listings[description]' --type bm25` - #[arg(conflicts_with = "dataset_id")] - target: Option, + /// Catalog (database name) for the table to index. 
Defaults to the current database. + #[arg(long, conflicts_with = "dataset_id")] + catalog: Option, + + /// Schema for the table to index (default: public) + #[arg(long, conflicts_with = "dataset_id")] + schema: Option, + + /// Table name to index + #[arg(long = "table", conflicts_with = "dataset_id")] + table_name: Option, - /// Dataset ID (alternative scope to the positional target) - #[arg(long, conflicts_with = "target")] + /// Column to index + #[arg(long)] + column: Option, + + /// Dataset ID (alternative scope — use with --columns) + #[arg(long, conflicts_with_all = ["catalog", "table_name"])] dataset_id: Option, - /// Columns to index (comma-separated). Required with --dataset-id; - /// for connection scope use bracket notation in the target instead. + /// Columns to index (comma-separated). Required with --dataset-id. #[arg(long)] columns: Option, @@ -563,7 +571,13 @@ pub enum DatabasesCommands { /// Create a new managed database Create { - /// Optional display label (not unique, not an identifier — databases are addressed by id) + /// SQL catalog alias — becomes the catalog name in queries: + /// SELECT ... FROM .public.
. + /// Must be [a-z_][a-z0-9_]*, globally unique. + #[arg(long)] + name: Option, + + /// Optional display label #[arg(long)] description: Option, @@ -576,7 +590,8 @@ pub enum DatabasesCommands { tables: Vec, /// When the database expires. Accepts a relative duration (e.g. 24h, 7d, 90m) - /// or an RFC 3339 timestamp. Defaults to 24h when omitted. + /// or an RFC 3339 timestamp. Omitting with --name means no expiry; omitting + /// without --name defaults to 24h. #[arg(long)] expires_at: Option, @@ -597,11 +612,20 @@ pub enum DatabasesCommands { name_or_id: String, }, - /// Load a parquet file into a table using dot notation: `database.table` or `database.schema.table` + /// Load a parquet file into a managed database table Load { - /// Table to load into: `database.table` or `database.schema.table`. - /// Schema defaults to `public` when omitted. - target: String, + /// Table name to load into + #[arg(long, required = true)] + table: String, + + /// Catalog (database name) to load into. Defaults to the current database set via + /// `databases set`. Required when no current database is configured. + #[arg(long)] + catalog: Option, + + /// Schema to load into (default: public) + #[arg(long)] + schema: Option, /// Path to a local parquet file to upload and load #[arg(long, conflicts_with_all = ["upload_id", "url"])] diff --git a/src/connections.rs b/src/connections.rs index 135663f..3a5be6d 100644 --- a/src/connections.rs +++ b/src/connections.rs @@ -157,37 +157,6 @@ struct ListResponse { connections: Vec, } -/// Resolve a connection name or ID to a connection ID, exiting on failure. -/// -/// If `name_or_id` looks like a raw connection ID (starts with "conn"), tries -/// `GET /connections/{id}` directly first to avoid listing the full workspace. -/// Falls back to listing and matching by name on a 404 or when given a plain name. 
-pub fn resolve_connection_id(api: &ApiClient, name_or_id: &str) -> String { - use crossterm::style::Stylize; - - if name_or_id.starts_with("conn") { - let (status, _) = api.get_raw(&format!("/connections/{name_or_id}")); - if status.is_success() { - return name_or_id.to_string(); - } - } - - let body: ListResponse = api.get("/connections"); - match body - .connections - .iter() - .find(|c| c.id == name_or_id || c.name == name_or_id) - { - Some(conn) => conn.id.clone(), - None => { - eprintln!( - "{}", - format!("error: no connection named or with id '{name_or_id}'").red() - ); - std::process::exit(1); - } - } -} pub fn get(workspace_id: &str, connection_id: &str, format: &str) { let api = ApiClient::new(Some(workspace_id)); diff --git a/src/databases.rs b/src/databases.rs index d43497b..5feab59 100644 --- a/src/databases.rs +++ b/src/databases.rs @@ -9,6 +9,9 @@ const DEFAULT_SCHEMA: &str = "public"; #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] struct DatabaseSummary { id: String, + #[serde(default)] + name: Option, + #[serde(default)] description: Option, } @@ -21,6 +24,9 @@ struct ListDatabasesResponse { #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] pub struct Database { pub id: String, + #[serde(default)] + pub name: Option, + #[serde(default)] pub description: Option, pub default_connection_id: String, #[serde(default)] @@ -62,8 +68,13 @@ struct TableRow { #[derive(Deserialize, Serialize)] struct CreateDatabaseResponse { id: String, + #[serde(default)] + name: Option, + #[serde(default)] description: Option, default_connection_id: String, + #[serde(default)] + expires_at: Option, } #[derive(Deserialize)] @@ -90,21 +101,21 @@ pub fn try_resolve_database(api: &ApiClient, id_or_description: &str) -> Result< return Ok(db); } - // Fall back to listing and matching by description. + // Fall back to listing and matching by name. 
let body: ListDatabasesResponse = api.get("/databases"); - let desc_matches: Vec<&DatabaseSummary> = body + let name_matches: Vec<&DatabaseSummary> = body .databases .iter() - .filter(|d| d.description.as_deref() == Some(id_or_description)) + .filter(|d| d.name.as_deref() == Some(id_or_description)) .collect(); - match desc_matches.len() { + match name_matches.len() { 0 => Err(format!( - "no database with id or description '{id_or_description}'" + "no database with id or name '{id_or_description}'" )), - 1 => Ok(fetch_database(api, &desc_matches[0].id)), + 1 => Ok(fetch_database(api, &name_matches[0].id)), _ => Err(format!( - "multiple databases have description '{}' — use the database id instead", + "multiple databases have name '{}' — use the database id instead", id_or_description )), } @@ -127,6 +138,7 @@ fn schema_name(schema: Option<&str>) -> &str { /// Build the request body for `POST /v1/databases`. pub fn create_database_request( + name: Option<&str>, description: Option<&str>, schema: &str, tables: &[String], @@ -134,6 +146,13 @@ pub fn create_database_request( ) -> serde_json::Value { let mut req = serde_json::Map::new(); + if let Some(n) = name { + req.insert( + "name".to_string(), + serde_json::Value::String(n.to_string()), + ); + } + if let Some(desc) = description { req.insert( "description".to_string(), @@ -359,12 +378,13 @@ pub fn list(workspace_id: &str, format: &str) { .iter() .map(|d| { vec![ + d.name.as_deref().unwrap_or("-").to_string(), d.description.as_deref().unwrap_or("-").to_string(), d.id.clone(), ] }) .collect(); - crate::table::print(&["DESCRIPTION", "ID"], &rows); + crate::table::print(&["NAME", "DESCRIPTION", "ID"], &rows); } } _ => unreachable!(), @@ -419,6 +439,7 @@ pub fn get(workspace_id: &str, id_or_description: &str, format: &str) { pub fn create( workspace_id: &str, + name: Option<&str>, description: Option<&str>, schema: &str, tables: &[String], @@ -427,7 +448,7 @@ pub fn create( ) { use crossterm::style::Stylize; - let body 
= create_database_request(description, schema, tables, expires_at); + let body = create_database_request(name, description, schema, tables, expires_at); let api = ApiClient::new(Some(workspace_id)); let spinner = (format == "table").then(|| crate::util::spinner("Creating database...")); @@ -459,23 +480,29 @@ pub fn create( "yaml" => print!("{}", serde_yaml::to_string(&result).unwrap()), "table" => { println!("{}", "Database created".green()); + if let Some(n) = &result.name { + println!("name: {}", n.clone().cyan()); + } if let Some(desc) = &result.description { println!("description: {desc}"); } println!("id: {}", result.id); + if let Some(exp) = &result.expires_at { + println!("expires_at: {exp}"); + } + let catalog = result.name.as_deref().unwrap_or("default"); println!(); println!( "{}", format!( concat!( "Load a table:\n", - " hotdata databases load --file {}.\n", + " hotdata databases load --catalog {} --table --file \n", "\nQuery with:\n", - " hotdata query --database {} \"SELECT * FROM default.public.
LIMIT 10\"\n", - "\n Tip: use 'default..
' as the SQL prefix (not the database or connection id)\n", - " Column names are case-sensitive — wrap uppercase names in double quotes", + " hotdata query --database {} \"SELECT * FROM {}.public.
LIMIT 10\"\n", + "\n Tip: column names are case-sensitive — wrap uppercase names in double quotes", ), - result.id, result.id + catalog, result.id, catalog ) .dark_grey() ); @@ -682,13 +709,13 @@ mod tests { #[test] fn create_database_request_empty_without_description_or_tables() { - let req = create_database_request(None, "public", &[], None); + let req = create_database_request(None, None, "public", &[], None); assert_eq!(req, serde_json::json!({})); } #[test] fn create_database_request_includes_description() { - let req = create_database_request(Some("my db"), "public", &[], None); + let req = create_database_request(None, Some("my db"), "public", &[], None); assert_eq!(req["description"], "my db"); assert!(req.get("schemas").is_none()); } @@ -696,6 +723,7 @@ mod tests { #[test] fn create_database_request_includes_schemas_when_tables_declared() { let req = create_database_request( + None, Some("sales"), "public", &["orders".to_string(), "customers".to_string()], @@ -709,20 +737,20 @@ mod tests { #[test] fn create_database_request_schemas_without_description() { - let req = create_database_request(None, "analytics", &["events".to_string()], None); + let req = create_database_request(None, None, "analytics", &["events".to_string()], None); assert!(req.get("description").is_none()); assert_eq!(req["schemas"][0]["name"], "analytics"); } #[test] fn create_database_request_includes_expires_at_when_provided() { - let req = create_database_request(None, "public", &[], Some("24h")); + let req = create_database_request(None, None, "public", &[], Some("24h")); assert_eq!(req["expires_at"], "24h"); } #[test] fn create_database_request_omits_expires_at_when_none() { - let req = create_database_request(None, "public", &[], None); + let req = create_database_request(None, None, "public", &[], None); assert!(req.get("expires_at").is_none()); } @@ -751,7 +779,7 @@ mod tests { .mock("GET", "/databases") .with_status(200) .with_body( - 
r#"{"databases":[{"id":"db_abc","description":"sales"},{"id":"db_xyz","description":"warehouse"}]}"#, + r#"{"databases":[{"id":"db_abc","name":"sales"},{"id":"db_xyz","name":"warehouse"}]}"#, ) .create(); let detail = server @@ -789,24 +817,24 @@ mod tests { let api = ApiClient::test_new(&server.url(), "k", None); let err = try_resolve_database(&api, "missing").unwrap_err(); - assert!(err.contains("no database with id or description")); + assert!(err.contains("no database with id or name")); } #[test] - fn try_resolve_database_rejects_ambiguous_description() { + fn try_resolve_database_rejects_ambiguous_name() { let mut server = mockito::Server::new(); - // Direct id lookup returns 404 (description isn't a valid id) + // Direct id lookup returns 404 (name isn't a valid id) server .mock("GET", "/databases/sales") .with_status(404) .with_body(r#"{"error":"not found"}"#) .create(); - // List returns two entries with the same description + // List returns two entries with the same name server .mock("GET", "/databases") .with_status(200) .with_body( - r#"{"databases":[{"id":"db_1","description":"sales"},{"id":"db_2","description":"sales"}]}"#, + r#"{"databases":[{"id":"db_1","name":"sales"},{"id":"db_2","name":"sales"}]}"#, ) .create(); @@ -905,6 +933,7 @@ mod tests { .match_body(mockito::Matcher::JsonString( serde_json::to_string(&create_database_request( Some("mydb"), + None, "public", &["gdp".to_string()], None, @@ -914,7 +943,7 @@ mod tests { .create(); let api = ApiClient::test_new(&server.url(), "k", Some("ws-test")); - let body = create_database_request(Some("mydb"), "public", &["gdp".to_string()], None); + let body = create_database_request(Some("mydb"), None, "public", &["gdp".to_string()], None); let (status, resp_body) = api.post_raw("/databases", &body); assert_eq!(status.as_u16(), 201); let parsed: CreateDatabaseResponse = serde_json::from_str(&resp_body).unwrap(); diff --git a/src/indexes.rs b/src/indexes.rs index 2465b2b..37ce35f 100644 --- 
a/src/indexes.rs +++ b/src/indexes.rs @@ -219,18 +219,12 @@ pub fn infer_for_search( let api = ApiClient::new(Some(workspace_id)); - // Resolve connection name → ID + // Resolve connection name → ID, or treat as a raw ID when name lookup fails. let conn_map = connection_lookup(&api); - let connection_id = match conn_map.get(connection_name) { - Some(id) => id.clone(), - None => { - eprintln!( - "{}", - format!("Connection '{}' not found.", connection_name).red() - ); - std::process::exit(1); - } - }; + let connection_id = conn_map + .get(connection_name) + .cloned() + .unwrap_or_else(|| connection_name.to_string()); // Fetch indexes for this table let indexes = list_one_table(&api, &connection_id, schema, table); diff --git a/src/main.rs b/src/main.rs index aca7472..6d085bc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -401,6 +401,7 @@ fn main() { databases::get(&workspace_id, &name_or_id, &output) } Some(DatabasesCommands::Create { + name, description, schema, tables, @@ -408,6 +409,7 @@ fn main() { output, }) => databases::create( &workspace_id, + name.as_deref(), description.as_deref(), &schema, &tables, @@ -421,17 +423,20 @@ fn main() { databases::delete(&workspace_id, &name_or_id) } Some(DatabasesCommands::Load { - target, + table, + catalog, + schema, file, url, upload_id, }) => { - let (database, schema, table) = parse_db_target(&target); + let resolved_schema = + schema.unwrap_or_else(|| "public".to_string()); databases::tables_load( &workspace_id, - Some(database.as_str()), + catalog.as_deref(), &table, - Some(schema.as_str()), + Some(resolved_schema.as_str()), file.as_deref(), url.as_deref(), upload_id.as_deref(), @@ -626,7 +631,10 @@ fn main() { &output, ), IndexesCommands::Create { - target, + catalog, + schema, + table_name, + column, dataset_id, columns, name, @@ -640,31 +648,14 @@ fn main() { } => { let api = api::ApiClient::new(Some(&workspace_id)); let (scope, resolved_columns, auto_name) = - match (target.as_deref(), dataset_id.as_deref()) { - 
(Some(tgt), None) => { - let (conn_name, schema, table, cols) = - parse_index_target(tgt); - let conn_id = - connections::resolve_connection_id(&api, &conn_name); - let auto = format!( - "{table}_{cols}_{type}", - cols = cols.join("_"), - type = r#type - ); - ( - (conn_id, schema, table), - cols.join(","), - auto, - ) - } - (None, Some(did)) => { - let cols = - columns.as_deref().unwrap_or_else(|| { - eprintln!( - "error: --columns is required with --dataset-id" - ); - std::process::exit(1); - }); + match dataset_id.as_deref() { + Some(did) => { + let cols = columns.as_deref().unwrap_or_else(|| { + eprintln!( + "error: --columns is required with --dataset-id" + ); + std::process::exit(1); + }); let auto = format!( "dataset_{cols}_{type}", cols = cols.replace(',', "_"), @@ -676,23 +667,46 @@ fn main() { auto, ) } - _ => { - eprintln!( - "error: provide either (e.g. airbnb.listings[col1,col2]) or --dataset-id with --columns" - ); - std::process::exit(1); + None => { + let tbl = table_name.unwrap_or_else(|| { + eprintln!("error: --table is required"); + std::process::exit(1); + }); + let col = column.or(columns).unwrap_or_else(|| { + eprintln!("error: --column is required"); + std::process::exit(1); + }); + let sch = schema.unwrap_or_else(|| "public".to_string()); + let cat = catalog + .or_else(|| { + crate::config::load_current_database( + "default", + &workspace_id, + ) + }) + .unwrap_or_else(|| { + eprintln!( + "error: --catalog is required (or set a current database with 'hotdata databases set')" + ); + std::process::exit(1); + }); + let db = databases::resolve_database(&api, &cat); + let conn_id = db.default_connection_id; + let auto = + format!("{tbl}_{col}_{type}", type = r#type); + ((conn_id, sch, tbl), col, auto) } }; let index_name = name.unwrap_or(auto_name); let is_dataset = dataset_id.is_some(); - let (conn_id, schema, table) = scope; + let (conn_id, idx_schema, idx_table) = scope; let resolved_scope = if is_dataset { indexes::IndexScope::Dataset { 
dataset_id: &conn_id } } else { indexes::IndexScope::Connection { connection_id: &conn_id, - schema: &schema, - table: &table, + schema: &idx_schema, + table: &idx_table, } }; indexes::create( @@ -793,6 +807,8 @@ fn main() { Commands::Search { query, r#type, + catalog, + schema, table, column, select, @@ -802,22 +818,28 @@ fn main() { } => { let workspace_id = resolve_workspace(workspace_id); - // Parse `connection.table` or `connection.schema.table`. - // Schema defaults to `public` when omitted. - let parts: Vec<&str> = table.splitn(4, '.').collect(); - let (conn_name, schema, table_name) = match parts.as_slice() { - [conn, schema, tbl] => { - (conn.to_string(), schema.to_string(), tbl.to_string()) - } - [conn, tbl] => (conn.to_string(), "public".to_string(), tbl.to_string()), - _ => { + let api = api::ApiClient::new(Some(&workspace_id)); + let cat = catalog + .or_else(|| { + crate::config::load_current_database("default", &workspace_id) + }) + .unwrap_or_else(|| { eprintln!( - "error: --table must be 'connection.table' or 'connection.schema.table'" + "error: --catalog is required (or set a current database with 'hotdata databases set')" ); std::process::exit(1); - } - }; - let normalized_table = format!("{}.{}.{}", conn_name, schema, table_name); + }); + let db = databases::resolve_database(&api, &cat); + let resolved_schema = schema.unwrap_or_else(|| "public".to_string()); + let db_id = db.id.clone(); + let conn_id = db.default_connection_id; + + // bm25_search takes a string literal for the table path; the server resolves + // catalog aliases (like "default") only in SQL FROM clauses, not in string + // arguments. Use the connection ID as the catalog prefix so it resolves directly. + let bm25_table = format!("{}.{}.{}", conn_id, resolved_schema, table); + // vector queries run as standard SQL with X-Database-Id, so the catalog alias works. 
+ let vector_table = format!("{}.{}.{}", cat, resolved_schema, table); // Infer --type and --column from the table's indexes when either is omitted. let (resolved_type, resolved_column) = @@ -826,9 +848,9 @@ fn main() { } else { let (inferred_type, inferred_column) = indexes::infer_for_search( &workspace_id, - &conn_name, - &schema, - &table_name, + &conn_id, + &resolved_schema, + &table, r#type.as_deref(), column.as_deref(), ); @@ -849,7 +871,7 @@ fn main() { format!( "SELECT {} FROM bm25_search('{}', '{}', '{}') ORDER BY score DESC LIMIT {}", bm25_columns, - normalized_table.replace('\'', "''"), + bm25_table.replace('\'', "''"), resolved_column.replace('\'', "''"), query.replace('\'', "''"), limit, @@ -862,12 +884,12 @@ fn main() { select_cols, resolved_column, query.replace('\'', "''"), - normalized_table, + vector_table, limit, ), _ => unreachable!(), }; - query::execute(&sql, &workspace_id, None, None, &output) + query::execute(&sql, &workspace_id, None, Some(db_id.as_str()), &output) } Commands::Queries { id, @@ -1023,122 +1045,6 @@ fn main() { update::maybe_print_update_notice(update_handle); } -/// Parse a database target like `airbnb.listings` or `airbnb.public.listings` -/// into `(database, schema, table)`. Schema defaults to `public`. -fn parse_db_target(target: &str) -> (String, String, String) { - let parts: Vec<&str> = target.splitn(4, '.').collect(); - match parts.as_slice() { - [db, tbl] => (db.to_string(), "public".to_string(), tbl.to_string()), - [db, schema, tbl] => (db.to_string(), schema.to_string(), tbl.to_string()), - _ => { - eprintln!( - "error: target must be 'database.table' or 'database.schema.table'" - ); - std::process::exit(1); - } - } -} - -/// Parse an index target like `airbnb.listings[col1,col2]` or -/// `airbnb.public.listings[col1,col2]` into `(conn_name, schema, table, columns)`. -/// Schema defaults to `public` when only two dot-parts are given. 
-fn parse_index_target(target: &str) -> (String, String, String, Vec) { - let Some(bracket_pos) = target.find('[') else { - eprintln!( - "error: target must include columns in brackets, e.g. airbnb.listings[col1,col2]" - ); - std::process::exit(1); - }; - if !target.ends_with(']') { - eprintln!( - "error: target bracket is not closed — use e.g. 'airbnb.listings[col1,col2]'" - ); - std::process::exit(1); - } - let table_part = &target[..bracket_pos]; - let cols_raw = &target[bracket_pos + 1..target.len() - 1]; - - let parts: Vec<&str> = table_part.splitn(4, '.').collect(); - let (conn, schema, table) = match parts.as_slice() { - [c, t] => (c.to_string(), "public".to_string(), t.to_string()), - [c, s, t] => (c.to_string(), s.to_string(), t.to_string()), - _ => { - eprintln!( - "error: target must be 'connection.table[cols]' or 'connection.schema.table[cols]'" - ); - std::process::exit(1); - } - }; - - let columns: Vec = cols_raw - .split(',') - .map(|s| s.trim().to_string()) - .filter(|s| !s.is_empty()) - .collect(); - - if columns.is_empty() { - eprintln!("error: no columns specified in brackets"); - std::process::exit(1); - } - - (conn, schema, table, columns) -} - -#[cfg(test)] -mod tests { - use super::*; - - // --- parse_db_target --- - - #[test] - fn db_target_two_parts_defaults_schema_to_public() { - let (db, schema, table) = parse_db_target("airbnb.listings"); - assert_eq!(db, "airbnb"); - assert_eq!(schema, "public"); - assert_eq!(table, "listings"); - } - - #[test] - fn db_target_three_parts_uses_explicit_schema() { - let (db, schema, table) = parse_db_target("airbnb.staging.listings"); - assert_eq!(db, "airbnb"); - assert_eq!(schema, "staging"); - assert_eq!(table, "listings"); - } - - // --- parse_index_target --- - - #[test] - fn index_target_two_parts_defaults_schema_to_public() { - let (conn, schema, table, cols) = parse_index_target("airbnb.listings[description]"); - assert_eq!(conn, "airbnb"); - assert_eq!(schema, "public"); - assert_eq!(table, 
"listings"); - assert_eq!(cols, vec!["description"]); - } - - #[test] - fn index_target_three_parts_uses_explicit_schema() { - let (conn, schema, table, cols) = - parse_index_target("airbnb.public.listings[name,description]"); - assert_eq!(conn, "airbnb"); - assert_eq!(schema, "public"); - assert_eq!(table, "listings"); - assert_eq!(cols, vec!["name", "description"]); - } - - #[test] - fn index_target_multiple_columns() { - let (_, _, _, cols) = parse_index_target("db.tbl[a,b,c]"); - assert_eq!(cols, vec!["a", "b", "c"]); - } - - #[test] - fn index_target_trims_column_whitespace() { - let (_, _, _, cols) = parse_index_target("db.tbl[a, b]"); - assert_eq!(cols, vec!["a", "b"]); - } -} pub fn get_styles() -> clap::builder::Styles { Styles::styled() From f2b2798b57010172c83e3f14816f2a7204b8eadd Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:56:55 -0700 Subject: [PATCH 4/9] fix(ux): address review feedback on catalog-ux-redesign - Fix views.rs:194 to use v.schema_name instead of hardcoded "main" - Update command.rs doc strings: Set uses "id or name", Delete uses "name or ID" - Update README Search and Indexes sections to use --catalog/--table flags - Update hotdata skill: databases load/create syntax, fix views->datasets ref - Update hotdata-search skill: search and indexes create use --catalog/--table --- README.md | 18 +++++++++--------- skills/hotdata-search/SKILL.md | 28 +++++++++++++++------------- skills/hotdata/SKILL.md | 34 ++++++++++++++++------------------ src/command.rs | 4 ++-- src/views.rs | 2 +- 5 files changed, 43 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 5525399..8fa74e0 100644 --- a/README.md +++ b/README.md @@ -229,10 +229,10 @@ hotdata queries [-o table|json|yaml] ```sh # BM25 full-text search (requires a BM25 index on the column) -hotdata search "" --type bm25 --table --column [--select ] [--limit ] [-o table|json|csv] +hotdata search "" --type bm25 
--catalog --table
--column [--schema ] [--select ] [--limit ] [-o table|json|csv] # Vector search (requires a vector index with auto-embedding on the column) -hotdata search "" --type vector --table
--column [--limit ] +hotdata search "" --type vector --catalog --table
--column [--schema ] [--limit ] ``` - **`--type vector`** — pass your query as **plain text**, name the **source text column** (e.g. `title`). The server embeds the query at the same time, using the same provider that auto-embedded the column when the index was built — so distance metric, model, and dimensions all match automatically. No `OPENAI_API_KEY`, no client-side embedding, no need to know about the auto-generated `_embedding` column. Generated SQL: `vector_distance(col, 'query')` server-side. @@ -244,16 +244,16 @@ hotdata search "" --type vector --table
--column --schema --table
[-o table|json|yaml] -hotdata indexes create --connection-id --schema --table
\ - --name --columns --type sorted|bm25|vector \ - [--metric l2|cosine|dot] [--async] \ +# Catalog-table scope +hotdata indexes list --catalog --table
[--schema ] [-o table|json|yaml] +hotdata indexes create --catalog --table
[--schema ] \ + --column --type sorted|bm25|vector \ + [--name ] [--metric l2|cosine|dot] [--async] \ [--embedding-provider-id ] [--dimensions ] [--output-column ] [--description ] -hotdata indexes delete --connection-id --schema --table
--name +hotdata indexes delete --catalog --table
[--schema ] --name # Dataset scope hotdata indexes list --dataset-id [-o table|json|yaml] diff --git a/skills/hotdata-search/SKILL.md b/skills/hotdata-search/SKILL.md index 9b38267..1b9dae1 100644 --- a/skills/hotdata-search/SKILL.md +++ b/skills/hotdata-search/SKILL.md @@ -20,12 +20,12 @@ Retrieval workloads in Hotdata: **BM25 full-text**, **vector similarity**, and t ```bash # BM25 (requires a BM25 index on the column) -hotdata search "" --type bm25 --table --column \ - [--select ] [--limit ] [--workspace-id ] [--output table|json|csv] +hotdata search "" --type bm25 --catalog --table
--column \ + [--schema ] [--select ] [--limit ] [--workspace-id ] [--output table|json|csv] # Vector (requires a vector index; server auto-embeds the query text) -hotdata search "" --type vector --table --column \ - [--select ] [--limit ] [--workspace-id ] [--output table|json|csv] +hotdata search "" --type vector --catalog --table
--column \ + [--schema ] [--select ] [--limit ] [--workspace-id ] [--output table|json|csv] ``` | Type | Behavior | @@ -41,22 +41,24 @@ hotdata search "" --type vector --table --colum ## Indexes (BM25 and vector) -Indexes attach to a **connection table** (`--connection-id` + `--schema` + `--table`) or a **dataset** (`--dataset-id`). Scopes are mutually exclusive for create/delete. +Indexes attach to a **catalog table** (`--catalog` + `--table`) or a **dataset** (`--dataset-id`). Scopes are mutually exclusive for create/delete. + +**Note:** `indexes create` uses `--catalog`/`--table`; `indexes list` and `indexes delete` still use `--connection-id`/`--schema`/`--table`. ```bash -# List — workspace scan on connection tables (filter with -c / --schema / --table) +# List — workspace scan (filter with --connection-id / --schema / --table) hotdata indexes list [--connection-id ] [--schema ] [--table
] [--workspace-id ] [--output table|json|yaml] hotdata indexes list --dataset-id [--workspace-id ] [--output table|json|yaml] -# Connection table -hotdata indexes create --connection-id --schema --table
\ - --name --columns --type bm25|vector \ - [--metric l2|cosine|dot] [--async] \ +# Catalog table (create uses --catalog; list/delete use --connection-id) +hotdata indexes create --catalog --table
--column --type bm25|vector \ + [--schema ] [--name ] [--metric l2|cosine|dot] [--async] \ [--embedding-provider-id ] [--dimensions ] [--output-column ] [--description ] hotdata indexes delete --connection-id --schema --table
--name # Dataset -hotdata indexes create --dataset-id --name --columns --type bm25|vector ... +hotdata indexes create --dataset-id --columns --type bm25|vector \ + [--name ] [--metric l2|cosine|dot] [--async] ... hotdata indexes delete --dataset-id --name ``` @@ -89,6 +91,6 @@ hotdata embedding-providers delete [--workspace-id ] 1. `hotdata tables list --connection-id ` — confirm column types. 2. `hotdata indexes list` — avoid duplicate indexes. -3. `hotdata indexes create ... --type bm25|vector` (add `--async` if large). -4. `hotdata search "..." --type bm25|vector --table ... --column ...` +3. `hotdata indexes create --catalog --table
--column --type bm25|vector` (add `--async` if large). +4. `hotdata search "..." --type bm25|vector --catalog --table
--column ` 5. Record what exists in **context:DATAMODEL** (core skill) when the workspace should remember index choices. diff --git a/skills/hotdata/SKILL.md b/skills/hotdata/SKILL.md index 0bec6df..367ef17 100644 --- a/skills/hotdata/SKILL.md +++ b/skills/hotdata/SKILL.md @@ -181,31 +181,30 @@ hotdata connections create \ **Managed databases** are Hotdata-owned catalogs you create and populate yourself — no remote source to sync. Query them in SQL as **`..
`**. Prefer **`hotdata databases`** for this workflow. -**Parquet vs views:** `databases tables load` accepts **parquet only**. For SQL-query or saved-query materializations, use **`hotdata views create`**. +**Parquet vs datasets:** `databases tables load` accepts **parquet only**. For SQL-query or saved-query materializations, use **`hotdata datasets create`**. **Active database:** `hotdata databases set ` saves the active database to config. All `databases tables` subcommands and all `context` commands default to the active database; pass **`--database `** to override per-command. ``` hotdata databases list [--workspace-id ] [--output table|json|yaml] -hotdata databases create [--description
...] [--schema public] [--expires-at ] [--workspace-id ] [--output table|json|yaml] -hotdata databases set -hotdata databases [--workspace-id ] [--output table|json|yaml] -hotdata databases delete [--workspace-id ] +hotdata databases create [--name ] [--description
...] [--schema public] [--expires-at ] [--workspace-id ] [--output table|json|yaml] +hotdata databases set +hotdata databases [--workspace-id ] [--output table|json|yaml] +hotdata databases delete [--workspace-id ] -# Dot-notation shorthand for load: database.table or database.schema.table -hotdata databases load [--file ./data.parquet] [--url ] [--upload-id ] [--workspace-id ] +hotdata databases load --table
[--catalog ] [--schema public] [--file ./data.parquet] [--url ] [--upload-id ] [--workspace-id ] -hotdata databases tables list [--database ] [--schema ] [--workspace-id ] [--output table|json|yaml] -hotdata databases tables load
[--database ] [--schema public] [--file ./data.parquet] [--url ] [--upload-id ] [--workspace-id ] -hotdata databases tables delete
[--database ] [--schema public] [--workspace-id ] +hotdata databases tables list [--database ] [--schema ] [--workspace-id ] [--output table|json|yaml] +hotdata databases tables load
[--database ] [--schema public] [--file ./data.parquet] [--url ] [--upload-id ] [--workspace-id ] +hotdata databases tables delete
[--database ] [--schema public] [--workspace-id ] ``` - `list` — all managed databases in the workspace. -- `create` — creates a new managed database. `--description` is an optional human-readable label (databases are addressed by id, not description). `--expires-at` accepts relative durations (`24h`, `7d`, `90m`) or an RFC 3339 timestamp; defaults to `24h` when omitted. Repeat `--table` to declare tables up front. -- `set` — saves `` as the active database. Subsequent `databases tables` and `context` commands use it automatically. -- `` — inspect one database (id, description, expires_at). +- `create` — creates a new managed database. `--name` sets the SQL catalog alias used in queries (`SELECT … FROM .public.
`); must be `[a-z_][a-z0-9_]*` and globally unique. `--description` is an optional display label. `--expires-at` accepts relative durations (`24h`, `7d`, `90m`) or an RFC 3339 timestamp; when it is omitted, a database created with `--name` has no expiry, and one created without `--name` defaults to `24h`. Repeat `--table` to declare tables up front. +- `set` — saves `` as the active database. Subsequent `databases tables` and `context` commands use it automatically. +- `` — inspect one database (id, name, expires_at). - `delete` — removes the managed database; clears the active-database config if it matched. -- `load` — shorthand with dot notation (`database.table` or `database.schema.table`). Schema defaults to `public`. +- `load` — loads a parquet file into a table. `--catalog` selects the database by name; defaults to the current database set via `databases set`. Schema defaults to `public`. - `tables list` — lists tables with `TABLE` (`..
`), `SYNCED`, `LAST_SYNC`. Uses active database when `--database` is omitted. - `tables load` — uploads a local parquet file (`--file`), a remote parquet URL (`--url`), or a pre-staged upload (`--upload-id`) and publishes with **replace** mode. - `tables delete` — drops a table from the managed database. @@ -213,10 +212,9 @@ hotdata databases tables delete
[--database ] [--schema publ Example: ``` -hotdata databases create --description "sales" --table orders -hotdata databases set -hotdata databases tables load orders --file ./orders.parquet -hotdata query "SELECT count(*) FROM .public.orders" +hotdata databases create --name sales --table orders +hotdata databases load --catalog sales --table orders --file ./orders.parquet +hotdata query "SELECT count(*) FROM sales.public.orders" ``` ### List Tables and Columns diff --git a/src/command.rs b/src/command.rs index 3790461..36af60d 100644 --- a/src/command.rs +++ b/src/command.rs @@ -602,13 +602,13 @@ pub enum DatabasesCommands { /// Set the current database (used by default when no database is specified) Set { - /// Database id or description + /// Database id or name id_or_description: String, }, /// Delete a managed database and its tables Delete { - /// Database name or connection ID + /// Database name or ID name_or_id: String, }, diff --git a/src/views.rs b/src/views.rs index e1e28b0..9afbe62 100644 --- a/src/views.rs +++ b/src/views.rs @@ -191,7 +191,7 @@ pub fn get(view_id: &str, workspace_id: &str, format: &str) { let updated_at = crate::util::format_date(&v.updated_at); println!("id: {}", v.id); println!("label: {}", v.label); - println!("full_name: views.main.{}", v.table_name); + println!("full_name: views.{}.{}", v.schema_name, v.table_name); println!("source_type: {}", v.source_type); println!("created_at: {created_at}"); println!("updated_at: {updated_at}"); From 95af8ebb6f1f86134c2be7963be67265c037122b Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 3 Jun 2026 14:16:27 -0700 Subject: [PATCH 5/9] fix(databases): use catalog name in full_name output instead of hardcoded default --- src/databases.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/databases.rs b/src/databases.rs index 5feab59..3e93f91 100644 --- a/src/databases.rs +++ b/src/databases.rs @@ -202,11 
+202,11 @@ pub fn is_parquet_path(path: &str) -> bool { || Path::new(path).extension().and_then(|e| e.to_str()) == Some("parquet") } -fn table_rows(tables: Vec) -> Vec { +fn table_rows(catalog: &str, tables: Vec) -> Vec { tables .into_iter() .map(|t| TableRow { - full_name: format!("default.{}.{}", t.schema, t.table), + full_name: format!("{catalog}.{}.{}", t.schema, t.table), schema: t.schema, table: t.table, synced: t.synced, @@ -566,7 +566,8 @@ pub fn tables_list(workspace_id: &str, database: Option<&str>, schema: Option<&s let db = resolve_database(&api, &database); let tables = collect_tables(&api, &db.default_connection_id, schema); - let rows = table_rows(tables); + let catalog = db.name.as_deref().unwrap_or("default"); + let rows = table_rows(catalog, tables); match format { "json" => println!("{}", serde_json::to_string_pretty(&rows).unwrap()), @@ -655,7 +656,8 @@ pub fn tables_load( } }; - let full_name = format!("default.{}.{}", result.schema_name, result.table_name); + let catalog = db.name.as_deref().unwrap_or("default"); + let full_name = format!("{catalog}.{}.{}", result.schema_name, result.table_name); println!("{}", "Table loaded".green()); println!("full_name: {}", full_name.clone().green()); println!("rows: {}", result.row_count); @@ -872,8 +874,8 @@ mod tests { } #[test] - fn table_rows_uses_default_prefix() { - let rows = table_rows(vec![InfoTable { + fn table_rows_uses_catalog_prefix() { + let rows = table_rows("mydb", vec![InfoTable { connection: "ignored".into(), schema: "public".into(), table: "orders".into(), @@ -881,7 +883,7 @@ mod tests { last_sync: Some("2026-05-19T00:00:00Z".into()), }]); assert_eq!(rows.len(), 1); - assert_eq!(rows[0].full_name, "default.public.orders"); + assert_eq!(rows[0].full_name, "mydb.public.orders"); assert!(rows[0].synced); } From 2cea084391ea0512046ee17629cbac28013e0533 Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 3 Jun 2026 14:50:13 -0700 Subject: 
[PATCH 6/9] feat(search): make --catalog required and use default_catalog for vector queries - Add `default_catalog` field to `Database` struct (returned by API) - Make `--catalog` required on `search` and `indexes create` commands instead of falling back to the active database config - Use `db.default_catalog` as the vector table prefix so the query resolves against the correct catalog alias, not the resolved name --- src/command.rs | 8 ++++---- src/databases.rs | 3 ++- src/main.rs | 28 +++------------------------- 3 files changed, 9 insertions(+), 30 deletions(-) diff --git a/src/command.rs b/src/command.rs index bee5d45..2440689 100644 --- a/src/command.rs +++ b/src/command.rs @@ -173,9 +173,9 @@ pub enum Commands { #[arg(long, value_parser = ["vector", "bm25"])] r#type: Option, - /// Catalog (database name) to search in. Defaults to the current database. + /// Catalog (database id or name) to search in. #[arg(long)] - catalog: Option, + catalog: String, /// Schema to search in (default: public) #[arg(long)] @@ -337,8 +337,8 @@ pub enum IndexesCommands { /// Create an index on a table Create { - /// Catalog (database name) for the table to index. Defaults to the current database. - #[arg(long, conflicts_with = "dataset_id")] + /// Catalog (database id or name) for the table to index. 
+ #[arg(long, conflicts_with = "dataset_id", required_unless_present = "dataset_id")] catalog: Option, /// Schema for the table to index (default: public) diff --git a/src/databases.rs b/src/databases.rs index b7a2aed..9cac557 100644 --- a/src/databases.rs +++ b/src/databases.rs @@ -25,6 +25,7 @@ pub struct Database { #[serde(default)] pub name: Option, pub default_connection_id: String, + pub default_catalog: String, #[serde(default)] attachments: Vec, } @@ -891,7 +892,7 @@ mod tests { fn full_detail(id: &str, name: &str, conn_id: &str) -> String { format!( - r#"{{"id":"{id}","name":"{name}","default_connection_id":"{conn_id}","attachments":[]}}"# + r#"{{"id":"{id}","name":"{name}","default_connection_id":"{conn_id}","default_catalog":"default","attachments":[]}}"# ) } diff --git a/src/main.rs b/src/main.rs index 0ec586d..d452170 100644 --- a/src/main.rs +++ b/src/main.rs @@ -709,19 +709,7 @@ fn main() { std::process::exit(1); }); let sch = schema.unwrap_or_else(|| "public".to_string()); - let cat = catalog - .or_else(|| { - crate::config::load_current_database( - "default", - &workspace_id, - ) - }) - .unwrap_or_else(|| { - eprintln!( - "error: --catalog is required (or set a current database with 'hotdata databases set')" - ); - std::process::exit(1); - }); + let cat = catalog.unwrap(); let db = databases::resolve_database(&api, &cat); let conn_id = db.default_connection_id; let auto = @@ -851,17 +839,7 @@ fn main() { let workspace_id = resolve_workspace(workspace_id); let api = api::ApiClient::new(Some(&workspace_id)); - let cat = catalog - .or_else(|| { - crate::config::load_current_database("default", &workspace_id) - }) - .unwrap_or_else(|| { - eprintln!( - "error: --catalog is required (or set a current database with 'hotdata databases set')" - ); - std::process::exit(1); - }); - let db = databases::resolve_database(&api, &cat); + let db = databases::resolve_database(&api, &catalog); let resolved_schema = schema.unwrap_or_else(|| "public".to_string()); let 
db_id = db.id.clone(); let conn_id = db.default_connection_id; @@ -871,7 +849,7 @@ fn main() { // arguments. Use the connection ID as the catalog prefix so it resolves directly. let bm25_table = format!("{}.{}.{}", conn_id, resolved_schema, table); // vector queries run as standard SQL with X-Database-Id, so the catalog alias works. - let vector_table = format!("{}.{}.{}", cat, resolved_schema, table); + let vector_table = format!("{}.{}.{}", db.default_catalog, resolved_schema, table); // Infer --type and --column from the table's indexes when either is omitted. let (resolved_type, resolved_column) = From c65202bb2bc27bc758ff774e4aefeadc04f177b2 Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 3 Jun 2026 15:28:30 -0700 Subject: [PATCH 7/9] fix(search): use catalog alias for bm25_search table ref bm25_search passes its table reference as a string literal to the server, which now rewrites catalog aliases (e.g. "default") to the real managed connection name before execution. Use db.default_catalog as the prefix for both BM25 and vector table refs so both search types route through the same catalog alias resolution path. --- src/main.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main.rs b/src/main.rs index d452170..6796b7b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -844,11 +844,9 @@ fn main() { let db_id = db.id.clone(); let conn_id = db.default_connection_id; - // bm25_search takes a string literal for the table path; the server resolves - // catalog aliases (like "default") only in SQL FROM clauses, not in string - // arguments. Use the connection ID as the catalog prefix so it resolves directly. - let bm25_table = format!("{}.{}.{}", conn_id, resolved_schema, table); - // vector queries run as standard SQL with X-Database-Id, so the catalog alias works. 
+ // Both search types run as SQL with X-Database-Id; the server rewrites + // catalog aliases (like "default") to the real connection name before execution. + let bm25_table = format!("{}.{}.{}", db.default_catalog, resolved_schema, table); let vector_table = format!("{}.{}.{}", db.default_catalog, resolved_schema, table); // Infer --type and --column from the table's indexes when either is omitted. From 0f5d76acb516298397acbef740e1bcf145c8a667 Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:04:25 -0700 Subject: [PATCH 8/9] docs: remove --table from databases create example --- README.md | 2 +- skills/hotdata/SKILL.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 34e7e5e..a6fb73d 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ hotdata databases tables delete
[--schema public] Example: ```sh -hotdata databases create --name sales --table orders +hotdata databases create --name sales hotdata databases tables load sales orders --file ./orders.parquet hotdata query "SELECT count(*) FROM sales.public.orders" ``` diff --git a/skills/hotdata/SKILL.md b/skills/hotdata/SKILL.md index 899e545..24624ed 100644 --- a/skills/hotdata/SKILL.md +++ b/skills/hotdata/SKILL.md @@ -215,7 +215,7 @@ hotdata databases tables delete
[--database ] [--schema publ Example: ``` -hotdata databases create --name sales --table orders +hotdata databases create --name sales hotdata databases load --catalog sales --table orders --file ./orders.parquet hotdata query "SELECT count(*) FROM sales.public.orders" ``` From 53ea13606ba9af8419afebed0102bb78839af258 Mon Sep 17 00:00:00 2001 From: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com> Date: Wed, 3 Jun 2026 16:11:15 -0700 Subject: [PATCH 9/9] fix(databases): set/query --database accept names; table full_name uses default_catalog --- src/databases.rs | 16 +++++----------- src/main.rs | 6 +++++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/databases.rs b/src/databases.rs index 9cac557..6f50a20 100644 --- a/src/databases.rs +++ b/src/databases.rs @@ -625,16 +625,12 @@ pub fn create( pub fn set(workspace_id: &str, id: &str) { use crossterm::style::Stylize; let api = ApiClient::new(Some(workspace_id)); - let encoded = urlencoding::encode(id); - if api.get_none_if_not_found::(&format!("/databases/{encoded}")).is_none() { - eprintln!("{}", format!("error: no database with id '{id}'").red()); - std::process::exit(1); - } - if let Err(e) = crate::config::save_current_database("default", workspace_id, id) { + let db = resolve_database(&api, id); + if let Err(e) = crate::config::save_current_database("default", workspace_id, &db.id) { eprintln!("{}", format!("error saving current database: {e}").red()); std::process::exit(1); } - println!("{}", format!("Current database set to {id}").green()); + println!("{}", format!("Current database set to {}", db.id).green()); } fn resolve_current_database(provided: Option<&str>, workspace_id: &str) -> String { @@ -681,8 +677,7 @@ pub fn tables_list(workspace_id: &str, database: Option<&str>, schema: Option<&s let db = resolve_database(&api, &database); let tables = collect_tables(&api, &db.default_connection_id, schema); - let catalog = db.name.as_deref().unwrap_or("default"); - let rows = 
table_rows(catalog, tables); + let rows = table_rows(&db.default_catalog, tables); match format { "json" => println!("{}", serde_json::to_string_pretty(&rows).unwrap()), @@ -771,8 +766,7 @@ pub fn tables_load( } }; - let catalog = db.name.as_deref().unwrap_or("default"); - let full_name = format!("{catalog}.{}.{}", result.schema_name, result.table_name); + let full_name = format!("{}.{}.{}", db.default_catalog, result.schema_name, result.table_name); println!("{}", "Table loaded".green()); println!("full_name: {}", full_name.clone().green()); println!("rows: {}", result.row_count); diff --git a/src/main.rs b/src/main.rs index 6796b7b..e998dbf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -214,11 +214,15 @@ fn main() { Some(QueryCommands::Status { id }) => query::poll(&id, &workspace_id, &output), None => match sql { Some(sql) => { + let resolved_db = database.as_deref().map(|d| { + let api = api::ApiClient::new(Some(&workspace_id)); + databases::resolve_database(&api, d).id + }); query::execute( &sql, &workspace_id, connection.as_deref(), - database.as_deref(), + resolved_db.as_deref(), &output, ) }