hotdata-dev · eddietejeda · May 27, 2026 · May 27, 2026 · May 28, 2026 · Jun 3, 2026
diff --git a/README.md b/README.md
@@ -67,7 +67,7 @@ API key priority (lowest to highest): config file → `HOTDATA_API_KEY` env var
 | `connections` | `list`, `create`, `refresh`, `new` | Manage connections |
 | `databases` | `list`, `create`, `delete`, `tables` | Managed databases (create and load tables via parquet) |
 | `tables` | `list` | List tables and columns |
-| `datasets` | `list`, `create`, `update` | Manage uploaded datasets |
+| `views` | `list`, `create`, `update`, `refresh` | Manage SQL-derived views |
 | `context` | `list`, `show`, `pull`, `push` | Workspace Markdown context (e.g. data model `DATAMODEL`) via the context API |
 | `query` | | Execute a SQL query |
 | `queries` | `list` | Inspect query run history |
@@ -154,7 +154,7 @@ hotdata databases tables delete <database> <table> [--schema public]
 Example:
 
 ```sh
-hotdata databases create --name sales --table orders
+hotdata databases create --name sales
 hotdata databases tables load sales orders --file ./orders.parquet
 hotdata query "SELECT count(*) FROM sales.public.orders"
 ```
@@ -170,24 +170,19 @@ hotdata tables list [--workspace-id <id>] [--connection-id <id>] [--schema <patt
 - `--schema` and `--table` support SQL `%` wildcard patterns.
 - Tables are displayed as `<connection>.<schema>.<table>` — use this format in SQL queries.
 
-## Datasets
+## Views
 
 ```sh
-hotdata datasets list [--workspace-id <id>] [--limit <n>] [--offset <n>] [--format table|json|yaml]
-hotdata datasets <dataset_id> [--workspace-id <id>] [--format table|json|yaml]
-hotdata datasets create --file data.csv [--label "My Dataset"] [--table-name my_dataset]
-hotdata datasets create --sql "SELECT ..." --label "My Dataset"
-hotdata datasets create --url "https://example.com/data.parquet" --label "My Dataset"
-hotdata datasets update <dataset_id> [--label "New Label"] [--table-name new_table]
-hotdata datasets refresh <dataset_id> [--workspace-id <id>] [--async]
+hotdata views list [--workspace-id <id>] [--limit <n>] [--offset <n>] [--output table|json|yaml]
+hotdata views <view_id> [--workspace-id <id>] [--output table|json|yaml]
+hotdata views create --name my_view [--description "My View"] (--sql "SELECT ..." | --query-id <id>)
+hotdata views update <view_id> [--description "New description"] [--name new_name]
+hotdata views refresh <view_id> [--workspace-id <id>] [--async]
 ```
 
-- Datasets are queryable as `datasets.main.<table_name>`.
-- `--file`, `--sql`, `--query-id`, and `--url` are mutually exclusive.
-- `--url` imports data directly from a URL (supports csv, json, parquet).
-- Format is auto-detected from file extension or content.
-- Piped stdin is supported: `cat data.csv | hotdata datasets create --label "My Dataset"`
-- `refresh` re-runs the dataset's source (URL fetch or saved query) and creates a new version. Not supported for upload-source datasets.
+- Views are queryable as `views.main.<name>`.
+- `--sql` and `--query-id` are mutually exclusive; exactly one is required for `create`.
+- `refresh` re-runs the view's source query and creates a new version.
 - `--async` submits the refresh as a background job and returns a job ID; poll with `hotdata jobs <job_id>`.
 
 ## Workspace context
@@ -237,10 +232,10 @@ hotdata queries <query_run_id> [-o table|json|yaml]
 
 ```sh
 # BM25 full-text search (requires a BM25 index on the column)
-hotdata search "<query>" --type bm25 --table <connection.schema.table> --column <column> [--select <columns>] [--limit <n>] [-o table|json|csv]
+hotdata search "<query>" --type bm25 --catalog <catalog> --table <table> --column <column> [--schema <schema>] [--select <columns>] [--limit <n>] [-o table|json|csv]
 
 # Vector search (requires a vector index with auto-embedding on the column)
-hotdata search "<query>" --type vector --table <table> --column <source_text_column> [--limit <n>]
+hotdata search "<query>" --type vector --catalog <catalog> --table <table> --column <source_text_column> [--schema <schema>] [--limit <n>]
 ```
 
 - **`--type vector`** — pass your query as **plain text**, name the **source text column** (e.g. `title`). The server embeds the query at the same time, using the same provider that auto-embedded the column when the index was built — so distance metric, model, and dimensions all match automatically. No `OPENAI_API_KEY`, no client-side embedding, no need to know about the auto-generated `_embedding` column. Generated SQL: `vector_distance(col, 'query')` server-side.
@@ -252,16 +247,16 @@ hotdata search "<query>" --type vector --table <table> --column <source_text_col
 
 ## Indexes
 
-Indexes attach to either a connection-table (`--connection-id` + `--schema` + `--table`) or a dataset (`--dataset-id`). The two scopes are mutually exclusive.
+Indexes attach to either a catalog-table (`--catalog` + `--table`) or a dataset (`--dataset-id`). The two scopes are mutually exclusive.
 
 ```sh
-# Connection-table scope
-hotdata indexes list   --connection-id <id> --schema <schema> --table <table> [-o table|json|yaml]
-hotdata indexes create --connection-id <id> --schema <schema> --table <table> \
-  --name <name> --columns <cols> --type sorted|bm25|vector \
-  [--metric l2|cosine|dot] [--async] \
+# Catalog-table scope
+hotdata indexes list   --catalog <catalog> --table <table> [--schema <schema>] [-o table|json|yaml]
+hotdata indexes create --catalog <catalog> --table <table> [--schema <schema>] \
+  --column <col> --type sorted|bm25|vector \
+  [--name <name>] [--metric l2|cosine|dot] [--async] \
   [--embedding-provider-id <id>] [--dimensions <n>] [--output-column <name>] [--description <text>]
-hotdata indexes delete --connection-id <id> --schema <schema> --table <table> --name <name>
+hotdata indexes delete --catalog <catalog> --table <table> [--schema <schema>] --name <name>
 
 # Dataset scope
 hotdata indexes list   --dataset-id <id> [-o table|json|yaml]

diff --git a/skills/hotdata-analytics/SKILL.md b/skills/hotdata-analytics/SKILL.md
@@ -8,7 +8,7 @@ version: 0.3.3
 
 **OLAP-style analytics** in Hotdata: PostgreSQL-dialect SQL, query execution, run history, stored results, **Chain** materializations, and **sorted** indexes for filters and joins.
 
-**Prerequisites:** Authenticate, workspace, and catalog discovery via the **`hotdata`** skill (`connections`, `tables`, `datasets`, `databases`).
+**Prerequisites:** Authentication, workspace, and catalog discovery via the **`hotdata`** skill (`connections`, `tables`, `views`, `databases`).
 
 **Related skills:** **`hotdata-search`** (BM25, vector, retrieval indexes), **`hotdata-geospatial`** (spatial SQL).
 
@@ -23,7 +23,7 @@ hotdata query status <query_run_id> [--output table|json|csv]
 
 - **PostgreSQL dialect.** Quote mixed-case identifiers: `"CustomerName"`.
 - Use **`hotdata tables list`** for schema discovery — not `information_schema` via `query`.
-- Fully qualified names: `<connection>.<schema>.<table>`, `datasets.<schema>.<table>`, `<database>.<schema>.<table>`.
+- Fully qualified names: `<connection>.<schema>.<table>`, `views.<schema>.<table>`, `<database>.<schema>.<table>`.
 - Long-running queries may return `query_run_id` → poll with **`query status`** (exit `2` = still running). Do not re-run identical heavy SQL while polling.
 - For **workspace-wide** joins and naming, load **context:DATAMODEL** when listed (`hotdata context list` → `show DATAMODEL`) — see **`hotdata`** skill.
 
@@ -82,8 +82,8 @@ hotdata results <result_id> [--workspace-id <workspace_id>] [--output table|json
 2. **Materialize** (pick one)
 
    ```bash
-   hotdata datasets create --name chain_slice [--description "chain slice"] --sql "SELECT ..."
-   hotdata datasets create --name chain_from_saved [--description "from saved"] --query-id <query_id>
+   hotdata views create --name chain_slice --description "chain slice" --sql "SELECT ..."
+   hotdata views create --name chain_from_saved --description "from saved" --query-id <query_id>
    ```
 
    Or managed parquet:
@@ -94,10 +94,10 @@ hotdata results <result_id> [--workspace-id <workspace_id>] [--output table|json
    hotdata databases tables load slice --file ./slice.parquet
    ```
 
-3. **Chain query** — use printed **`full_name`** or `datasets list` **FULL NAME** column:
+3. **Chain query** — use printed **`full_name`** or `views list` **FULL NAME** column:
 
    ```bash
-   hotdata query "SELECT * FROM datasets.main.chain_slice WHERE ..."
+   hotdata query "SELECT * FROM views.main.chain_slice WHERE ..."
    hotdata query "SELECT * FROM analytics.public.slice WHERE ..."
    ```
 
@@ -122,4 +122,4 @@ List and delete use the same `hotdata indexes` commands as in the search skill;
 
 ## Sandboxes and chains
 
-Sandbox datasets use **`datasets.<sandbox_id>.<table>`**, not `datasets.main`. Run queries with active sandbox config or `hotdata sandbox <id> run hotdata query "..."`. See **`hotdata`** skill **Sandboxes**.
+Sandbox views use **`views.<sandbox_id>.<table>`**, not `views.main`. Run queries with active sandbox config or `hotdata sandbox <id> run hotdata query "..."`. See **`hotdata`** skill **Sandboxes**.
diff --git a/skills/hotdata-analytics/references/WORKFLOWS.md b/skills/hotdata-analytics/references/WORKFLOWS.md
@@ -2,7 +2,7 @@
 
 OLAP-style SQL, **History** (query runs and stored results), and **Chain** (materialized follow-ups). Requires **`hotdata`** for auth, workspaces, and catalog commands.
 
-**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for datasets vs managed databases.
+**Related:** **`hotdata-search`** for BM25/vector indexes and `hotdata search`; **`hotdata`** [WORKFLOWS.md](../../hotdata/references/WORKFLOWS.md) for views vs managed databases.
 
 ---
 
@@ -66,11 +66,11 @@ hotdata query "SELECT ..."
 
 Land a smaller table — pick one:
 
-**Datasets** (CSV/JSON/URL/SQL snapshot → `datasets.<schema>.<table>`):
+**Views** (SQL snapshot → `views.<schema>.<table>`):
 
 ```bash
-hotdata datasets create --label "chain revenue slice" --sql "SELECT ..." [--table-name chain_revenue_slice]
-hotdata datasets create --label "from saved" --query-id <query_id> [--table-name ...]
+hotdata views create --name chain_revenue_slice --description "chain revenue slice" --sql "SELECT ..."
+hotdata views create --name chain_from_saved --description "from saved" --query-id <query_id>
 ```
 
 **Managed database** (parquet → `<database>.<schema>.<table>`):
@@ -80,17 +80,17 @@ hotdata databases create --name chain_db --table revenue_slice
 hotdata databases tables load chain_db revenue_slice --file ./revenue_slice.parquet
 ```
 
-Note the printed **`full_name`** (e.g. `datasets.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For datasets, **`FULL NAME`** from `datasets list` is authoritative.
+Note the printed **`full_name`** (e.g. `views.main.chain_revenue_slice` or `chain_db.public.revenue_slice`). For views, **`FULL NAME`** from `views list` is authoritative.
 
 ### 3. Chain query
 
-Query using that name — do not hardcode `datasets.main` if the schema segment is a sandbox id:
+Query using that name — do not hardcode `views.main` if the schema segment is a sandbox id:
 
 ```bash
-hotdata datasets list
-hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..."
+hotdata views list
+hotdata query "SELECT * FROM views.main.chain_revenue_slice WHERE ..."
 # Sandbox example (use actual full_name from create or list):
-# hotdata query "SELECT * FROM datasets.s_ufmblmvq.chain_revenue_slice WHERE ..."
+# hotdata query "SELECT * FROM views.s_ufmblmvq.chain_revenue_slice WHERE ..."
 # Managed database:
 # hotdata query "SELECT * FROM chain_db.public.revenue_slice WHERE ..."
 ```
@@ -99,18 +99,18 @@ hotdata query "SELECT * FROM datasets.main.chain_revenue_slice WHERE ..."
 
 For **sandbox-scoped** chain tables:
 
-- Qualified name is **`datasets.<sandbox_id>.<table>`**, not `datasets.main`.
+- Qualified name is **`views.<sandbox_id>.<table>`**, not `views.main`.
 - Run queries with **active sandbox** in config (`hotdata sandbox set`) **or** inside **`hotdata sandbox <sandbox_id> run hotdata query "…"`**.
 - Without sandbox context, you may get **access denied** on sandbox-only tables.
 
 ### Naming and documentation
 
-- Prefer predictable `--table-name` values: `chain_<topic>_<YYYYMMDD>`.
+- Prefer predictable `--name` values: `chain_<topic>_<YYYYMMDD>`.
-- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`datasets.…` or `database.schema.table`).
+- Record long-lived chains in **context:DATAMODEL → Derived tables (Chain)** with the **full** SQL name you use (`views.…` or `database.schema.table`).
 - Promote join/grain findings to **context:DATAMODEL** when they should outlive the sandbox (**`hotdata`** skill).
 
 ### Guardrails
 
 - Materialize when the base scan is large and the follow-up runs many times.
 - Keep Chain tables focused; avoid wide `SELECT *` materializations when a narrow projection suffices.
-- For upload format choice (datasets vs databases), see **`hotdata`** WORKFLOWS — [Datasets vs managed databases](../../hotdata/references/WORKFLOWS.md#datasets-vs-managed-databases).
+- For source format choice (views vs databases), see **`hotdata`** WORKFLOWS — [Views vs managed databases](../../hotdata/references/WORKFLOWS.md#views-vs-managed-databases).
diff --git a/skills/hotdata-search/SKILL.md b/skills/hotdata-search/SKILL.md
@@ -20,12 +20,12 @@ Retrieval workloads in Hotdata: **BM25 full-text**, **vector similarity**, and t
 
 ```bash
 # BM25 (requires a BM25 index on the column)
-hotdata search "<query>" --type bm25 --table <connection.schema.table> --column <column> \
-  [--select <columns>] [--limit <n>] [--workspace-id <workspace_id>] [--output table|json|csv]
+hotdata search "<query>" --type bm25 --catalog <catalog> --table <table> --column <column> \
+  [--schema <schema>] [--select <columns>] [--limit <n>] [--workspace-id <workspace_id>] [--output table|json|csv]
 
 # Vector (requires a vector index; server auto-embeds the query text)
-hotdata search "<query>" --type vector --table <connection.schema.table> --column <source_text_column> \
-  [--select <columns>] [--limit <n>] [--workspace-id <workspace_id>] [--output table|json|csv]
+hotdata search "<query>" --type vector --catalog <catalog> --table <table> --column <source_text_column> \
+  [--schema <schema>] [--select <columns>] [--limit <n>] [--workspace-id <workspace_id>] [--output table|json|csv]
 ```
 
 | Type | Behavior |
@@ -41,22 +41,24 @@ hotdata search "<query>" --type vector --table <connection.schema.table> --colum
 
 ## Indexes (BM25 and vector)
 
-Indexes attach to a **connection table** (`--connection-id` + `--schema` + `--table`) or a **dataset** (`--dataset-id`). Scopes are mutually exclusive for create/delete.
+Indexes attach to a **catalog table** (`--catalog` + `--table`) or a **dataset** (`--dataset-id`). Scopes are mutually exclusive for create/delete.
+
+**Note:** `indexes create` uses `--catalog`/`--table`; `indexes list` and `indexes delete` still use `--connection-id`/`--schema`/`--table`.
 
 ```bash
-# List — workspace scan on connection tables (filter with -c / --schema / --table)
+# List — workspace scan (filter with --connection-id / --schema / --table)
 hotdata indexes list [--connection-id <id>] [--schema <schema>] [--table <table>] [--workspace-id <ws>] [--output table|json|yaml]
 hotdata indexes list --dataset-id <dataset_id> [--workspace-id <ws>] [--output table|json|yaml]
 
-# Connection table
-hotdata indexes create --connection-id <id> --schema <schema> --table <table> \
-  --name <name> --columns <cols> --type bm25|vector \
-  [--metric l2|cosine|dot] [--async] \
+# Catalog table (create uses --catalog; list/delete use --connection-id)
+hotdata indexes create --catalog <catalog> --table <table> --column <col> --type bm25|vector \
+  [--schema <schema>] [--name <name>] [--metric l2|cosine|dot] [--async] \
   [--embedding-provider-id <id>] [--dimensions <n>] [--output-column <name>] [--description <text>]
 hotdata indexes delete --connection-id <id> --schema <schema> --table <table> --name <name>
 
 # Dataset
-hotdata indexes create --dataset-id <dataset_id> --name <name> --columns <cols> --type bm25|vector ...
+hotdata indexes create --dataset-id <dataset_id> --columns <cols> --type bm25|vector \
+  [--name <name>] [--metric l2|cosine|dot] [--async] ...
 hotdata indexes delete --dataset-id <dataset_id> --name <name>
 ```
 
@@ -89,6 +91,6 @@ hotdata embedding-providers delete <id> [--workspace-id <workspace_id>]
 
 1. `hotdata tables list --connection-id <id>` — confirm column types.
 2. `hotdata indexes list` — avoid duplicate indexes.
-3. `hotdata indexes create ... --type bm25|vector` (add `--async` if large).
-4. `hotdata search "..." --type bm25|vector --table ... --column ...`
+3. `hotdata indexes create --catalog <catalog> --table <table> --column <col> --type bm25|vector` (add `--async` if large).
+4. `hotdata search "..." --type bm25|vector --catalog <catalog> --table <table> --column <col>`
 5. Record what exists in **context:DATAMODEL** (core skill) when the workspace should remember index choices.