From 5d4f759d71881b292a638a642498ccc0517eadb8 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 30 Apr 2026 21:24:31 -0700 Subject: [PATCH 1/4] explorer: dynamic cross-filter facet counts (issue #154) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements path A from issue #154 (Codex-resolved plan): facet counts recalculate when filters are toggled, matching the OpenContext UX Eric Kansa flagged. Counts reflect the full source + material + context + object_type filter combination; search and camera viewport remain out of v1 scope per the issue. How it works: - All four filter groups (source legend + material + sampled feature + specimen type) carry uniform `data-facet` / `data-value` count spans + `.facet-row` parents so refreshFacetCounts() updates text in place without rebuilding checkbox HTML (which would lose mid-interaction selections). - One debounced refreshFacetCounts() (~250 ms) with a generation/stale- result guard, mirroring the existing loadViewportSamples and refreshResultsTable patterns. - Strategy: * No filters active → restore baseline counts from facet_summaries (stashed on viewer._baselineCounts after initial load). * Exactly one facet value selected → cache lookup against isamples_202601_facet_cross_filter.parquet (~6 KB, single-filter pre-aggregation; one matching row per target facet value). * Anything else → on-the-fly: four concurrent GROUP BY queries on sample_facets_v2, each excluding the column being recomputed. - The selected facet's own values keep baseline counts (so the user can see what other values WOULD yield if they switched their selection within that group). - Zero-count rows dim (opacity 0.4) rather than hide; less layout churn and clearer signal that the selection eliminated those values. - During recompute the counts get a subtle italic .recomputing class so users see something is updating. 
Source counts are sample-level (matching facet_summaries / facets_url) and not H3 cluster dominant_source counts; the cluster-zoom caveat in "How It Works" already covers the dominant_source semantics for the globe view. Smoke-test: 0 JS exceptions, 0 console errors, 0 network failures. Co-Authored-By: Claude Opus 4.7 (1M context) --- tutorials/isamples_explorer.qmd | 231 +++++++++++++++++++++++++++++--- 1 file changed, 214 insertions(+), 17 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index a9912c1..f42a1b3 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -114,6 +114,11 @@ format: .filter-body { padding: 4px 0; } .filter-body label { display: block; font-size: 12px; padding: 2px 0; cursor: pointer; } .filter-body label:hover { color: #1565c0; } + /* Cross-filter zero-count dimming. Don't hide — just signal that the + value has no matches under current filters (issue #154). */ + .facet-row.zero { opacity: 0.4; } + .facet-row.zero:hover { opacity: 0.65; } + .facet-count.recomputing { opacity: 0.55; font-style: italic; } /* Results table below the globe */ #resultsTableWrap { margin-top: 16px; } #resultsTable { @@ -185,10 +190,10 @@ Circle size = log(sample count). Color = dominant data source.
- - - - + + + +
@@ -268,6 +273,10 @@ wide_url = `${R2_BASE}/current/wide.parquet` // v2 carries object_type alongside material and context (URI-string columns). facets_url = `${R2_BASE}/isamples_202601_sample_facets_v2.parquet` facet_summaries_url = `${R2_BASE}/isamples_202601_facet_summaries.parquet` +// Pre-aggregated cache for fast cross-filter count lookups in the +// single-facet-value-active case. Multi-filter combinations fall back to +// on-the-fly GROUP BY queries against facets_url. See issue #154. +cross_filter_url = `${R2_BASE}/isamples_202601_facet_cross_filter.parquet` // SKOS prefLabels for Material / Sampled Feature / Specimen Type URIs. // ~60 KB lookup; falls back to URI tail if a URI isn't covered. vocab_labels_url = `${R2_BASE}/vocab_labels.parquet` @@ -531,6 +540,32 @@ function updateResultsTableMeta(text, isLoading) { el.textContent = text; el.style.color = isLoading ? '#1565c0' : '#555'; } + +// === Cross-filter facet count updates (issue #154) === +// Updates the count span next to each checkbox for one facet group. If +// `countsMap` is null, falls back to baseline counts on `viewer._baselineCounts` +// (set in facetFilters). Dims rows where count === 0 so the user sees that +// their selection eliminated those values rather than silently hiding them. +function applyFacetCounts(facetKey, countsMap) { + const baseline = (viewer && viewer._baselineCounts) ? viewer._baselineCounts[facetKey] : null; + document.querySelectorAll(`.facet-count[data-facet="${facetKey}"]`).forEach(el => { + const value = el.getAttribute('data-value'); + let count; + if (countsMap) { + count = countsMap.has(value) ? countsMap.get(value) : 0; + } else { + count = baseline ? (baseline.get(value) ?? 
0) : 0; + } + el.textContent = `(${Number(count).toLocaleString()})`; + el.classList.remove('recomputing'); + const row = document.querySelector(`.facet-row[data-facet="${facetKey}"][data-value="${CSS.escape(value)}"]`); + if (row) row.classList.toggle('zero', count === 0); + }); +} + +function markFacetCountsRecomputing() { + document.querySelectorAll('.facet-count').forEach(el => el.classList.add('recomputing')); +} ``` ```{ojs} @@ -835,18 +870,34 @@ facetFilters = { } } - // Update source count badges in the (hardcoded) legend. - const srcCountMap = new Map(grouped.source.map(s => [s.uri, s.count])); - document.querySelectorAll('#sourceFilter .src-count').forEach(el => { - const key = el.getAttribute('data-source'); - const c = srcCountMap.get(key); - if (c != null) el.textContent = `(${Number(c).toLocaleString()})`; - }); + // Stash baseline counts on `viewer` so refreshFacetCounts() can + // restore them when filters return to the no-active-filters state. + // Schema: viewer._baselineCounts[facetKey] = Map(uri → count). + viewer._baselineCounts = { + source: new Map(grouped.source.map(s => [s.uri, s.count])), + material: new Map(grouped.material.map(m => [m.uri, m.count])), + context: new Map(grouped.context.map(c => [c.uri, c.count])), + object_type: new Map(grouped.object_type.map(o => [o.uri, o.count])), + }; + + // Paint baseline counts now so the source legend and (after this + // function's renderFilter calls below) the facet rows show numbers + // immediately, before any user interaction debounces refresh. + // Wrapped in setTimeout(0) so the renderFilter calls below land first. 
+ setTimeout(() => { + applyFacetCounts('source', null); + applyFacetCounts('material', null); + applyFacetCounts('context', null); + applyFacetCounts('object_type', null); + }, 0); const escAttr = (s) => String(s).replace(/&/g, '&').replace(/"/g, '"').replace(/ String(s).replace(/&/g, '&').replace(/ { + // Render checkboxes with `data-facet` / `data-value` count spans so + // refreshFacetCounts() can update text in place without rebuilding + // the HTML (which would lose mid-interaction selections). See #154. + const renderFilter = (bodyId, facetKey, items) => { const body = document.getElementById(bodyId); if (!body) return; if (items.length === 0) { @@ -854,13 +905,13 @@ facetFilters = { return; } body.innerHTML = items.map(it => - `` + `` ).join(''); }; - renderFilter('materialFilterBody', grouped.material); - renderFilter('contextFilterBody', grouped.context); - renderFilter('objectTypeFilterBody', grouped.object_type); + renderFilter('materialFilterBody', 'material', grouped.material); + renderFilter('contextFilterBody', 'context', grouped.context); + renderFilter('objectTypeFilterBody', 'object_type', grouped.object_type); console.log(`Facet filters loaded: ${grouped.material.length} materials, ${grouped.context.length} contexts, ${grouped.object_type.length} object types (vocab labels: ${vocabMap.size})`); } catch(err) { @@ -1189,6 +1240,148 @@ zoomWatcher = { } } + // === Cross-filter facet count refresh (issue #154) === + // + // Strategy: + // - No filters active → restore baseline counts (no query). + // - Exactly one facet value → cache lookup against cross_filter_url. + // - Anything else → on-the-fly group-by on facets_url, four + // concurrent queries (one per target facet), + // each excluding the column being recomputed. + // + // Debounced ~250 ms with a generation guard so rapid clicking only + // resolves the latest selection. Source counts are sample-level + // (not H3 dominant_source counts). 
+ let facetCountsReqId = 0; + let facetCountsDebounce = null; + + function describeActiveFilters() { + const sources = getActiveSources(); + const allSourcesChecked = sources.length === 4; + const mat = getCheckedValues('materialFilterBody'); + const ctx = getCheckedValues('contextFilterBody'); + const ot = getCheckedValues('objectTypeFilterBody'); + const dims = [ + { key: 'source', col: 'source', values: allSourcesChecked ? [] : sources }, + { key: 'material', col: 'material', values: mat }, + { key: 'context', col: 'context', values: ctx }, + { key: 'object_type', col: 'object_type', values: ot }, + ]; + const activeDims = dims.filter(d => d.values.length > 0); + const totalActiveValues = activeDims.reduce((n, d) => n + d.values.length, 0); + return { dims, activeDims, totalActiveValues }; + } + + function buildExcludeWhere(activeDims, excludeKey) { + const conds = activeDims + .filter(d => d.key !== excludeKey) + .map(d => { + const list = d.values.map(v => `'${String(v).replace(/'/g, "''")}'`).join(','); + return `${d.col} IN (${list})`; + }); + return conds.length > 0 ? conds.join(' AND ') : '1=1'; + } + + async function refreshFacetCountsNow(myReq) { + const { dims, activeDims, totalActiveValues } = describeActiveFilters(); + + // Case 1: no filters → restore baseline. + if (activeDims.length === 0) { + for (const d of dims) applyFacetCounts(d.key, null); + return; + } + + markFacetCountsRecomputing(); + + // Case 2: single-filter cache. The pre-aggregated parquet has a + // (filter_source, filter_material, filter_context, filter_object_type) + // schema where exactly one is non-null per row. Use it only when + // exactly one facet value is active across all dims (the cache shape). + const singleActiveDim = activeDims.length === 1 && activeDims[0].values.length === 1 + ? 
activeDims[0] : null; + if (singleActiveDim) { + try { + const filterCols = ['filter_source', 'filter_material', 'filter_context', 'filter_object_type']; + const filterColForKey = { + source: 'filter_source', + material: 'filter_material', + context: 'filter_context', + object_type: 'filter_object_type', + }; + const targetCol = filterColForKey[singleActiveDim.key]; + const value = String(singleActiveDim.values[0]).replace(/'/g, "''"); + const whereParts = filterCols.map(c => + c === targetCol ? `${c} = '${value}'` : `${c} IS NULL` + ); + const sql = ` + SELECT facet_type, facet_value, count + FROM read_parquet('${cross_filter_url}') + WHERE ${whereParts.join(' AND ')} + `; + const rows = await db.query(sql); + if (myReq !== facetCountsReqId) return; + if (rows && rows.length > 0) { + const grouped = { source: new Map(), material: new Map(), context: new Map(), object_type: new Map() }; + for (const r of rows) { + if (grouped[r.facet_type]) grouped[r.facet_type].set(r.facet_value, Number(r.count)); + } + for (const d of dims) { + if (d.key === singleActiveDim.key) { + // Selected facet keeps baseline counts (so the + // user can see what other values *would* yield). + applyFacetCounts(d.key, null); + } else { + applyFacetCounts(d.key, grouped[d.key]); + } + } + return; + } + // empty → fall through to on-the-fly + } catch (err) { + console.warn('Cross-filter cache lookup failed; falling back to on-the-fly:', err); + } + } + + // Case 3: on-the-fly — four GROUP BY queries against facets_url, each + // excluding the dim being recomputed. The selected dim's own counts + // stay at baseline (so the user can switch their selection within it). + const queries = dims.map(async (d) => { + if (d.key === (singleActiveDim?.key) || activeDims.find(a => a.key === d.key)) { + // Don't recompute the column whose own values the user is + // selecting. Show baseline so they can switch. 
+ applyFacetCounts(d.key, null); + return; + } + const where = buildExcludeWhere(activeDims, d.key); + const sql = ` + SELECT ${d.col} AS value, COUNT(*) AS count + FROM read_parquet('${facets_url}') + WHERE ${where} AND ${d.col} IS NOT NULL + GROUP BY ${d.col} + `; + try { + const rows = await db.query(sql); + if (myReq !== facetCountsReqId) return; + const map = new Map(); + for (const r of rows) map.set(r.value, Number(r.count)); + applyFacetCounts(d.key, map); + } catch (err) { + if (myReq !== facetCountsReqId) return; + console.warn(`Cross-filter on-the-fly failed for ${d.key}:`, err); + applyFacetCounts(d.key, null); // give up on this dim, leave baseline + } + }); + await Promise.all(queries); + } + + function refreshFacetCounts() { + clearTimeout(facetCountsDebounce); + facetCountsDebounce = setTimeout(() => { + const myReq = ++facetCountsReqId; + refreshFacetCountsNow(myReq); + }, 250); + } + // --- Source filter change handler --- const resUrls = { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }; document.getElementById('sourceFilter').addEventListener('change', async () => { @@ -1219,6 +1412,7 @@ zoomWatcher = { await loadViewportSamples(); } refreshResultsTable(); + refreshFacetCounts(); }); // --- Material/Context/Specimen filter change handler --- @@ -1231,6 +1425,7 @@ zoomWatcher = { loadViewportSamples(); } refreshResultsTable(); + refreshFacetCounts(); } document.getElementById('materialFilterBody').addEventListener('change', handleFacetFilterChange); document.getElementById('contextFilterBody').addEventListener('change', handleFacetFilterChange); @@ -1266,8 +1461,10 @@ zoomWatcher = { }); } - // Initial table load + // Initial table load + initial facet counts (paints baseline counts + + // applies any dimming if a `?sources=` URL param has narrowed the source set). 
refreshResultsTable(); + refreshFacetCounts(); // --- Camera change handler --- let timer = null; From e4eb5a682c09f69c0fd44c952b2833fb017ef05a Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 30 Apr 2026 21:43:37 -0700 Subject: [PATCH 2/4] explorer: fix multi-active dim counts to compose cross-dim filters Per semantics (A) confirmed in the PR #155 thread: every dim's value counts should reflect cross-dim filters, not just dims without active selections. Previously refreshFacetCountsNow's on-the-fly path skipped any dim that had selections (paint baseline), which broke multi-filter compositions: with Source=OpenContext + Material=Pottery selected, Material values still showed unfiltered baseline counts instead of "count of each material under Source=OpenContext." Fix: remove the skip in the on-the-fly fallback. The existing buildExcludeWhere() already excludes the dim being recomputed from the WHERE, so every dim's GROUP BY yields the correct (A)-semantic count. Single-active cache path is unchanged: under one active dim with no other filters, recomputing that dim's values = baseline by definition, and the cache is empirically confirmed to not carry rows for the filter-dim's own facet_type. Updated the comment to explain why. Smoke test still PASS. Co-Authored-By: Claude Opus 4.7 (1M context) --- tutorials/isamples_explorer.qmd | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index f42a1b3..d83a506 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -1327,8 +1327,10 @@ zoomWatcher = { } for (const d of dims) { if (d.key === singleActiveDim.key) { - // Selected facet keeps baseline counts (so the - // user can see what other values *would* yield). + // Active dim with no OTHER filters → recomputing + // its own values under "no other dim filters" = + // baseline. 
Cache also doesn't carry rows for the + // filter-dim's own facet_type (verified empirically). applyFacetCounts(d.key, null); } else { applyFacetCounts(d.key, grouped[d.key]); @@ -1343,15 +1345,13 @@ zoomWatcher = { } // Case 3: on-the-fly — four GROUP BY queries against facets_url, each - // excluding the dim being recomputed. The selected dim's own counts - // stay at baseline (so the user can switch their selection within it). + // excluding the dim being recomputed. Per semantics (A) (issue #154 + // / PR #155 thread): for *every* dim D, including dims with active + // selections, count(value=V) = `WHERE D = V AND (active filters on every other dim)`. + // This makes each value's count answer "how many samples have THIS + // value under my other filters", regardless of which values within + // this dim are currently selected. const queries = dims.map(async (d) => { - if (d.key === (singleActiveDim?.key) || activeDims.find(a => a.key === d.key)) { - // Don't recompute the column whose own values the user is - // selecting. Show baseline so they can switch. - applyFacetCounts(d.key, null); - return; - } const where = buildExcludeWhere(activeDims, d.key); const sql = ` SELECT ${d.col} AS value, COUNT(*) AS count From 1bcf1ad193dc9ab04dd1b4b74346513dc302be23 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 30 Apr 2026 22:03:02 -0700 Subject: [PATCH 3/4] =?UTF-8?q?explorer:=20address=20Codex=20review=20on?= =?UTF-8?q?=20#155=20=E2=80=94=20source=20semantics=20+=20debounce=20race?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes from the Codex review of #155: 1. Source filter "0 checked" now means "all" (no filter), matching Raymond's stated intuition for facet UX (PR #155 thread) and the semantics already used by material/context/object_type. 
Previously sourceFilterSQL returned `AND 1=0` when zero source checkboxes were checked, while refreshFacetCountsNow treated zero-checked as "no active source filter" and restored baseline counts — meaning the UI could show non-zero facet counts while the actual result set was empty. The describeActiveFilters helper already mapped both 0-checked and 4-checked to "source not filtering," so this just unifies sourceFilterSQL with that view. 2. Debounce race fix: refreshFacetCounts() now increments facetCountsReqId synchronously when scheduling, not after the 250 ms debounce fires. Previously, an in-flight count query could resolve during the debounce window of a newer schedule and still pass its generation check (because the gen was unchanged), repainting stale counts. Bumping the gen on schedule invalidates all in-flight queries immediately. Also added an early-stale guard at the top of refreshFacetCountsNow. Smoke test still PASS. Co-Authored-By: Claude Opus 4.7 (1M context) --- tutorials/isamples_explorer.qmd | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index d83a506..463bfb0 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -305,8 +305,12 @@ function getActiveSources() { function sourceFilterSQL(col) { const active = getActiveSources(); - if (active.length === 0) return ' AND 1=0'; // nothing checked = show nothing - if (active.length === 4) return ''; // all checked = no filter + // Empty == all (no filter), matching the semantics of the other facet + // groups (material / context / object_type) and Raymond's stated + // intuition (PR #155 thread). All-4 checked is also "all" by virtue + // of the IN list covering everything; we short-circuit to skip an + // unnecessary IN clause. 
+ if (active.length === 0 || active.length === 4) return ''; const list = active.map(s => `'${s}'`).join(','); return ` AND ${col} IN (${list})`; } @@ -1283,6 +1287,9 @@ zoomWatcher = { } async function refreshFacetCountsNow(myReq) { + // Stale check up front: if a newer schedule has happened during the + // debounce window, drop this run before doing any work. + if (myReq !== facetCountsReqId) return; const { dims, activeDims, totalActiveValues } = describeActiveFilters(); // Case 1: no filters → restore baseline. @@ -1375,9 +1382,14 @@ zoomWatcher = { } function refreshFacetCounts() { + // Bump the generation synchronously so any in-flight queries + // (whose `myReq` is now strictly less) are invalidated immediately + // — without this, a stale query could finish during the debounce + // window of the new request and pass the gen check, repainting + // stale counts (Codex review on PR #155). clearTimeout(facetCountsDebounce); + const myReq = ++facetCountsReqId; facetCountsDebounce = setTimeout(() => { - const myReq = ++facetCountsReqId; refreshFacetCountsNow(myReq); }, 250); } From a0345a6494193d876e2054bb307088c6a8333944 Mon Sep 17 00:00:00 2001 From: Raymond Yee Date: Thu, 30 Apr 2026 22:12:13 -0700 Subject: [PATCH 4/4] explorer: don't dim source legend rows when 0 checked (Codex P3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the prior fix unifying source semantics so 0 checked == 4 checked == "all sources / no filter," the .disabled styling on the legend rows was still based on the per-checkbox checked state. Result: unchecking all 4 source boxes painted every label dimmed (looked like every source was excluded) while results and counts correctly included all sources. Cosmetically inconsistent. Fix: dim a row only when the source filter is actually narrowing the result set (1–3 checked). 0 or 4 checked → no dimming, matching the "no source filter" semantics. 
Also reuses the resulting filterIsActive flag for the URL persistence branch a few lines down (was duplicating the `> 0 && < 4` test). Smoke test still PASS. Co-Authored-By: Claude Opus 4.7 (1M context) --- tutorials/isamples_explorer.qmd | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tutorials/isamples_explorer.qmd b/tutorials/isamples_explorer.qmd index 463bfb0..5b7c466 100644 --- a/tutorials/isamples_explorer.qmd +++ b/tutorials/isamples_explorer.qmd @@ -1397,16 +1397,21 @@ zoomWatcher = { // --- Source filter change handler --- const resUrls = { 4: h3_res4_url, 6: h3_res6_url, 8: h3_res8_url }; document.getElementById('sourceFilter').addEventListener('change', async () => { - // Toggle visual state on labels + // Toggle visual state on labels. Per the new source semantics + // (0 == all == 4), don't dim any row when the filter isn't actually + // narrowing the result set; only dim unchecked rows when 1–3 are + // checked. Without this, unchecking all 4 made every label appear + // disabled even though semantically all sources are active. + const active = getActiveSources(); + const filterIsActive = active.length > 0 && active.length < 4; document.querySelectorAll('#sourceFilter .legend-item').forEach(li => { const cb = li.querySelector('input'); - li.classList.toggle('disabled', !cb.checked); + li.classList.toggle('disabled', filterIsActive && !cb.checked); }); // Persist source filter in URL query string for bookmarkable links // (e.g. ?sources=OPENCONTEXT). Camera state lives in the hash. - const active = getActiveSources(); const params = new URLSearchParams(location.search); - if (active.length > 0 && active.length < 4) { + if (filterIsActive) { params.set('sources', active.join(',')); } else { params.delete('sources');