diff --git a/backend/config/config.js b/backend/config/config.js index 7c15ed9..0e7d92a 100644 --- a/backend/config/config.js +++ b/backend/config/config.js @@ -1,10 +1,19 @@ require("dotenv").config(); module.exports = { + // development: { + // dialect: "sqlite", + // storage: "./database.sqlite", + // logging: console.log, + // }, development: { - dialect: "sqlite", - storage: "./database.sqlite", - logging: console.log, + dialect: "postgres", + host: "localhost", + port: 5432, + database: "neurojson_dev", + username: process.env.DB_USER_LOCAL, + password: process.env.DB_PASSWORD_LOCAL, + logging: false, }, test: { dialect: "sqlite", diff --git a/backend/migrations/20260507145253-create-search-tables.js b/backend/migrations/20260507145253-create-search-tables.js new file mode 100644 index 0000000..d2c9deb --- /dev/null +++ b/backend/migrations/20260507145253-create-search-tables.js @@ -0,0 +1,130 @@ +"use strict"; + +/** @type {import('sequelize-cli').Migration} */ +module.exports = { + async up(queryInterface, Sequelize) { + // ioviews table + await queryInterface.createTable("ioviews", { + id: { + type: Sequelize.INTEGER, + autoIncrement: true, + primaryKey: true, + allowNull: false, + }, + dbname: { + type: Sequelize.STRING(30), + allowNull: true, + }, + dsname: { + type: Sequelize.STRING(30), + allowNull: true, + }, + subj: { + type: Sequelize.STRING(12), + allowNull: true, + }, + view: { + type: Sequelize.STRING(12), + allowNull: true, + }, + json: { + type: Sequelize.JSONB, + allowNull: true, + }, + search_vector: { + type: Sequelize.DataTypes.TSVECTOR, + allowNull: true, + }, + updated_at: { + type: Sequelize.DATE, + allowNull: false, + defaultValue: Sequelize.literal("CURRENT_TIMESTAMP"), + }, + }); + + // ioviews indexes + await queryInterface.addIndex("ioviews", ["view"], { + name: "idx_ioviews_view", + }); + await queryInterface.addIndex("ioviews", ["dbname"], { + name: "idx_ioviews_dbname", + }); + await queryInterface.addIndex("ioviews", 
["updated_at"], { + name: "idx_ioviews_updated_at", + }); + + // GIN indexes need raw query (not supported by addIndex) + await queryInterface.sequelize.query(` + CREATE INDEX IF NOT EXISTS idx_ioviews_search + ON ioviews USING GIN(search_vector); + CREATE INDEX IF NOT EXISTS idx_ioviews_json + ON ioviews USING GIN(json); + `); + + // iolinks table + await queryInterface.createTable("iolinks", { + id: { + type: Sequelize.INTEGER, + autoIncrement: true, + primaryKey: true, + allowNull: false, + }, + dbname: { + type: Sequelize.STRING(30), + allowNull: true, + }, + dsname: { + type: Sequelize.STRING(30), + allowNull: true, + }, + subj: { + type: Sequelize.TEXT, + allowNull: true, + }, + view: { + type: Sequelize.TEXT, + allowNull: true, + }, + json: { + type: Sequelize.JSONB, + allowNull: true, + }, + }); + + // iolinks indexes + await queryInterface.addIndex("iolinks", ["view"], { + name: "idx_iolinks_view", + }); + await queryInterface.addIndex("iolinks", ["dbname"], { + name: "idx_iolinks_dbname", + }); + await queryInterface.sequelize.query(` + CREATE INDEX IF NOT EXISTS idx_iolinks_json + ON iolinks USING GIN(json); + `); + + // sync_state table + await queryInterface.createTable("sync_state", { + dbname: { + type: Sequelize.STRING(30), + primaryKey: true, + allowNull: false, + }, + last_seq: { + type: Sequelize.TEXT, + allowNull: true, + }, + synced_at: { + type: Sequelize.DATE, + allowNull: false, + defaultValue: Sequelize.literal("CURRENT_TIMESTAMP"), + }, + }); + }, + + async down(queryInterface, Sequelize) { + await queryInterface.dropTable("ioviews"); + await queryInterface.dropTable("iolinks"); + await queryInterface.dropTable("sync_state"); + }, +}; diff --git a/backend/migrations/20260508195500-add-ioviews-unique-constraint.js b/backend/migrations/20260508195500-add-ioviews-unique-constraint.js new file mode 100644 index 0000000..757397d --- /dev/null +++ b/backend/migrations/20260508195500-add-ioviews-unique-constraint.js @@ -0,0 +1,20 @@ +"use 
strict"; + +/** @type {import('sequelize-cli').Migration} */ +module.exports = { + async up(queryInterface, Sequelize) { + // Required by upsertIoview's ON CONFLICT (dbname, dsname, subj, view). + await queryInterface.addConstraint("ioviews", { + fields: ["dbname", "dsname", "subj", "view"], + type: "unique", + name: "ioviews_dbname_dsname_subj_view_unique", + }); + }, + + async down(queryInterface, Sequelize) { + await queryInterface.removeConstraint( + "ioviews", + "ioviews_dbname_dsname_subj_view_unique" + ); + }, +}; diff --git a/backend/migrations/20260511145900-widen-ioviews-iolinks-text-columns.js b/backend/migrations/20260511145900-widen-ioviews-iolinks-text-columns.js new file mode 100644 index 0000000..c6cbe81 --- /dev/null +++ b/backend/migrations/20260511145900-widen-ioviews-iolinks-text-columns.js @@ -0,0 +1,60 @@ +"use strict"; + +/** @type {import('sequelize-cli').Migration} */ +module.exports = { + async up(queryInterface, Sequelize) { + // VARCHAR(n) → TEXT is a metadata-only change in Postgres (no table rewrite, + // no need to drop the unique constraint or indexes). 
+ await queryInterface.changeColumn("ioviews", "dbname", { + type: Sequelize.TEXT, + allowNull: true, + }); + await queryInterface.changeColumn("ioviews", "dsname", { + type: Sequelize.TEXT, + allowNull: true, + }); + await queryInterface.changeColumn("ioviews", "subj", { + type: Sequelize.TEXT, + allowNull: true, + }); + await queryInterface.changeColumn("iolinks", "dbname", { + type: Sequelize.TEXT, + allowNull: true, + }); + await queryInterface.changeColumn("iolinks", "dsname", { + type: Sequelize.TEXT, + allowNull: true, + }); + await queryInterface.changeColumn("sync_state", "dbname", { + type: Sequelize.TEXT, + allowNull: false, + }); + }, + + async down(queryInterface, Sequelize) { + await queryInterface.changeColumn("ioviews", "dbname", { + type: Sequelize.STRING(30), + allowNull: true, + }); + await queryInterface.changeColumn("ioviews", "dsname", { + type: Sequelize.STRING(30), + allowNull: true, + }); + await queryInterface.changeColumn("ioviews", "subj", { + type: Sequelize.STRING(12), + allowNull: true, + }); + await queryInterface.changeColumn("iolinks", "dbname", { + type: Sequelize.STRING(30), + allowNull: true, + }); + await queryInterface.changeColumn("iolinks", "dsname", { + type: Sequelize.STRING(30), + allowNull: true, + }); + await queryInterface.changeColumn("sync_state", "dbname", { + type: Sequelize.STRING(30), + allowNull: false, + }); + }, +}; diff --git a/backend/package.json b/backend/package.json index 1dde6de..fa156b3 100644 --- a/backend/package.json +++ b/backend/package.json @@ -11,7 +11,8 @@ "migrate:undo:all": "npx sequelize-cli db:migrate:undo:all", "seed": "npx sequelize-cli db:seed:all", "seed:undo": "npx sequelize-cli db:seed:undo:all", - "db:reset": "npx sequelize-cli db:migrate:undo:all && npx sequelize-cli db:migrate" + "db:reset": "npx sequelize-cli db:migrate:undo:all && npx sequelize-cli db:migrate", + "sync": "node sync/incrementalSync.js" }, "keywords": [ "express", diff --git 
a/backend/src/controllers/couchdb.controller.js b/backend/src/controllers/couchdb.controller.js index e913ee3..3700fc8 100644 --- a/backend/src/controllers/couchdb.controller.js +++ b/backend/src/controllers/couchdb.controller.js @@ -1,4 +1,5 @@ const axios = require("axios"); +const { sequelize } = require("../config/database"); // const COUCHDB_BASE_URL = // process.env.COUCHDB_BASE_URL || // "https://cors.redoc.ly/https://neurojson.io:7777"; @@ -38,72 +39,310 @@ const getDbStats = async (req, res) => { } }; -// cross-database search +// cross-database search — old version proxied to https://neurojson.org/io/search.cgi +// kept for reference; replaced by the Postgres-backed version below. +// const searchAllDatabases = async (req, res) => { +// try { +// const formData = req.body; +// const map = { +// keyword: "keyword", +// age_min: "agemin", +// age_max: "agemax", +// task_min: "taskmin", +// task_max: "taskmax", +// run_min: "runmin", +// run_max: "runmax", +// sess_min: "sessmin", +// sess_max: "sessmax", +// modality: "modality", +// run_name: "run", +// type_name: "type", +// session_name: "session", +// task_name: "task", +// limit: "limit", +// skip: "skip", +// count: "count", +// unique: "unique", +// gender: "gender", +// database: "dbname", +// dataset: "dsname", +// subject: "subname", +// }; +// +// const params = new URLSearchParams(); +// params.append("_get", "dbname, dsname, json"); +// +// Object.keys(formData).forEach((key) => { +// let val = formData[key]; +// if (val === "" || val === "any" || val === undefined || val === null) { +// return; +// } +// +// const queryKey = map[key]; +// if (!queryKey) return; +// +// if (key.startsWith("age")) { +// params.append(queryKey, String(Math.floor(val * 100)).padStart(5, "0")); +// } else if (key === "gender") { +// params.append(queryKey, val[0]); +// } else if (key === "modality") { +// params.append(queryKey, val.replace(/.*\(/, "").replace(/\).*/, "")); +// } else { +// params.append(queryKey, 
val.toString()); +// } +// }); +// +// const queryString = `?${params.toString()}`; +// const response = await axios.get( +// `https://cors.redoc.ly/https://neurojson.org/io/search.cgi${queryString}`, +// { +// headers: { +// Origin: "https://neurojson.io", +// "X-Requested-With": "XMLHttpRequest", +// }, +// } +// ); +// res.status(200).json(response.data); +// } catch (error) { +// console.error("Error searching all databases:", error.message); +// res.status(error.response?.status || 500).json({ +// message: "Error searching databases", +// error: error.message, +// }); +// } +// }; + +// helpers for the Postgres-backed search +function isFilter(v) { + return v !== "" && v !== "any" && v !== undefined && v !== null; +} +function pad4(n) { + return String(n).padStart(4, "0"); +} +function pad5(n) { + return String(n).padStart(5, "0"); +} + +// cross-database search — Postgres-backed (queries ioviews) const searchAllDatabases = async (req, res) => { try { - const formData = req.body; - const map = { - keyword: "keyword", - age_min: "agemin", - age_max: "agemax", - task_min: "taskmin", - task_max: "taskmax", - run_min: "runmin", - run_max: "runmax", - sess_min: "sessmin", - sess_max: "sessmax", - modality: "modality", - run_name: "run", - type_name: "type", - session_name: "session", - task_name: "task", - limit: "limit", - skip: "skip", - count: "count", - unique: "unique", - gender: "gender", - database: "dbname", - dataset: "dsname", - subject: "subname", - }; + const f = req.body || {}; + const where = []; + const repl = {}; - const params = new URLSearchParams(); - params.append("_get", "dbname, dsname, json"); + // Pick which view to search. + // Subject-level filters → subjects view; otherwise dbinfo. 
+ const subjectFilterKeys = [ + "age_min", "age_max", "gender", + "task_min", "task_max", "task_name", + "run_min", "run_max", "run_name", + "sess_min", "sess_max", "session_name", + "type_name", "modality", "subject", + ]; + const isSubjectSearch = subjectFilterKeys.some((k) => isFilter(f[k])); + where.push(`view = :view`); + repl.view = isSubjectSearch ? "subjects" : "dbinfo"; - Object.keys(formData).forEach((key) => { - let val = formData[key]; - if (val === "" || val === "any" || val === undefined || val === null) { - return; - } + // Range filters compare against zero-padded key components. + // json->'key' = [age, gender, sess, mod, task, run, subjId] + if (isFilter(f.age_min)) { + where.push(`(json->'key'->>0) >= :ageMin`); + repl.ageMin = pad5(Math.floor(Number(f.age_min) * 100)); + } + if (isFilter(f.age_max)) { + where.push(`(json->'key'->>0) <= :ageMax`); + repl.ageMax = pad5(Math.floor(Number(f.age_max) * 100)); + } + if (isFilter(f.sess_min)) { + where.push(`(json->'key'->>2) >= :sessMin`); + repl.sessMin = pad4(f.sess_min); + } + if (isFilter(f.sess_max)) { + where.push(`(json->'key'->>2) <= :sessMax`); + repl.sessMax = pad4(f.sess_max); + } + if (isFilter(f.task_min)) { + where.push(`(json->'key'->>4) >= :taskMin`); + repl.taskMin = pad4(f.task_min); + } + if (isFilter(f.task_max)) { + where.push(`(json->'key'->>4) <= :taskMax`); + repl.taskMax = pad4(f.task_max); + } + if (isFilter(f.run_min)) { + where.push(`(json->'key'->>5) >= :runMin`); + repl.runMin = pad4(f.run_min); + } + if (isFilter(f.run_max)) { + where.push(`(json->'key'->>5) <= :runMax`); + repl.runMax = pad4(f.run_max); + } + if (isFilter(f.gender)) { + // stored as one upper-case char left-padded to 4 chars + where.push(`(json->'key'->>1) LIKE :gender`); + repl.gender = `%${String(f.gender)[0].toUpperCase()}`; + } - const queryKey = map[key]; - if (!queryKey) return; + // Name filters — jsonb ? checks if a string is an element of the array. 
+ if (isFilter(f.task_name)) { + where.push(`json->'value'->'tasks' ? :taskName`); + repl.taskName = String(f.task_name); + } + if (isFilter(f.run_name)) { + where.push(`json->'value'->'runs' ? :runName`); + repl.runName = String(f.run_name); + } + if (isFilter(f.session_name)) { + where.push(`json->'value'->'sessions' ? :sessName`); + repl.sessName = String(f.session_name); + } + if (isFilter(f.type_name)) { + where.push(`json->'value'->'types' ? :typeName`); + repl.typeName = String(f.type_name); + } + if (isFilter(f.modality)) { + // form sometimes wraps as "fNIRS (nirs)" — pull text inside parens + const mod = String(f.modality).replace(/.*\(/, "").replace(/\).*/, ""); + where.push(`json->'value'->'modalities' ? :modality`); + repl.modality = mod; + } - if (key.startsWith("age")) { - params.append(queryKey, String(Math.floor(val * 100)).padStart(5, "0")); - } else if (key === "gender") { - params.append(queryKey, val[0]); - } else if (key === "modality") { - params.append(queryKey, val.replace(/.*\(/, "").replace(/\).*/, "")); + // Dataset-level modality filter (multi-select + AND/OR). + // Queries json->'modality' on dbinfo rows, not subjects rows. + if (Array.isArray(f.modalities) && f.modalities.length > 0) { + const op = f.modality_mode === "and" ? " AND " : " OR "; + const parts = f.modalities.map((m, i) => { + repl[`dmod${i}`] = String(m); + return isSubjectSearch + ? `dsi.json->'modality' ? :dmod${i}` + : `json->'modality' ? 
:dmod${i}`; + }); + const condition = `(${parts.join(op)})`; + if (isSubjectSearch) { + where.push(`EXISTS ( + SELECT 1 FROM ioviews dsi + WHERE dsi.dbname = ioviews.dbname + AND dsi.dsname = ioviews.dsname + AND dsi.view = 'dbinfo' + AND ${condition} + )`); } else { - params.append(queryKey, val.toString()); + where.push(condition); } + } + + // db / ds / subj filters + if (isFilter(f.database)) { + where.push(`dbname = :dbname`); + repl.dbname = String(f.database); + } + if (isFilter(f.dataset)) { + where.push(`dsname = :dsname`); + repl.dsname = String(f.dataset); + } + if (isFilter(f.subject)) { + where.push(`subj = :subj`); + repl.subj = String(f.subject); + } + + // Keyword search — match anywhere relevant. + // plainto_tsquery treats input as plain words AND'd together; ignores + // operator chars like "-" and "OR" so dataset names with hyphens + // (e.g. "ABIDE - CMU_a") don't get parsed as NOT clauses. + // ILIKE on dbname/dsname adds substring matching so "fnirs" finds + // "bfnirs", "openfnirs", and any dataset id containing it. + // ILIKE on json->>'name' covers the human-readable name from + // dataset_description.json (e.g. "ABIDE - CMU_a"), which is where the + // user-visible dataset titles live — dsname column often stores just + // an opaque id like "CMU_a" without the prefix. + // ILIKE pattern normalizes whitespace/hyphens to % wildcards so + // "ABIDE - CMU_a" matches stored names regardless of separator style. + // The whole group is parenthesised so it ANDs cleanly with other filters. + if (isFilter(f.keyword)) { + where.push(`( + search_vector @@ plainto_tsquery('english', :keyword) + OR dbname ILIKE :keywordLike + OR dsname ILIKE :keywordLike + OR (json->>'name') ILIKE :keywordLike + )`); + repl.keyword = String(f.keyword); + repl.keywordLike = `%${String(f.keyword).replace(/[\s-]+/g, "%")}%`; + } + + // File-type filter — array of extensions like [".jdb", ".snirf"]. 
+ // Dataset-level: include rows whose (dbname, dsname) has at least one + // iolinks file with a matching view (extension). Per-subject filtering + // isn't possible here because iolinks.subj stores file size, not subj id. + // Use IN (:array) — Sequelize replacements expand arrays as 'a','b','c', + // which fits IN(...) but NOT ANY(...). + if (Array.isArray(f.file_type) && f.file_type.length > 0) { + where.push(`EXISTS ( + SELECT 1 FROM iolinks l + WHERE l.dbname = ioviews.dbname + AND l.dsname = ioviews.dsname + AND l.view IN (:fileTypes) + )`); + repl.fileTypes = f.file_type.map((t) => String(t)); + } + + const limit = Math.min(parseInt(f.limit) || 100, 1000); + const offset = parseInt(f.skip) || 0; + repl.limit = limit; + repl.offset = offset; + + // When file_type filter is active, also return a sample of the actual + // matching iolinks rows (filename, url, path, suffix) per dataset, plus + // a total count. Frontend shows up to 10 as clickable filenames and a + // "Download manifest" button for the full list via a separate endpoint. + const matchingFilesActive = + Array.isArray(f.file_type) && f.file_type.length > 0; + const matchingFilesColumn = matchingFilesActive + ? `, + COALESCE(( + SELECT jsonb_agg(t.json) + FROM ( + SELECT l.json + FROM iolinks l + WHERE l.dbname = ioviews.dbname + AND l.dsname = ioviews.dsname + AND l.view IN (:fileTypes) + ORDER BY l.id + LIMIT 10 + ) t + ), '[]'::jsonb)::text AS matching_files, + (SELECT COUNT(*) FROM iolinks l + WHERE l.dbname = ioviews.dbname + AND l.dsname = ioviews.dsname + AND l.view IN (:fileTypes))::int AS matching_files_total` + : ""; + + // dbinfo was stored flat ({name, subj, ...}); subjects was stored wrapped + // ({key, value}). Frontend expects parsed.value.subj for datasets, so we + // wrap dbinfo on the way out. 
+ const sql = ` + SELECT + dbname, + dsname, + subj, + CASE + WHEN view = 'dbinfo' THEN jsonb_build_object('value', json)::text + ELSE json::text + END AS json${matchingFilesColumn} + FROM ioviews + WHERE ${where.join(" AND ")} + ORDER BY dbname, dsname, subj + LIMIT :limit OFFSET :offset + `; + + const rows = await sequelize.query(sql, { + replacements: repl, + type: sequelize.QueryTypes.SELECT, }); - const queryString = `?${params.toString()}`; - const response = await axios.get( - `https://cors.redoc.ly/https://neurojson.org/io/search.cgi${queryString}`, - { - headers: { - Origin: "https://neurojson.io", - "X-Requested-With": "XMLHttpRequest", - }, - } - ); - res.status(200).json(response.data); + res.status(200).json(rows); } catch (error) { console.error("Error searching all databases:", error.message); - res.status(error.response?.status || 500).json({ + res.status(500).json({ message: "Error searching databases", error: error.message, }); @@ -237,6 +476,142 @@ const getDatasetMeta = async (req, res) => { // } +// Downloadable list of every matching iolinks URL for a dataset. +// Three formats via ?format=: +// - txt (default) → plain URL list (use with `wget -i`) +// - sh → bash script with curl commands (Mac/Linux) +// - bat → Windows batch script with curl commands +// All three avoid server-side zipping — the user's machine pulls files +// directly from neurojson.org/io, so this Express server stays light. +const getDatasetFilesManifest = async (req, res) => { + try { + const { dbName, dsName } = req.params; + const rawExt = req.query.ext; + const format = String(req.query.format || "txt").toLowerCase(); + const exts = Array.isArray(rawExt) + ? rawExt + : typeof rawExt === "string" && rawExt.length > 0 + ? rawExt.split(",") + : []; + + if (exts.length === 0) { + res.status(400).send("ext query parameter required (e.g. 
?ext=.jdb)"); + return; + } + + const rows = await sequelize.query( + `SELECT json->'value'->>'url' AS url, + json->'value'->>'file' AS file + FROM iolinks + WHERE dbname = :dbname + AND dsname = :dsname + AND view IN (:exts) + ORDER BY id`, + { + replacements: { dbname: dbName, dsname: dsName, exts }, + type: sequelize.QueryTypes.SELECT, + } + ); + + const files = rows.filter((r) => r.url); + const urls = files.map((r) => r.url); + const baseName = `${dbName}_${dsName}_${exts.join("_")}`; + const extLabel = exts.join(", "); + + // Strip any path separators or quote chars from the parsed filename + // before using it in shell commands — file names come from iolinks + // and are usually content hashes, but defensive belt-and-suspenders. + const safeName = (s) => + (s || "").replace(/["\\\/\r\n]/g, "").trim(); + + let body; + let contentType; + let filename; + + if (format === "sh") { + // Bash script — curl is preinstalled on macOS and most Linux distros. + // -L follows redirects, -C - resumes interrupted downloads, -o saves + // with our parsed filename (the URL is a CGI query — using -O would + // save files as literal `stat.cgi?...`). + body = + `#!/bin/bash\n` + + `# Downloads ${extLabel} files from ${dbName}/${dsName}\n` + + `# Usage: bash ${baseName}_download.sh\n` + + `set -e\n` + + `mkdir -p "neurojson_downloads"\n` + + `cd "neurojson_downloads" || exit 1\n` + + files + .map((r) => { + const fn = safeName(r.file); + return fn + ? `curl -L -C - -o "${fn}" "${r.url}"` + : `curl -L -C - -O "${r.url}"`; + }) + .join("\n") + + `\necho "Done. Files saved to $(pwd)"\n`; + contentType = "application/x-sh; charset=utf-8"; + filename = `${baseName}_download.sh`; + } else if (format === "bat") { + // Windows batch — curl ships with Windows 10+. Uses CRLF line endings + // for proper rendering in CMD. /d on cd handles cross-drive paths. 
+ body = + `@echo off\r\n` + + `REM Downloads ${extLabel} files from ${dbName}/${dsName}\r\n` + + `REM Usage: double-click or run ${baseName}_download.bat\r\n` + + `if not exist "neurojson_downloads" mkdir "neurojson_downloads"\r\n` + + `cd /d "neurojson_downloads"\r\n` + + files + .map((r) => { + const fn = safeName(r.file); + return fn + ? `curl -L -C - -o "${fn}" "${r.url}"` + : `curl -L -C - -O "${r.url}"`; + }) + .join("\r\n") + + `\r\necho Done. Files saved to %cd%\r\n` + + `pause\r\n`; + contentType = "text/plain; charset=utf-8"; + filename = `${baseName}_download.bat`; + } else { + // Default: plain URL list, one per line (advanced users with wget). + body = urls.join("\n") + "\n"; + contentType = "text/plain; charset=utf-8"; + filename = `${baseName}_manifest.txt`; + } + + res.setHeader("Content-Type", contentType); + res.setHeader( + "Content-Disposition", + `attachment; filename="${filename}"` + ); + res.send(body); + } catch (error) { + console.error("Error generating manifest:", error.message); + res.status(500).send(`Error generating manifest: ${error.message}`); + } +}; + +// distinct file extensions present in iolinks across all synced DBs. +// Drives the multi-select "File types" filter on the search page. 
+const getFileTypes = async (req, res) => { + try { + const rows = await sequelize.query( + `SELECT DISTINCT view AS type + FROM iolinks + WHERE view IS NOT NULL AND view <> '' + ORDER BY view`, + { type: sequelize.QueryTypes.SELECT } + ); + res.status(200).json(rows.map((r) => r.type)); + } catch (error) { + console.error("Error fetching file types:", error.message); + res.status(500).json({ + message: "Error fetching file types", + error: error.message, + }); + } +}; + module.exports = { getDbList, getDbStats, @@ -245,4 +620,6 @@ module.exports = { searchAllDatabases, getDatasetDetail, getDatasetMeta, + getFileTypes, + getDatasetFilesManifest, }; diff --git a/backend/src/routes/dbs.routes.js b/backend/src/routes/dbs.routes.js index c8a57fa..45979ac 100644 --- a/backend/src/routes/dbs.routes.js +++ b/backend/src/routes/dbs.routes.js @@ -6,6 +6,8 @@ const { getDbInfo, getDbDatasets, searchAllDatabases, + getFileTypes, + getDatasetFilesManifest, // searchDatabase, } = require("../controllers/couchdb.controller"); @@ -15,9 +17,18 @@ const router = express.Router(); router.get("/", getDbList); router.get("/stats", getDbStats); +// distinct file extensions across all iolinks rows (drives the file-type +// filter on the search page). Must come BEFORE the /:dbName route, otherwise +// Express treats "file-types" as a dbName. +router.get("/file-types", getFileTypes); + // cross-database search router.post("/search", searchAllDatabases); +// downloadable manifest (plain text) of all iolinks URLs for a dataset +// filtered by extension(s). e.g. 
/dbs/bfnirs/Motion-Yucel2014-I/files/manifest?ext=.jdb +router.get("/:dbName/:dsName/files/manifest", getDatasetFilesManifest); + // Specific database routes router.get("/:dbName", getDbInfo); router.get("/:dbName/datasets", getDbDatasets); diff --git a/backend/sync/incrementalSync.js b/backend/sync/incrementalSync.js new file mode 100644 index 0000000..766f1a5 --- /dev/null +++ b/backend/sync/incrementalSync.js @@ -0,0 +1,605 @@ +"use strict"; + +require("dotenv").config(); +const axios = require("axios"); +const { sequelize } = require("../src/config/database"); + +const COUCHDB_URL = process.env.COUCHDB_URL || "https://neurojson.io:7777"; +const CONCURRENCY = 5; + +// fetch database list dynamically from registry +// registry doc shape: { database: [{ id, name, ... }, ...] } +async function getDatabases() { + const response = await axios.get(`${COUCHDB_URL}/sys/registry`); + const entries = response.data?.database || []; + const databases = entries.map((db) => db.id).filter(Boolean); + console.log(`Found ${databases.length} databases in registry`); + return databases; +} + +// === Local ports of CouchDB _design/qq map functions === +// 1:1 ports of dbinfo / subjects / links views. If upstream views change, +// these drift silently. 
// Local port of the `dbinfo` view: summarises one dataset document.
//
// Returns { name, length, readme, info, subj, modality } where
//   - name     is dataset_description.json's Name (or the doc _id when that
//              file is absent),
//   - length   is the character length of the JSON-serialised document,
//   - readme   is the first 256 characters of README / README.md / README.rst,
//   - info     is the dataset_description.json object (or { Name: _id }),
//   - subj     lists every top-level key containing "ub-" (["nonbids"] if none),
//   - modality lists the dot-free folder names found under each subject and
//              under each of its "ses*" folders.
function transformDbinfo(doc) {
  const txt = doc["README"] || doc["README.md"] || doc["README.rst"] || "";
  const rawtext = JSON.stringify(doc);
  const datainfo = doc["dataset_description.json"] || { Name: doc._id };
  const subjlist = [];
  const modalitylist = [];

  for (const item of Object.keys(doc)) {
    // Substring test (not a prefix test): any key containing "ub-" counts
    // as a subject here.
    if (item.indexOf("ub-") !== -1) {
      subjlist.push(item);
      for (const modal of Object.keys(doc[item] || {})) {
        if (modal.indexOf("ses") === 0) {
          // Session folder: modalities live one level further down.
          for (const m of Object.keys(doc[item][modal] || {})) {
            if (m.indexOf(".") === -1 && modalitylist.indexOf(m) === -1) {
              modalitylist.push(m);
            }
          }
        } else if (
          modal.indexOf(".") === -1 &&
          modalitylist.indexOf(modal) === -1
        ) {
          // Keys containing a dot are treated as files, not modality folders.
          modalitylist.push(modal);
        }
      }
    }
  }

  if (subjlist.length === 0) subjlist.push("nonbids");

  // No folder-derived modality: fall back to sniffing well-known JData
  // keywords anywhere in the serialised document.
  if (modalitylist.length === 0) {
    if (rawtext.indexOf('"MeshNode"') !== -1) modalitylist.push("JMesh");
    if (rawtext.indexOf('"NIFTIData"') !== -1) modalitylist.push("JNIFTI");
    if (rawtext.indexOf('"SNIRFData"') !== -1) modalitylist.push("JSNIRF");
    if (rawtext.indexOf('"_ArrayType_"') !== -1) modalitylist.push("JData");
  }

  return {
    name: datainfo.Name,
    length: rawtext.length,
    readme: String(txt).slice(0, 256),
    info: datainfo,
    subj: subjlist,
    modality: modalitylist,
  };
}

// Local port of the `subjects` view: emits one { id, key, value } row per
// top-level key matching /^[sS]ub-/.
//
//   key   = [age*100 (5 chars), gender initial (4 chars), #sessions,
//            #modalities, #tasks, #runs (4 chars each), subject id without
//            its 4-character "sub-" prefix] — all zero-padded strings, so
//            range filters can compare them lexicographically.
//   value = { sessions, modalities, tasks, runs, types } name lists.
//
// Documents whose _id is one of the skipIds below emit nothing.
// Age and gender come from the "participants.tsv" columns when present;
// the defaults are age -0.01 and gender "N".
function transformSubjects(doc) {
  const results = [];
  const skipIds = ["sidecards", "derivatives", "sourcedata", "code"];
  if (skipIds.indexOf(doc._id) !== -1) return results;

  for (const subj of Object.keys(doc)) {
    if (!/^[sS]ub-/.test(subj)) continue;

    const sessionlist = [];
    const modalitylist = [];
    const tasklist = [];
    const runlist = [];
    const filetype = [];
    let age = -0.01;
    let gender = "N";

    const p = doc["participants.tsv"];
    if (p && Array.isArray(p.participant_id)) {
      // First participant_id that occurs as a substring of the subject key.
      // NOTE(review): substring matching means "sub-1" also matches
      // "sub-10"; kept as-is to stay aligned with the view this mirrors.
      let idx = -1;
      for (let i = 0; i < p.participant_id.length; i++) {
        if (subj.indexOf(String(p.participant_id[i])) > -1) {
          idx = i;
          break;
        }
      }

      if (idx >= 0) {
        // Age: try exact, UPPERCASE, then Capitalised column names.
        for (const agekey of ["age", "age_scan", "age_at_scan"]) {
          if (age >= 0) break;
          if (p[agekey]) {
            age = p[agekey][idx];
            break;
          } else if (p[agekey.toUpperCase()]) {
            age = p[agekey.toUpperCase()][idx];
            break;
          } else {
            const cap = agekey.charAt(0).toUpperCase() + agekey.slice(1);
            if (p[cap]) {
              age = p[cap]; // matches upstream view (drops [idx] here)
              break;
            }
          }
        }
        // Still unset: take any column whose name contains "age"
        // (the last matching column wins).
        if (age < 0) {
          for (const pfield of Object.keys(p)) {
            if (pfield.toLowerCase().indexOf("age") >= 0) {
              age = p[pfield][idx];
            }
          }
        }
        // Gender: same lookup ladder over "sex" / "gender".
        for (const sexkey of ["sex", "gender"]) {
          if (gender !== "N") break;
          if (p[sexkey]) {
            gender = p[sexkey][idx];
            break;
          } else if (p[sexkey.toUpperCase()]) {
            gender = p[sexkey.toUpperCase()][idx];
            break;
          } else {
            const cap = sexkey.charAt(0).toUpperCase() + sexkey.slice(1);
            if (p[cap]) {
              gender = p[cap]; // matches upstream view (drops [idx] here)
              break;
            }
          }
        }
        if (gender === "N") {
          for (const pfield of Object.keys(p)) {
            if (pfield.toLowerCase().indexOf("sex") >= 0) {
              gender = p[pfield][idx];
            }
          }
        }
        if (gender === "N") {
          for (const pfield of Object.keys(p)) {
            if (pfield.toLowerCase().indexOf("gender") >= 0) {
              gender = p[pfield][idx];
            }
          }
        }
      }
    }

    const subjDoc = doc[subj] || {};

    // Collect run / task names and file-type stems from the "_"-separated
    // parts of every file name in a modality folder.
    const parseFiles = (container) => {
      for (const filename of Object.keys(container || {})) {
        for (const task of filename.split("_")) {
          if (task.indexOf("run-") === 0) {
            if (runlist.indexOf(task.substring(4)) === -1) {
              runlist.push(task.substring(4));
            }
          } else if (task.indexOf("task-") === 0) {
            if (tasklist.indexOf(task.substring(5)) === -1) {
              tasklist.push(task.substring(5));
            }
          } else if (task.indexOf(".") > 0) {
            // e.g. "bold.nii.gz" -> "bold"
            const tmp = task.substring(0, task.indexOf("."));
            if (filetype.indexOf(tmp) === -1) filetype.push(tmp);
          }
        }
      }
    };

    for (const modal of Object.keys(subjDoc)) {
      if (modal.indexOf("ses-") === 0) {
        if (sessionlist.indexOf(modal.substring(4)) === -1) {
          sessionlist.push(modal.substring(4));
        }
        for (const modname of Object.keys(subjDoc[modal] || {})) {
          if (
            modname.indexOf(".") === -1 &&
            modalitylist.indexOf(modname) === -1
          ) {
            modalitylist.push(modname);
          }
          parseFiles(subjDoc[modal][modname]);
        }
      } else if (
        modal.indexOf(".") === -1 &&
        modalitylist.indexOf(modal) === -1
      ) {
        // Files are only parsed the first time a modality name is seen.
        modalitylist.push(modal);
        parseFiles(subjDoc[modal]);
      }
    }

    // Normalise gender to a single upper-case initial, and age to an
    // integer number of hundredths (negative / non-numeric -> -1).
    if (typeof gender === "string") {
      gender = gender.substring(0, 1).toUpperCase();
    } else {
      gender = gender + "";
    }
    if (typeof age === "string" && isNaN(+age)) age = -0.001;
    if (typeof age === "string") age = +age;
    if (age < 0) age = -0.01;
    age = Math.floor(age * 100);

    results.push({
      id: doc._id,
      key: [
        ("0000" + age).slice(-5),
        ("000" + gender).slice(-4),
        ("000" + sessionlist.length).slice(-4),
        ("000" + modalitylist.length).slice(-4),
        ("000" + tasklist.length).slice(-4),
        ("000" + runlist.length).slice(-4),
        subj.substring(4),
      ],
      value: {
        sessions: sessionlist,
        modalities: modalitylist,
        tasks: tasklist,
        runs: runlist,
        types: filetype,
      },
    });
  }

  return results;
}

// Local port of the `links` view: walks the document (at most 10 levels
// deep) and emits one { id, key, value } row per distinct `_DataLink_` URL.
//
//   key   = [file suffix (e.g. ".nii.gz"), size in bytes]
//   value = { path: JSONPath-like location of the parent object,
//             url:  the link with any trailing ":$<jsonpath>" removed,
//             file: file name including suffix,
//             suffix,
//             ref:  the "$..." JSON path that followed ":" in the link,
//                   or null when the link has none }
//
// Links without both a parsable `file=` name and a `size=` value are
// skipped, but still count as "seen" for de-duplication.
function transformLinks(doc) {
  const results = [];
  const filenameRe = /file=([^\/]*\/)*([^&\/\.]+)(\.[^.&%:]+(\.gz)*)([&:].*)*$/;
  const filesizeRe = /size=(\d+)/;
  const jsonpathRe = /:(\$[^&]+)/;
  const urlhash = {};

  function traverse(obj, level, rootpath) {
    if (level > 10) return;
    if (obj === null || typeof obj !== "object") return;

    for (const subkey of Object.keys(obj)) {
      const v = obj[subkey];
      if (
        subkey === "_DataLink_" &&
        typeof v === "string" &&
        v.indexOf("http") !== -1
      ) {
        const uniqurl = v.split(":$")[0];
        if (!Object.prototype.hasOwnProperty.call(urlhash, uniqurl)) {
          const fname = v.match(filenameRe);
          const fsize = v.match(filesizeRe);
          const jmatch = v.match(jsonpathRe);
          const jpath = jmatch !== null ? jmatch[1] : null;
          urlhash[uniqurl] = 1;
          if (fname && fsize) {
            results.push({
              id: doc._id,
              key: [fname[3], parseInt(fsize[1], 10)],
              value: {
                path: rootpath,
                url: uniqurl,
                file: fname[2] + fname[3],
                suffix: fname[3],
                ref: jpath,
              },
            });
          }
        }
      }
      if (typeof v === "object" && v !== null) {
        traverse(v, level + 1, rootpath + "." + subkey);
      }
    }
  }

  traverse(doc, 1, "$");
  return results;
}

// === DB helpers (write helpers accept an optional transaction) ===

// Read the stored CouchDB sequence checkpoint for `dbname`.
// Returns "0" when no checkpoint row exists. A query failure is logged and
// also reported as "0" (best effort), so callers cannot distinguish
// "never synced" from "could not read the checkpoint".
async function getLastSeq(dbname) {
  try {
    const result = await sequelize.query(
      "SELECT last_seq FROM sync_state WHERE dbname = :dbname",
      { replacements: { dbname }, type: sequelize.QueryTypes.SELECT }
    );
    return result[0]?.last_seq || "0";
  } catch (err) {
    console.error(`Error getting last_seq for ${dbname}:`, err.message);
    return "0";
  }
}

// Insert or update the sequence checkpoint for `dbname`.
// `seq` is stored as text. Pass `transaction` to commit the checkpoint
// atomically with the rows it describes; omit it to run standalone.
async function saveLastSeq(dbname, seq, transaction) {
  await sequelize.query(
    `INSERT INTO sync_state (dbname, last_seq, synced_at)
     VALUES (:dbname, :seq, NOW())
     ON CONFLICT (dbname) DO UPDATE
     SET last_seq = :seq, synced_at = NOW()`,
    { replacements: { dbname, seq: String(seq) }, transaction }
  );
}

// Postgres jsonb rejects the null-byte escape with "unsupported Unicode
// escape sequence", so strip it from the serialized JSON before insert.
// Seen in openneuro README/TSV fields containing stray null bytes.
//
// The serialized text is scanned one escape sequence at a time so that an
// escaped backslash followed by the characters "u0000" (i.e. the literal
// text backslash-u0000 in the source data) is left intact. Blindly deleting
// every "\u0000" substring would tear that sequence apart and could leave
// invalid JSON behind.
function safeStringify(obj) {
  return JSON.stringify(obj).replace(/\\(u0000|[\s\S])/g, (esc) =>
    esc === "\\u0000" ? "" : esc
  );
}

// A valid file type is a dot-prefixed extension with no slashes and
// a reasonable length. Some CouchDB links view rows (e.g. openneuro)
// emit paths like ".0/libraries/FID-A/..." where the version number
// gets parsed as a fake extension — reject those.
/**
 * Decide whether `ext` looks like a real file extension.
 *
 * Accepts only strings that start with ".", have at least one character
 * after the dot, contain no "/" and are at most 20 characters long.
 *
 * @param {*} ext - candidate value; non-strings are rejected.
 * @returns {boolean} true when `ext` passes every check above.
 */
function isValidFileType(ext) {
  if (typeof ext !== "string") return false;
  // A bare "." is a prefix with no extension behind it, so reject it as well
  // as anything longer than 20 characters.
  if (ext.length < 2 || ext.length > 20) return false;
  return ext.startsWith(".") && !ext.includes("/");
}

/**
 * Insert one row into `ioviews`, or — when a row with the same
 * (dbname, dsname, subj, view) already exists — overwrite its json,
 * search_vector and updated_at.
 *
 * The serialized JSON is used twice: as the stored `json` value and as the
 * text handed to to_tsvector('english', ...).
 *
 * @param {string} dbname
 * @param {string} dsname
 * @param {*} subj - coerced with String() before binding.
 * @param {string} view
 * @param {*} json - value serialized through safeStringify().
 * @param {object} [transaction] - forwarded to sequelize.query when given.
 * @returns {Promise<void>}
 */
async function upsertIoview(dbname, dsname, subj, view, json, transaction) {
  const payload = safeStringify(json);
  await sequelize.query(
    `INSERT INTO ioviews (dbname, dsname, subj, view, json, search_vector, updated_at)
     VALUES (:dbname, :dsname, :subj, :view, :json, to_tsvector('english', :text), NOW())
     ON CONFLICT (dbname, dsname, subj, view) DO UPDATE
     SET json = :json,
         search_vector = to_tsvector('english', :text),
         updated_at = NOW()`,
    {
      replacements: {
        dbname,
        dsname,
        subj: String(subj),
        view,
        json: payload,
        text: payload,
      },
      transaction,
    }
  );
}

/**
 * Append one row to `iolinks`. This is a plain INSERT with no conflict
 * handling, so calling it twice with the same arguments stores two rows.
 *
 * @param {string} dbname
 * @param {string} dsname
 * @param {*} subj - coerced with String() before binding.
 * @param {string} view
 * @param {*} json - value serialized through safeStringify().
 * @param {object} [transaction] - forwarded to sequelize.query when given.
 * @returns {Promise<void>}
 */
async function insertIolink(dbname, dsname, subj, view, json, transaction) {
  await sequelize.query(
    `INSERT INTO iolinks (dbname, dsname, subj, view, json)
     VALUES (:dbname, :dsname, :subj, :view, :json)`,
    {
      replacements: {
        dbname,
        dsname,
        subj: String(subj),
        view,
        json: safeStringify(json),
      },
      transaction,
    }
  );
}

/**
 * Remove every `ioviews` row and then every `iolinks` row that belongs to
 * the given dataset.
 *
 * @param {string} dbname
 * @param {string} dsname
 * @param {object} [transaction] - forwarded to both DELETE statements.
 * @returns {Promise<void>}
 */
async function deleteDataset(dbname, dsname, transaction) {
  const options = { replacements: { dbname, dsname }, transaction };
  await sequelize.query(
    "DELETE FROM ioviews WHERE dbname = :dbname AND dsname = :dsname",
    options
  );
  await sequelize.query(
    "DELETE FROM iolinks WHERE dbname = :dbname AND dsname = :dsname",
    options
  );
}

// === First-time sync (fetch all three views once) ===

// Fetch a view, treating 404 as "view doesn't exist on this DB" (returns []).
// Non-BIDS DBs (e.g. brainmeshlibrary) only have the dbinfo view.
+async function fetchView(dbname, viewName) { + try { + const res = await axios.get( + `${COUCHDB_URL}/${dbname}/_design/qq/_view/${viewName}` + ); + return res.data.rows || []; + } catch (err) { + if (err.response?.status === 404) { + console.log(` ${dbname}: view '${viewName}' not present, skipping`); + return []; + } + throw err; + } +} + +async function firstSync(dbname) { + console.log(` ${dbname}: first sync, fetching all views...`); + + const dbinfoRows = await fetchView(dbname, "dbinfo"); + for (const row of dbinfoRows) { + const subj = String(row.value?.subj?.length || 0); + await upsertIoview(dbname, row.id, subj, "dbinfo", row.value); + } + console.log(` ${dbname}: dbinfo synced (${dbinfoRows.length} rows)`); + + const subjectRows = await fetchView(dbname, "subjects"); + for (const row of subjectRows) { + const subj = String(row.key?.[6] || ""); + await upsertIoview(dbname, row.id, subj, "subjects", { + key: row.key, + value: row.value, + }); + } + console.log(` ${dbname}: subjects synced (${subjectRows.length} rows)`); + + const linkRows = await fetchView(dbname, "links"); + let linkCount = 0; + for (const row of linkRows) { + const fileType = row.key?.[0]; + if (!isValidFileType(fileType)) continue; + const subjId = String(row.key?.[1] || ""); + await insertIolink(dbname, row.id, subjId, fileType, { + key: row.key, + value: row.value, + }); + linkCount++; + } + console.log(` ${dbname}: links synced (${linkCount}/${linkRows.length} rows)`); +} + +// === Process one changed dataset (Option A: 2 HTTP requests + local transforms) === + +async function processDatasetUpdate(dbname, dsname) { + // dbinfo view supports key filtering; raw doc carries everything else. 
+ const keyParam = encodeURIComponent(JSON.stringify(dsname)); + const [dbinfoRes, rawDocRes] = await Promise.all([ + axios.get( + `${COUCHDB_URL}/${dbname}/_design/qq/_view/dbinfo?key=${keyParam}` + ), + axios.get(`${COUCHDB_URL}/${dbname}/${encodeURIComponent(dsname)}`), + ]); + + const dbinfoRow = (dbinfoRes.data.rows || [])[0]; + if (!dbinfoRow) { + console.warn(` ${dbname}/${dsname}: no dbinfo row, skipping`); + return; + } + const dbinfoValue = dbinfoRow.value; + const doc = rawDocRes.data; + + const subjectRows = transformSubjects(doc); + const linkRows = transformLinks(doc); + + // Rule 1: wrap all writes for this dataset in one transaction. + await sequelize.transaction(async (t) => { + const subjCount = String(dbinfoValue?.subj?.length || 0); + await upsertIoview(dbname, dsname, subjCount, "dbinfo", dbinfoValue, t); + + // Rule 2: empty-subjs guard. NOT IN (NULL) silently matches nothing. + const currentSubjs = Array.isArray(dbinfoValue?.subj) + ? dbinfoValue.subj + : []; + if (currentSubjs.length > 0) { + // subjects view stores subj without "sub-"/"Sub-" prefix + // (key[6] = subj.substring(4) in upstream map). + const currentSubjIds = currentSubjs.map((s) => s.substring(4)); + await sequelize.query( + `DELETE FROM ioviews + WHERE dbname = :dbname AND dsname = :dsname AND view = 'subjects' + AND subj NOT IN (:subjs)`, + { + replacements: { dbname, dsname, subjs: currentSubjIds }, + transaction: t, + } + ); + } + + for (const row of subjectRows) { + const subj = String(row.key?.[6] || ""); + await upsertIoview( + dbname, + dsname, + subj, + "subjects", + { key: row.key, value: row.value }, + t + ); + } + + // iolinks: no usable upsert key, so delete + reinsert per dataset. 
+ await sequelize.query( + "DELETE FROM iolinks WHERE dbname = :dbname AND dsname = :dsname", + { replacements: { dbname, dsname }, transaction: t } + ); + for (const row of linkRows) { + const fileType = row.key?.[0]; + const subjId = String(row.key?.[1] || ""); + await insertIolink( + dbname, + dsname, + subjId, + fileType, + { key: row.key, value: row.value }, + t + ); + } + }); +} + +// === Incremental sync === + +async function incrementalSync(dbname, lastSeq) { + // No include_docs=true: we fetch the raw doc per dataset so the _changes + // payload stays small and per-dataset work runs in parallel. + const { data } = await axios.get( + `${COUCHDB_URL}/${dbname}/_changes?since=${encodeURIComponent(lastSeq)}` + ); + + if (!data.results || data.results.length === 0) { + console.log(` ${dbname}: no changes since last sync`); + return data.last_seq; + } + + const changes = data.results.filter( + (c) => c.id && !c.id.startsWith("_design/") + ); + console.log( + ` ${dbname}: ${changes.length} dataset changes (raw=${data.results.length})` + ); + + // Rule 3: bounded concurrency + per-dataset try/catch. + for (let i = 0; i < changes.length; i += CONCURRENCY) { + const chunk = changes.slice(i, i + CONCURRENCY); + await Promise.all( + chunk.map(async (change) => { + try { + if (change.deleted) { + await sequelize.transaction((t) => + deleteDataset(dbname, change.id, t) + ); + console.log(` ${dbname}/${change.id}: deleted`); + } else { + await processDatasetUpdate(dbname, change.id); + } + } catch (err) { + console.error(` ${dbname}/${change.id}: failed - ${err.message}`); + } + }) + ); + } + + // Rule 4: return last_seq from THIS response. Never re-fetch update_seq + // afterward (writes during sync would be silently skipped). 
+ return data.last_seq; +} + +// === Sync a single database === + +async function syncDatabase(dbname) { + console.log(`\nSyncing ${dbname}...`); + const lastSeq = await getLastSeq(dbname); + + try { + let nextSeq; + if (lastSeq === "0") { + // Rule 5: capture update_seq BEFORE firstSync. Writes during firstSync + // get picked up by the next incremental run. + const { data: info } = await axios.get(`${COUCHDB_URL}/${dbname}`); + const seqAtStart = String(info.update_seq); + await firstSync(dbname); + nextSeq = seqAtStart; + } else { + nextSeq = await incrementalSync(dbname, lastSeq); + } + + await saveLastSeq(dbname, String(nextSeq)); + console.log(` ${dbname}: sync complete ✓`); + } catch (err) { + console.error(` ${dbname}: sync failed - ${err.message}`); + } +} + +// === Main === + +async function runSync() { + console.log("=== Starting NeuroJSON sync ==="); + console.log(new Date().toISOString()); + console.log(`CouchDB: ${COUCHDB_URL}`); + + const databases = await getDatabases(); + console.log(`Databases: ${databases.length}`); + + for (const db of databases) { + await syncDatabase(db); + } + + await sequelize.close(); + console.log("\n=== Sync complete ==="); + console.log(new Date().toISOString()); +} + +runSync().catch((err) => { + console.error("Sync failed:", err); + process.exit(1); +}); diff --git a/src/components/SearchPage/DatasetCard.tsx b/src/components/SearchPage/DatasetCard.tsx index 42f6646..4243119 100644 --- a/src/components/SearchPage/DatasetCard.tsx +++ b/src/components/SearchPage/DatasetCard.tsx @@ -1,10 +1,37 @@ -import { Typography, Card, CardContent, Stack, Chip } from "@mui/material"; +import DownloadIcon from "@mui/icons-material/Download"; +import KeyboardArrowDownIcon from "@mui/icons-material/KeyboardArrowDown"; +import { + Typography, + Card, + CardContent, + Stack, + Chip, + Button, + Link as MuiLink, + Menu, + MenuItem, + Box, + Snackbar, + Alert, +} from "@mui/material"; +import { baseURL } from "services/instance"; import { 
Colors } from "design/theme"; import React from "react"; -import { useMemo } from "react"; +import { useMemo, useState } from "react"; import { Link } from "react-router-dom"; import RoutesEnum from "types/routes.enum"; +interface MatchingFile { + key?: any; + value?: { + file?: string; + url?: string; + path?: string; + suffix?: string; + ref?: string; + }; +} + interface DatasetCardProps { dbname: string; dsname: string; @@ -26,6 +53,9 @@ interface DatasetCardProps { index: number; onChipClick: (key: string, value: string) => void; keyword?: string; // for keyword highlight + matchingFiles?: MatchingFile[]; // sample of iolinks rows matching file_type + matchingFilesTotal?: number; // total count across all matches + fileTypes?: string[]; // the active file_type filter, used to build manifest URL } /** ---------- utility helpers ---------- **/ @@ -35,11 +65,21 @@ const normalize = (s: string) => ?.replace(/[\u201C\u201D\u2033]/g, '"') ?? // curly → straight ""; +// Multi-word keyword support: backend tsquery treats "head brain" as AND of +// independent tokens. Highlighting should match the same logic — split on +// whitespace and treat each word independently. 
+const splitKeyword = (kw?: string): string[] => { + if (!kw) return []; + return normalize(kw).trim().split(/\s+/).filter(Boolean); +}; + +const escapeRegex = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + const containsKeyword = (text?: string, kw?: string) => { if (!text || !kw) return false; const t = normalize(text).toLowerCase(); - const k = normalize(kw).toLowerCase(); - return t.includes(k); + const words = splitKeyword(kw.toLowerCase()); + return words.some((w) => t.includes(w)); }; /** Find a short snippet in secondary fields if not already visible */ @@ -62,24 +102,41 @@ function findMatchSnippet( ["ReferencesAndLinks", (v) => v?.info?.ReferencesAndLinks], ]; - const k = normalize(kw).toLowerCase(); + const words = splitKeyword(kw.toLowerCase()); + if (words.length === 0) return null; for (const [label, getter] of CANDIDATE_FIELDS) { const raw = getter(v); // v = parsedJson.value if (!raw) continue; const text = normalize(String(raw)); - const i = text.toLowerCase().indexOf(k); // k is the lowercase version of keyword - if (i >= 0) { - const start = Math.max(0, i - 40); - const end = Math.min(text.length, i + k.length + 40); - const before = text.slice(start, i); - const hit = text.slice(i, i + k.length); - const after = text.slice(i + k.length, end); - const html = `${ - start > 0 ? "…" : "" - }${before}${hit}${after}${end < text.length ? "…" : ""}`; - return { label, html }; + const lower = text.toLowerCase(); + + // Find the earliest occurrence of ANY matching word — that's the snippet anchor. + let anchor = -1; + let anchorLen = 0; + for (const w of words) { + const i = lower.indexOf(w); + if (i >= 0 && (anchor < 0 || i < anchor)) { + anchor = i; + anchorLen = w.length; + } } + if (anchor < 0) continue; + + const start = Math.max(0, anchor - 40); + const end = Math.min(text.length, anchor + anchorLen + 40); + const slice = text.slice(start, end); + + // Highlight every matching word inside the snippet, not just the first. 
+ const regex = new RegExp( + `(${words.map(escapeRegex).join("|")})`, + "gi" + ); + const highlighted = slice.replace(regex, "$1"); + const html = `${start > 0 ? "…" : ""}${highlighted}${ + end < text.length ? "…" : "" + }`; + return { label, html }; } return null; } @@ -92,10 +149,69 @@ const DatasetCard: React.FC = ({ index, onChipClick, keyword, + matchingFiles, + matchingFilesTotal, + fileTypes, }) => { const { name, readme, modality, subj, info } = parsedJson.value; const datasetLink = `${RoutesEnum.DATABASES}/${dbname}/${dsname}`; + // Build manifest URL for any of the three formats. Backend serves + // text/plain for .txt, application/x-sh for .sh, text/plain for .bat — + // each with a Content-Disposition header so the browser saves them. + const buildManifestUrl = (format: "txt" | "sh" | "bat") => { + if (!fileTypes || fileTypes.length === 0) return null; + const ext = fileTypes.map((e) => encodeURIComponent(e)).join(","); + return `${baseURL}/dbs/${encodeURIComponent( + dbname + )}/${encodeURIComponent( + dsname + )}/files/manifest?ext=${ext}&format=${format}`; + }; + + const hasManifest = Array.isArray(fileTypes) && fileTypes.length > 0; + + // Dropdown state for the download format menu. + const [downloadMenuEl, setDownloadMenuEl] = useState( + null + ); + // Post-download instruction snackbar. Stays open until user dismisses it + // (no autoHideDuration) so researchers have time to read multi-step + // instructions. + const [downloadHint, setDownloadHint] = useState< + "sh" | "bat" | "txt" | null + >(null); + const handleDownload = (format: "txt" | "sh" | "bat") => { + const url = buildManifestUrl(format); + setDownloadMenuEl(null); + if (!url) return; + // Programmatic anchor click triggers the browser's normal download flow + // without leaving the current page (window.location would navigate away). 
+ const a = document.createElement("a"); + a.href = url; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + setDownloadHint(format); + }; + + // Extract a short "sub-XXX" tag from a BIDS path like + // "$.sub-019.ses-1.nirs.sub-019_ses-1_task-MA_run-01_nirs.snirf.SNIRFData..." + const subjectFromPath = (p?: string): string => { + if (!p) return ""; + const m = p.match(/sub-[^.]+/); + return m ? m[0] : ""; + }; + + // File size stored in key[1] of each iolinks row (bytes). Format for humans. + const formatBytes = (n?: number): string => { + if (typeof n !== "number" || !Number.isFinite(n) || n < 0) return ""; + if (n < 1024) return `${n} B`; + if (n < 1024 * 1024) return `${(n / 1024).toFixed(1)} KB`; + if (n < 1024 * 1024 * 1024) return `${(n / 1024 / 1024).toFixed(1)} MB`; + return `${(n / 1024 / 1024 / 1024).toFixed(2)} GB`; + }; + // prepare DOI URL const rawDOI = info?.DatasetDOI?.replace(/^doi:/, ""); const doiLink = rawDOI ? `https://doi.org/${rawDOI}` : null; @@ -122,19 +238,25 @@ const DatasetCard: React.FC = ({ [parsedJson.value, keyword, visibleHasKeyword] ); - // keyword highlight functional component (only for visible fields) + // keyword highlight functional component (only for visible fields). + // Splits the keyword on whitespace and highlights each word independently + // so "head brain" highlights both words wherever they appear. const highlightKeyword = (text: string, keyword?: string) => { - if (!keyword || !text?.toLowerCase().includes(keyword.toLowerCase())) { - return text; - } - - const regex = new RegExp(`(${keyword})`, "gi"); // for case-insensitive and global + const words = splitKeyword(keyword); + if (words.length === 0 || !text) return text; + const lowerWordSet = new Set(words.map((w) => w.toLowerCase())); + const regex = new RegExp( + `(${words.map(escapeRegex).join("|")})`, + "gi" + ); + if (!regex.test(text)) return text; + // Reset lastIndex because test() advances on /g regexes; safer to use split. 
const parts = text.split(regex); return ( <> {parts.map((part, i) => - part.toLowerCase() === keyword.toLowerCase() ? ( + lowerWordSet.has(part.toLowerCase()) ? ( = ({ )} + + {/* Matching files section — only shown when file_type filter is active */} + {Array.isArray(matchingFiles) && matchingFiles.length > 0 && ( + + + + Matching files + {typeof matchingFilesTotal === "number" && + ` (${ + matchingFiles.length < matchingFilesTotal + ? `${matchingFiles.length} of ${matchingFilesTotal}` + : matchingFilesTotal + })`} + + {hasManifest && ( + <> + + setDownloadMenuEl(null)} + > + handleDownload("sh")}> + For Mac / Linux (.sh) + + handleDownload("bat")}> + For Windows (.bat) + + handleDownload("txt")}> + URL list (.txt, advanced) + + + + )} + + + {matchingFiles.slice(0, 10).map((f, i) => { + const v = f.value || {}; + const subjTag = subjectFromPath(v.path); + const sizeBytes = + Array.isArray(f.key) && typeof f.key[1] === "number" + ? f.key[1] + : undefined; + const sizeTag = formatBytes(sizeBytes); + const meta = [subjTag, sizeTag].filter(Boolean).join(" · "); + return ( +
  • + + {v.file || v.url} + + {meta && ( + + ({meta}) + + )} +
  • + ); + })} +
    +
    + )} + + {/* Post-download instructions. No auto-hide so users can read at their + * own pace; dismiss with the ✕ when finished. */} + setDownloadHint(null)} + anchorOrigin={{ vertical: "bottom", horizontal: "center" }} + > + setDownloadHint(null)} + sx={{ maxWidth: 520 }} + > + {downloadHint === "sh" && ( + + + Downloaded the Mac / Linux script + + + To fetch your data files: + + +
  • Open Terminal
  • +
  • Go to the folder where the script was saved
  • +
  • + Run:{" "} + + bash <script-name>.sh + +
  • +
    +
    + )} + {downloadHint === "bat" && ( + + + Downloaded the Windows script + + + Open the folder where the script was saved and{" "} + double-click the .bat file. A command window + opens and the files download next to it. + + + )} + {downloadHint === "txt" && ( + + + Downloaded the URL list + + + In Terminal (Mac/Linux) or PowerShell (Windows), run:{" "} + + wget -i <file-name>.txt + + + + )} +
    +
    ); }; diff --git a/src/components/SearchPage/SubjectCard.tsx b/src/components/SearchPage/SubjectCard.tsx index c66ebce..f805b48 100644 --- a/src/components/SearchPage/SubjectCard.tsx +++ b/src/components/SearchPage/SubjectCard.tsx @@ -16,6 +16,7 @@ interface SubjectCardProps { modalities?: string[]; tasks?: string[]; sessions?: string[]; + runs?: string[]; types?: string[]; }; }; @@ -32,7 +33,8 @@ const SubjectCard: React.FC = ({ index, onChipClick, }) => { - const { modalities, tasks, sessions, types } = parsedJson.value; + const { modalities, tasks, sessions, runs, types } = parsedJson.value; + const runCount = Array.isArray(runs) ? runs.length : 0; const subjectLink = `${RoutesEnum.DATABASES}/${dbname}/${dsname}`; const formattedSubj = /^sub-/i.test(subj) ? subj : `sub-${String(subj)}`; @@ -229,6 +231,11 @@ const SubjectCard: React.FC = ({ {sessions?.length === 0 ? 1 : sessions?.length} + + + Runs: {runCount} + + diff --git a/src/components/SearchPage/widgets/FileTypeAutocompleteWidget.tsx b/src/components/SearchPage/widgets/FileTypeAutocompleteWidget.tsx new file mode 100644 index 0000000..eb941ad --- /dev/null +++ b/src/components/SearchPage/widgets/FileTypeAutocompleteWidget.tsx @@ -0,0 +1,40 @@ +import { Autocomplete, Chip, TextField } from "@mui/material"; +import { WidgetProps } from "@rjsf/utils"; + +// Multi-select combobox for file extensions (e.g. ".jdb", ".snirf"). +// Options come from uiSchema's ui:options.fileTypes, fetched once by the +// parent SearchPage from /api/v1/dbs/file-types. +export const FileTypeAutocompleteWidget = (props: WidgetProps) => { + const { value, onChange, options, label } = props; + const fileTypes = (options.fileTypes as string[]) || []; + const current: string[] = Array.isArray(value) ? 
value : []; + + return ( + onChange(v as string[])} + renderTags={(items, getTagProps) => + items.map((item, index) => ( + + )) + } + renderInput={(params) => ( + + )} + /> + ); +}; diff --git a/src/components/SearchPage/widgets/TypeAutocompleteWidget.tsx b/src/components/SearchPage/widgets/TypeAutocompleteWidget.tsx new file mode 100644 index 0000000..b0573fc --- /dev/null +++ b/src/components/SearchPage/widgets/TypeAutocompleteWidget.tsx @@ -0,0 +1,27 @@ +import { Autocomplete, TextField } from "@mui/material"; +import { WidgetProps } from "@rjsf/utils"; + +// Combobox: type freely OR pick from a modality-specific suggestion list. +export const TypeAutocompleteWidget = (props: WidgetProps) => { + const { value, onChange, options, label, placeholder } = props; + const suggestions = (options.suggestions as string[]) || []; + + return ( + onChange(typeof v === "string" ? v : "")} + onInputChange={(_, v) => onChange(v || "")} + renderInput={(params) => ( + + )} + /> + ); +}; diff --git a/src/pages/SearchPage.tsx b/src/pages/SearchPage.tsx index 8e35494..df49761 100644 --- a/src/pages/SearchPage.tsx +++ b/src/pages/SearchPage.tsx @@ -13,6 +13,12 @@ import { Tooltip, IconButton, Alert, + Slider, + Stack, + TextField, + ToggleButton, + ToggleButtonGroup, + Autocomplete, } from "@mui/material"; import { useTheme } from "@mui/material/styles"; import useMediaQuery from "@mui/material/useMediaQuery"; @@ -22,6 +28,8 @@ import ClickTooltip from "components/SearchPage/ClickTooltip"; import DatabaseCard from "components/SearchPage/DatabaseCard"; import DatasetCard from "components/SearchPage/DatasetCard"; import SubjectCard from "components/SearchPage/SubjectCard"; +import { FileTypeAutocompleteWidget } from "components/SearchPage/widgets/FileTypeAutocompleteWidget"; +import { TypeAutocompleteWidget } from "components/SearchPage/widgets/TypeAutocompleteWidget"; import { Colors } from "design/theme"; import { useAppDispatch } from "hooks/useAppDispatch"; import { useAppSelector 
} from "hooks/useAppSelector"; @@ -29,6 +37,7 @@ import pako from "pako"; import React from "react"; import { useState, useEffect, useMemo } from "react"; import { + fetchFileTypes, fetchMetadataSearchResults, fetchRegistry, } from "redux/neurojson/neurojson.action"; @@ -45,6 +54,194 @@ type RegistryItem = { logo?: string; }; +// Module-scope so the component identity is stable across SearchPage renders. +// An inline arrow function inside customFields was getting a new identity each +// render, which made RJSF remount the slider mid-drag. +const AGE_MIN_BOUND = 0; +const AGE_MAX_BOUND = 100; + +const DATASET_MODALITIES = [ + "anat", "func", "dwi", "fmap", "perf", + "meg", "eeg", "ieeg", "beh", "pet", + "micr", "nirs", "motion", "ephys", "atlas", + "JMesh", "JNIFTI", "JSNIRF", "JData", +]; + +const DatasetModalityFilterField = (props: any) => { + const ctx = props?.registry?.formContext as + | { formData: Record; setFormData: React.Dispatch>> } + | undefined; + if (!ctx) return null; + const { formData, setFormData } = ctx; + const selected: string[] = Array.isArray(formData.modalities) ? formData.modalities : []; + const mode: string = formData.modality_mode || "or"; + + const handleChange = (_: any, next: string[]) => { + setFormData((prev) => { + const updated = { ...prev }; + if (next.length === 0) { + delete updated.modalities; + delete updated.modality_mode; + } else { + updated.modalities = next; + if (!updated.modality_mode) updated.modality_mode = "or"; + } + return updated; + }); + }; + + const handleModeChange = (_: any, val: string | null) => { + if (!val) return; + setFormData((prev) => ({ ...prev, modality_mode: val })); + }; + + return ( + + + items.map((item, index) => ( + + )) + } + renderInput={(params) => ( + + )} + /> + {selected.length > 1 && ( + + + OR + AND + + + {mode === "and" ? 
"must have all selected" : "must have any selected"} + + + )} + + ); +}; + +const AgeRangeSliderField = (props: any) => { + const ctx = props?.registry?.formContext as + | { + formData: Record; + setFormData: React.Dispatch>>; + } + | undefined; + if (!ctx) return null; + const { formData, setFormData } = ctx; + const lo = + typeof formData.age_min === "number" ? formData.age_min : AGE_MIN_BOUND; + const hi = + typeof formData.age_max === "number" ? formData.age_max : AGE_MAX_BOUND; + const isAny = lo === AGE_MIN_BOUND && hi === AGE_MAX_BOUND; + return ( + + + Age: {isAny ? "Any" : `${lo} – ${hi}`} + + { + const [newLo, newHi] = v as number[]; + setFormData((prev) => { + const next = { ...prev }; + // Each handle is its own filter. A handle at the bound means + // "no constraint on that side", so we leave it out of formData + // (otherwise age_min=0 silently excludes unknown-age subjects + // whose stored key is "000-1", lexicographically below "00000"). + if (newLo === AGE_MIN_BOUND) delete next.age_min; + else next.age_min = newLo; + if (newHi === AGE_MAX_BOUND) delete next.age_max; + else next.age_max = newHi; + return next; + }); + }} + valueLabelDisplay="auto" + min={AGE_MIN_BOUND} + max={AGE_MAX_BOUND} + step={1} + disableSwap + sx={{ color: Colors.purple }} + /> + + ); +}; + +// Pairs a "_min" + "_max" into a single row of two number inputs. +// Reads target field names + label from uiSchema's ui:options: +// { minKey: "sess_min", maxKey: "sess_max", label: "sessions" } +const CountRangePairField = (props: any) => { + const ctx = props?.registry?.formContext as + | { + formData: Record; + setFormData: React.Dispatch>>; + } + | undefined; + const opts = props?.uiSchema?.["ui:options"] || {}; + const minKey = opts.minKey as string; + const maxKey = opts.maxKey as string; + const label = (opts.label as string) || ""; + if (!ctx || !minKey || !maxKey) return null; + const { formData, setFormData } = ctx; + const minVal = formData[minKey] ?? 
""; + const maxVal = formData[maxKey] ?? ""; + + const update = (key: string, raw: string) => { + setFormData((prev) => { + const next = { ...prev }; + if (raw === "" || raw === undefined) { + delete next[key]; + } else { + const n = Number(raw); + if (Number.isNaN(n)) delete next[key]; + else next[key] = n; + } + return next; + }); + }; + + return ( + + update(minKey, e.target.value)} + fullWidth + inputProps={{ min: 0 }} + /> + update(maxKey, e.target.value)} + fullWidth + inputProps={{ min: 0 }} + /> + + ); +}; + const matchesKeyword = (item: RegistryItem, keyword: string) => { if (!keyword) return false; const needle = keyword.toLowerCase(); @@ -71,6 +268,9 @@ const SearchPage: React.FC = () => { const registry = useAppSelector( (state: RootState) => state.neurojson.registry ); + const fileTypes = useAppSelector( + (state: RootState) => state.neurojson.fileTypes + ); const loading = useAppSelector((state: RootState) => state.neurojson.loading); const [formData, setFormData] = useState>({}); @@ -91,9 +291,10 @@ const SearchPage: React.FC = () => { const placement = upMd ? "right" : "top"; - // for database card - const keywordInput = String(formData?.keyword ?? "").trim(); - const selectedDbId = String(formData?.database ?? "").trim(); + // inputs for the "Suggested databases" memo — read from appliedFilters so + // the suggestion list refreshes only on Search click, matching the results. + const keywordInput = String(appliedFilters?.keyword ?? "").trim(); + const selectedDbId = String(appliedFilters?.database ?? 
"").trim(); const registryMatches: RegistryItem[] = React.useMemo(() => { if (!Array.isArray(registry)) return []; @@ -119,10 +320,12 @@ const SearchPage: React.FC = () => { ([key, value]) => key !== "skip" && key !== "limit" && + key !== "modality_mode" && value !== undefined && value !== null && value !== "" && - value !== "any" + value !== "any" && + !(Array.isArray(value) && value.length === 0) ); useEffect(() => { @@ -207,14 +410,26 @@ const SearchPage: React.FC = () => { // form UI const uiSchema = useMemo( - () => generateUiSchema(formData, showSubjectFilters, showDatasetFilters), - [formData, showSubjectFilters, showDatasetFilters] + () => + generateUiSchema( + formData, + showSubjectFilters, + showDatasetFilters, + fileTypes || [] + ), + [formData, showSubjectFilters, showDatasetFilters, fileTypes] ); + // Custom RJSF widgets — comboboxes for the Data type and File types fields. + const customWidgets = { + typeAutocomplete: TypeAutocompleteWidget, + fileTypeAutocomplete: FileTypeAutocompleteWidget, + }; + // Create the "Subject-level Filters" button as a custom field const customFields = { subjectFiltersToggle: () => ( - + + + + + + ), datasetFiltersToggle: () => ( @@ -247,6 +478,9 @@ const SearchPage: React.FC = () => { ), + ageRangeSlider: AgeRangeSliderField, + countRangePair: CountRangePairField, + datasetModalityFilter: DatasetModalityFilterField, }; // determine the results are subject-level or dataset-level @@ -266,6 +500,11 @@ const SearchPage: React.FC = () => { dispatch(fetchRegistry()); }, [dispatch]); + // get the distinct file extensions for the "File types" multi-select. 
+ useEffect(() => { + dispatch(fetchFileTypes()); + }, [dispatch]); + // dynamically add database enum to schema const schema = useMemo(() => { const dbList = registry?.length @@ -400,6 +639,8 @@ const SearchPage: React.FC = () => { onChange={({ formData }) => setFormData(formData)} uiSchema={uiSchema} fields={customFields} + widgets={customWidgets} + formContext={{ formData, setFormData }} /> ); @@ -418,6 +659,35 @@ const SearchPage: React.FC = () => { !loading && // !hasDbMatches && (!hasDatasetMatches || backendEmpty); + + // Tailored empty-state message: when the user combined a file_type filter + // with any subject-level filter and got nothing back, it's almost certainly + // because the file extension lives in non-BIDS datasets (which have no + // subject rows in ioviews). The generic "adjust filters" message hides this. + const SUBJECT_FILTER_KEYS = [ + "age_min", + "age_max", + "gender", + "task_min", + "task_max", + "task_name", + "run_min", + "run_max", + "run_name", + "sess_min", + "sess_max", + "session_name", + "type_name", + "modality", + "subject", + ]; + const isAppliedFilter = (v: any) => + v !== "" && v !== "any" && v !== undefined && v !== null; + const showFileTypeNonBidsHint = + showNoResults && + Array.isArray(appliedFilters.file_type) && + appliedFilters.file_type.length > 0 && + SUBJECT_FILTER_KEYS.some((k) => isAppliedFilter(appliedFilters[k])); return ( { mt: 1, }} > - {activeFilters.map(([key, value]) => ( + {activeFilters.map(([key, value]) => { + let label = `${String(key)}: ${String(value)}`; + if (key === "modalities" && Array.isArray(value)) { + const mode = appliedFilters.modality_mode || "or"; + label = `modalities (${mode}): ${value.join(", ")}`; + } + return ( { } }} /> - ))} + ); + })} )} @@ -635,10 +912,9 @@ const SearchPage: React.FC = () => { }} title={ - Live preview based on your keyword or selected database. - This list updates as you type or change the dropdown. 
- It’s separate from the results—you’ll - see datasets/subjects after you click Search. + Databases that match your keyword or selected database + filter. This list refreshes when you click{" "} + Search, alongside the datasets/subjects below. } > @@ -668,7 +944,7 @@ const SearchPage: React.FC = () => { datasets={db.datasets} modalities={db.datatype} logo={db.logo} - keyword={formData.keyword} // for keyword highlight + keyword={appliedFilters.keyword} // highlight the searched keyword, not the live input onChipClick={handleChipClick} /> ))} @@ -833,7 +1109,14 @@ const SearchPage: React.FC = () => { dsname={item.dsname} parsedJson={parsedJson} onChipClick={handleChipClick} - keyword={formData.keyword} // for keyword highlight + keyword={appliedFilters.keyword} // highlight what was searched, not the live form + matchingFiles={ + item.matching_files + ? JSON.parse(item.matching_files) + : undefined + } + matchingFilesTotal={item.matching_files_total} + fileTypes={appliedFilters.file_type} /> ) : ( { {...item} parsedJson={parsedJson} onChipClick={handleChipClick} + age={parsedJson?.key?.[0]} /> ); } catch (e) { @@ -869,10 +1153,20 @@ const SearchPage: React.FC = () => { Search Results - - No datasets or subjects found. Please adjust the - filters and try again. - + {showFileTypeNonBidsHint ? ( + + No matching subjects found. The selected file type + may only exist in non-BIDS datasets (e.g. mesh or + atlas libraries), which have no subject-level + records. Try removing subject-level filters + (modality, age, gender, etc.) and search again. + + ) : ( + + No datasets or subjects found. Please adjust the + filters and try again. 
+ + )} )} diff --git a/src/pages/UpdatedDatasetDetailPage.tsx b/src/pages/UpdatedDatasetDetailPage.tsx index f94a751..835a6b5 100644 --- a/src/pages/UpdatedDatasetDetailPage.tsx +++ b/src/pages/UpdatedDatasetDetailPage.tsx @@ -7,6 +7,7 @@ import ExpandLess from "@mui/icons-material/ExpandLess"; import ExpandMore from "@mui/icons-material/ExpandMore"; import HomeIcon from "@mui/icons-material/Home"; import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined"; +import KeyboardArrowDownIcon from "@mui/icons-material/KeyboardArrowDown"; import { Box, Typography, @@ -17,6 +18,9 @@ import { Collapse, Tooltip, IconButton, + Menu, + MenuItem, + Snackbar, } from "@mui/material"; import DatasetActions from "components/DatasetDetailPage/DatasetAction"; import FileTree from "components/DatasetDetailPage/FileTree/FileTree"; @@ -267,8 +271,23 @@ const UpdatedDatasetDetailPage: React.FC = () => { const [externalLinks, setExternalLinks] = useState([]); const [internalLinks, setInternalLinks] = useState([]); const [isInternalExpanded, setIsInternalExpanded] = useState(true); - const [downloadScript, setDownloadScript] = useState(""); + // Three script formats generated client-side: bash (Mac/Linux), batch + // (Windows), and a plain URL list. Same files in all three; only the + // wrapper syntax differs. + const [downloadScripts, setDownloadScripts] = useState<{ + sh: string; + bat: string; + txt: string; + }>({ sh: "", bat: "", txt: "" }); const [downloadScriptSize, setDownloadScriptSize] = useState(0); + // Dropdown state for the download format menu. + const [downloadMenuEl, setDownloadMenuEl] = useState( + null + ); + // Post-download instruction snackbar. Stays open until user dismisses. 
+ const [downloadHint, setDownloadHint] = useState< + "sh" | "bat" | "txt" | null + >(null); const [totalFileSize, setTotalFileSize] = useState(0); const [previewIsInternal, setPreviewIsInternal] = useState(false); const [isExternalExpanded, setIsExternalExpanded] = useState(true); @@ -577,31 +596,62 @@ const UpdatedDatasetDetailPage: React.FC = () => { // }); // setJsonSize(blob.size); - // Construct download script dynamically - let script = `curl -L --create-dirs "https://neurojson.io:7777/${dbName}/${docId}" -o "${docId}.json"\n`; - - links.forEach((link) => { - const url = link.url; - const match = url.match(/file=([^&]+)/); - - const filename = match - ? (() => { - try { - return decodeURIComponent(match[1]); - } catch { - return match[1]; // fallback if decode fails - } - })() - : `file-${link.index}`; - - const outputPath = `$HOME/.neurojson/io/${dbName}/${docId}/${filename}`; - - script += `curl -L --create-dirs "${url}" -o "${outputPath}"\n`; - }); - setDownloadScript(script); - // Calculate and set script size - const scriptBlob = new Blob([script], { type: "text/plain" }); - setDownloadScriptSize(scriptBlob.size); + // Construct download scripts (three formats) dynamically — everything + // lands in a .// folder next to where the user runs the script. + // JSON metadata and data files stay together (was split between cwd + // and ~/.neurojson/io/... previously, hard to find). + const docUrl = `https://neurojson.io:7777/${dbName}/${docId}`; + type DlItem = { url: string; filename: string }; + const items: DlItem[] = [ + { url: docUrl, filename: `${docId}.json` }, + ...links.map((link) => { + const match = link.url.match(/file=([^&]+)/); + const filename = match + ? 
(() => { + try { + return decodeURIComponent(match[1]); + } catch { + return match[1]; + } + })() + : `file-${link.index}`; + return { url: link.url, filename }; + }), + ]; + + // Bash script (Mac/Linux) + const sh = + `#!/bin/bash\n` + + `# Downloads ${docId} from ${dbName}\n` + + `# Usage: bash ${docId}.sh\n` + + `set -e\n` + + `mkdir -p "${docId}"\n` + + `cd "${docId}" || exit 1\n` + + items + .map((it) => `curl -L -C - -o "${it.filename}" "${it.url}"`) + .join("\n") + + `\necho "Done. Files saved to $(pwd)"\n`; + + // Batch script (Windows) — curl ships with Windows 10+. CRLF endings. + const bat = + `@echo off\r\n` + + `REM Downloads ${docId} from ${dbName}\r\n` + + `REM Usage: double-click or run ${docId}.bat\r\n` + + `if not exist "${docId}" mkdir "${docId}"\r\n` + + `cd /d "${docId}"\r\n` + + items + .map((it) => `curl -L -C - -o "${it.filename}" "${it.url}"`) + .join("\r\n") + + `\r\necho Done. Files saved to %cd%\r\n` + + `pause\r\n`; + + // Plain URL list — for advanced users with wget. + const txt = items.map((it) => it.url).join("\n") + "\n"; + + setDownloadScripts({ sh, bat, txt }); + // Size shown on the button is the .sh script size (representative). + const shBlob = new Blob([sh], { type: "text/plain" }); + setDownloadScriptSize(shBlob.size); } }, [datasetDocument, docId]); @@ -627,14 +677,25 @@ const UpdatedDatasetDetailPage: React.FC = () => { document.body.removeChild(link); }; - const handleDownloadScript = () => { - const blob = new Blob([downloadScript], { type: "text/plain" }); + // Trigger download of the selected script format. Programmatic anchor + // click triggers the browser's normal download flow without navigating. + const handleDownloadScript = (format: "sh" | "bat" | "txt") => { + const content = downloadScripts[format]; + if (!content) return; + const mime = + format === "sh" ? "application/x-sh" : "text/plain"; + const filename = + format === "txt" ? 
`${docId}_manifest.txt` : `${docId}.${format}`; + const blob = new Blob([content], { type: `${mime}; charset=utf-8` }); const link = document.createElement("a"); link.href = URL.createObjectURL(blob); - link.download = `${docId}.sh`; + link.download = filename; document.body.appendChild(link); link.click(); document.body.removeChild(link); + URL.revokeObjectURL(link.href); + setDownloadMenuEl(null); + setDownloadHint(format); }; const handlePreview = ( @@ -1118,20 +1179,35 @@ const UpdatedDatasetDetailPage: React.FC = () => { + setDownloadMenuEl(null)} + > + handleDownloadScript("sh")}> + For Mac / Linux (.sh) + + handleDownloadScript("bat")}> + For Windows (.bat) + + handleDownloadScript("txt")}> + URL list (.txt, advanced) + + @@ -1658,6 +1734,82 @@ const UpdatedDatasetDetailPage: React.FC = () => { key={`${previewIndex}-${previewOpen}`} // react will destroy the existing component and create a new one for mount /> + + {/* Post-download instructions. No auto-hide so users can read at + * their own pace; dismiss with the ✕ when finished. */} + setDownloadHint(null)} + anchorOrigin={{ vertical: "bottom", horizontal: "center" }} + > + setDownloadHint(null)} + sx={{ maxWidth: 520 }} + > + {downloadHint === "sh" && ( + + + Downloaded the Mac / Linux script + + + To fetch your data files: + + +
  • Open Terminal
  • +
  • Go to the folder where the script was saved
  • +
  • + Run:{" "} + + bash <script-name>.sh + +
  • +
    +
    + )} + {downloadHint === "bat" && ( + + + Downloaded the Windows script + + + Open the folder where the script was saved and{" "} + double-click the .bat file. A command + window opens and the files download next to it. + + + )} + {downloadHint === "txt" && ( + + + Downloaded the URL list + + + In Terminal (Mac/Linux) or PowerShell (Windows), run:{" "} + + wget -i <file-name>.txt + + + + )} +
    +
    ); }; diff --git a/src/redux/neurojson/neurojson.action.ts b/src/redux/neurojson/neurojson.action.ts index 35cf6c1..f8f08c2 100644 --- a/src/redux/neurojson/neurojson.action.ts +++ b/src/redux/neurojson/neurojson.action.ts @@ -109,6 +109,19 @@ export const fetchMetadataSearchResults = createAsyncThunk( } ); +// distinct iolinks file extensions — populates the "File types" multi-select +export const fetchFileTypes = createAsyncThunk( + "neurojson/fetchFileTypes", + async (_, { rejectWithValue }) => { + try { + const data = await NeurojsonService.getFileTypes(); + return data; + } catch (error: any) { + return rejectWithValue("Failed to fetch file types"); + } + } +); + // fetch data for metadata panel in dataset detail page export const fetchDbInfoByDatasetId = createAsyncThunk( "neurojson/fetchDbInfoByDatasetId", diff --git a/src/redux/neurojson/neurojson.slice.ts b/src/redux/neurojson/neurojson.slice.ts index 90722c7..cfafc6a 100644 --- a/src/redux/neurojson/neurojson.slice.ts +++ b/src/redux/neurojson/neurojson.slice.ts @@ -7,6 +7,7 @@ import { fetchDbStats, fetchMetadataSearchResults, fetchDbInfoByDatasetId, + fetchFileTypes, } from "./neurojson.action"; import { DBDatafields, INeuroJsonState } from "./types/neurojson.interface"; import { createSlice, PayloadAction } from "@reduxjs/toolkit"; @@ -26,6 +27,7 @@ const initialState: INeuroJsonState = { dbStats: null, searchResults: null, datasetViewInfo: null, + fileTypes: null, }; const neurojsonSlice = createSlice({ @@ -155,6 +157,17 @@ const neurojsonSlice = createSlice({ state.loading = false; state.error = action.payload as string; }) + // fetchFileTypes runs once on mount; no pending case so it doesn't + // clobber the shared `loading` spinner used by the search button. 
+ .addCase( + fetchFileTypes.fulfilled, + (state, action: PayloadAction) => { + state.fileTypes = action.payload; + } + ) + .addCase(fetchFileTypes.rejected, (state, action) => { + state.error = action.payload as string; + }) .addCase(fetchDbInfoByDatasetId.pending, (state) => { state.loading = true; state.error = null; diff --git a/src/redux/neurojson/types/neurojson.interface.ts b/src/redux/neurojson/types/neurojson.interface.ts index 365566d..01c8273 100644 --- a/src/redux/neurojson/types/neurojson.interface.ts +++ b/src/redux/neurojson/types/neurojson.interface.ts @@ -13,6 +13,7 @@ export interface INeuroJsonState { dbStats: DbStatsItem[] | null; // for dbStats on landing page searchResults: any[] | { status: string; msg: string } | null; datasetViewInfo: any | null; + fileTypes: string[] | null; } export interface DBParticulars { diff --git a/src/services/neurojson.service.ts b/src/services/neurojson.service.ts index 008e960..ed1f70d 100644 --- a/src/services/neurojson.service.ts +++ b/src/services/neurojson.service.ts @@ -150,6 +150,13 @@ export const NeurojsonService = { return response.data; }, + // GET /api/v1/dbs/file-types → distinct iolinks.view values + // Drives the multi-select "File types" filter on the search page. 
+ getFileTypes: async (): Promise => { + const response = await api.get(`/dbs/file-types`); + return response.data; + }, + // getDbInfoByDatasetId: async (dbName: string, dsId: string): Promise => { // const response = await api.get( // `${baseURL}/${dbName}/_design/qq/_view/dbinfo`, diff --git a/src/utils/SearchPageFunctions/generateUiSchema.ts b/src/utils/SearchPageFunctions/generateUiSchema.ts index f352a23..46dc215 100644 --- a/src/utils/SearchPageFunctions/generateUiSchema.ts +++ b/src/utils/SearchPageFunctions/generateUiSchema.ts @@ -1,11 +1,13 @@ import { Colors } from "design/theme"; +import { getTypeSuggestions } from "./typesByModality"; // Controls the background highlight of selected fields // Controls the visibility of subject-level filters export const generateUiSchema = ( formData: Record, showSubjectFilters: boolean, - showDatasetFilters: boolean + showDatasetFilters: boolean, + fileTypeOptions: string[] = [] ) => { const activeStyle = { "ui:options": { @@ -15,13 +17,11 @@ export const generateUiSchema = ( }, }; - // hide subject-level filter + // Fully remove a field from the rendered DOM (keeps its value in formData). + // Using ui:widget: "hidden" produces just an , so no + // empty Grid row + margin is left behind — fixes the big gap between rows. 
const invisibleStyle = { - "ui:options": { - style: { - display: "none", - }, - }, + "ui:widget": "hidden", }; const hiddenStyle = { @@ -50,19 +50,27 @@ export const generateUiSchema = ( "dataset_filters_toggle", // button first "database", "keyword", + "file_type", // dataset-level: filters by file extensions in iolinks + "dataset_modality_filter", // dataset-level: modality multi-select + AND/OR + "modalities", + "modality_mode", "subject_filters_toggle", + "age_range_slider", // top of subject filters — range slider for age "modality", + "type_name", // sits right after modality — its options depend on it "gender", - "age_min", + "age_min", // hidden via invisibleStyle; written by the slider above "age_max", + "sess_count_range", // sessions min/max on one row "sess_min", "sess_max", + "task_count_range", // tasks min/max on one row "task_min", "task_max", + "run_count_range", // runs min/max on one row "run_min", "run_max", "task_name", - "type_name", "session_name", "run_name", "limit", @@ -87,6 +95,25 @@ export const generateUiSchema = ( // dataset: formData["dataset"] ? activeStyle : {}, // limit: formData["limit"] ? activeStyle : {}, // skip: formData["skip"] ? activeStyle : {}, + // File-type filter — dataset-level. Multi-select of file extensions + // present in iolinks (fetched dynamically via /api/v1/dbs/file-types). + file_type: showDatasetFilters + ? { + "ui:widget": "fileTypeAutocomplete", + "ui:options": { + fileTypes: fileTypeOptions, + ...(Array.isArray(formData["file_type"]) && + formData["file_type"].length > 0 + ? { style: { backgroundColor: Colors.lightBlue } } + : {}), + }, + } + : datasetHiddenStyle, + dataset_modality_filter: showDatasetFilters + ? { "ui:field": "datasetModalityFilter" } + : datasetHiddenStyle, + modalities: invisibleStyle, + modality_mode: invisibleStyle, limit: invisibleStyle, skip: invisibleStyle, @@ -100,16 +127,14 @@ export const generateUiSchema = ( : {} : hiddenStyle, - age_min: showSubjectFilters - ? 
formData["age_min"] - ? activeStyle - : {} - : hiddenStyle, - age_max: showSubjectFilters - ? formData["age_max"] - ? activeStyle - : {} + // Age range — slider lives inside the form via the AgeRangeSliderField + // stable component. age_min/age_max stay in the schema (so the backend + // gets them on submit) but their default numeric inputs are hidden. + age_range_slider: showSubjectFilters + ? { "ui:field": "ageRangeSlider" } : hiddenStyle, + age_min: invisibleStyle, + age_max: invisibleStyle, gender: showSubjectFilters ? formData["gender"] && formData["gender"] !== "any" @@ -117,58 +142,76 @@ export const generateUiSchema = ( : {} : hiddenStyle, - sess_min: showSubjectFilters - ? formData["sess_min"] - ? activeStyle - : {} - : hiddenStyle, - sess_max: showSubjectFilters - ? formData["sess_max"] - ? activeStyle - : {} + // Session / task / run min+max pairs are rendered by a single + // CountRangePairField each. The raw integer inputs are hidden but stay in + // formData so the backend still receives them on submit. + sess_count_range: showSubjectFilters + ? { + "ui:field": "countRangePair", + "ui:options": { + minKey: "sess_min", + maxKey: "sess_max", + label: "sessions", + }, + } : hiddenStyle, + sess_min: invisibleStyle, + sess_max: invisibleStyle, - task_min: showSubjectFilters - ? formData["task_min"] - ? activeStyle - : {} - : hiddenStyle, - task_max: showSubjectFilters - ? formData["task_max"] - ? activeStyle - : {} + task_count_range: showSubjectFilters + ? { + "ui:field": "countRangePair", + "ui:options": { + minKey: "task_min", + maxKey: "task_max", + label: "tasks", + }, + } : hiddenStyle, + task_min: invisibleStyle, + task_max: invisibleStyle, - run_min: showSubjectFilters - ? formData["run_min"] - ? activeStyle - : {} - : hiddenStyle, - run_max: showSubjectFilters - ? formData["run_max"] - ? activeStyle - : {} + run_count_range: showSubjectFilters + ? 
{ + "ui:field": "countRangePair", + "ui:options": { + minKey: "run_min", + maxKey: "run_max", + label: "runs", + }, + } : hiddenStyle, + run_min: invisibleStyle, + run_max: invisibleStyle, task_name: showSubjectFilters - ? formData["task_name"] - ? activeStyle - : {} + ? { + "ui:placeholder": "e.g. rest, motor", + ...(formData["task_name"] ? activeStyle : {}), + } : hiddenStyle, type_name: showSubjectFilters - ? formData["type_name"] - ? activeStyle - : {} + ? { + "ui:widget": "typeAutocomplete", + "ui:options": { + suggestions: getTypeSuggestions(formData.modality), + ...(formData["type_name"] + ? { style: { backgroundColor: Colors.lightBlue } } + : {}), + }, + } : hiddenStyle, session_name: showSubjectFilters - ? formData["session_name"] - ? activeStyle - : {} + ? { + "ui:placeholder": "e.g. 01, pre, baseline", + ...(formData["session_name"] ? activeStyle : {}), + } : hiddenStyle, run_name: showSubjectFilters - ? formData["run_name"] - ? activeStyle - : {} + ? { + "ui:placeholder": "e.g. 01, 02", + ...(formData["run_name"] ? 
activeStyle : {}), + } : hiddenStyle, "ui:submitButtonOptions": { diff --git a/src/utils/SearchPageFunctions/searchformSchema.ts b/src/utils/SearchPageFunctions/searchformSchema.ts index 7fc71b5..3674cdd 100644 --- a/src/utils/SearchPageFunctions/searchformSchema.ts +++ b/src/utils/SearchPageFunctions/searchformSchema.ts @@ -61,9 +61,13 @@ export const baseSchema: JSONSchema7 = { gender: { title: "Subject gender", type: "string", - enum: ["male", "female", "unknown", "any"], + enum: ["male", "female", "any"], default: "any", }, + age_range_slider: { + type: "null", + title: "Age range", + }, age_min: { title: "Minimum age", type: "number", @@ -77,6 +81,7 @@ export const baseSchema: JSONSchema7 = { maximum: 1000, }, + sess_count_range: { type: "null", title: "Sessions" }, sess_min: { title: "Minimum session count", type: "integer", @@ -89,6 +94,7 @@ export const baseSchema: JSONSchema7 = { minimum: 0, maximum: 1000, }, + task_count_range: { type: "null", title: "Tasks" }, task_min: { title: "Minimum task count", type: "integer", @@ -101,6 +107,7 @@ export const baseSchema: JSONSchema7 = { minimum: 0, maximum: 1000, }, + run_count_range: { type: "null", title: "Runs" }, run_min: { title: "Minimum runs", type: "integer", @@ -118,8 +125,29 @@ export const baseSchema: JSONSchema7 = { type: "string", }, type_name: { - title: "Data type keywords", + title: "Data type", + type: "string", + }, + file_type: { + title: "File types", + type: "array", + items: { type: "string" }, + uniqueItems: true, + }, + dataset_modality_filter: { + type: "null", + title: "", + }, + modalities: { + type: "array", + title: "Dataset modalities", + items: { type: "string" }, + uniqueItems: true, + }, + modality_mode: { type: "string", + title: "Modality match mode", + default: "or", }, session_name: { title: "Session keywords", diff --git a/src/utils/SearchPageFunctions/typesByModality.ts b/src/utils/SearchPageFunctions/typesByModality.ts new file mode 100644 index 0000000..090475d --- 
// original diff header: /dev/null +++ b/src/utils/SearchPageFunctions/typesByModality.ts @@ -0,0 +1,34 @@

/**
 * Common BIDS suffixes grouped by modality key (the short token such as
 * "func" or "anat").
 *
 * Extend the lists as you find missing values in your data.
 */
export const TYPES_BY_MODALITY: Record<string, string[]> = {
  anat: ["T1w", "T2w", "FLAIR", "T2star", "PD", "angio", "defacemask"],
  func: ["bold", "sbref", "events", "physio", "stim"],
  dwi: ["dwi", "sbref"],
  fmap: ["phasediff", "magnitude1", "magnitude2", "fieldmap", "epi"],
  meg: ["meg", "channels", "coordsystem", "headshape", "events"],
  eeg: ["eeg", "channels", "electrodes", "coordsystem", "events"],
  ieeg: ["ieeg", "channels", "electrodes", "coordsystem", "events"],
  pet: ["pet", "blood", "events"],
  nirs: ["nirs", "channels", "optodes", "coordsystem", "events"],
  beh: ["beh", "events"],
  motion: ["motion", "channels", "events"],
  perf: ["asl", "m0scan"],
  micr: ["TEM", "SEM", "MRM"],
};

/**
 * Extracts the lookup key for {@link TYPES_BY_MODALITY} from a modality form
 * value.
 *
 * The modality form field stores values like "fMRI (func)"; the token inside
 * the first pair of parentheses is the key. A value without parentheses is
 * returned unchanged.
 *
 * @param modalityValue - Raw value of the modality form field.
 * @returns The modality key, or `null` when no modality is selected
 *   (missing, empty, non-string, or the sentinel "any").
 */
export function getModalityKey(modalityValue?: string): string | null {
  // Form data is untyped at runtime, so guard against non-string values
  // instead of letting `.match` throw.
  if (
    typeof modalityValue !== "string" ||
    modalityValue === "" ||
    modalityValue === "any"
  ) {
    return null;
  }
  const parenthesized = modalityValue.match(/\(([^)]+)\)/);
  return parenthesized ? parenthesized[1] : modalityValue;
}

/**
 * Returns the data-type suggestions for the selected modality.
 *
 * @param modalityValue - Raw value of the modality form field.
 * @returns A fresh array the caller may freely mutate:
 *   - no modality selected: every known suffix, de-duplicated and sorted;
 *   - known modality: that modality's suffixes in table order;
 *   - unknown modality: an empty array.
 */
export function getTypeSuggestions(modalityValue?: string): string[] {
  const key = getModalityKey(modalityValue);
  if (!key) {
    // No modality picked → show all suffixes deduped and sorted.
    return Array.from(new Set(Object.values(TYPES_BY_MODALITY).flat())).sort();
  }
  // Own-property check: a plain index lookup would also resolve inherited
  // Object.prototype members (e.g. "constructor", "toString") and hand back
  // a function instead of a string[].
  if (!Object.prototype.hasOwnProperty.call(TYPES_BY_MODALITY, key)) {
    return [];
  }
  // Copy so callers cannot mutate the shared lookup table.
  return [...TYPES_BY_MODALITY[key]];
}