diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index a0a00b01..00000000 --- a/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -logs/ -.git/ -.idea/ -*.class \ No newline at end of file diff --git a/.env.example b/.env.example deleted file mode 100644 index 9cb719c1..00000000 --- a/.env.example +++ /dev/null @@ -1,44 +0,0 @@ -# ============================================================================= -# DecodingUs Environment Configuration -# ============================================================================= -# Copy this file to .env and fill in the values for your environment. -# NEVER commit .env to version control! -# ============================================================================= - -# ----------------------------------------------------------------------------- -# Application Security -# ----------------------------------------------------------------------------- -# Generate with: openssl rand -base64 64 | tr -d '\n' -APPLICATION_SECRET=changeme-generate-a-real-secret - -# ----------------------------------------------------------------------------- -# Database Configuration -# ----------------------------------------------------------------------------- -# For local development with docker-compose, these are set in docker-compose.yml -# For production, set these to your RDS or external PostgreSQL instance - -# Main database -DATABASE_URL=jdbc:postgresql://localhost:5432/decodingus_db -DATABASE_USER=decodingus_user -DATABASE_PASSWORD=your-secure-password - -# Metadata database (can be same as main or separate) -METADATA_DATABASE_URL=jdbc:postgresql://localhost:5432/decodingus_metadata - -# ----------------------------------------------------------------------------- -# reCAPTCHA (Production) -# ----------------------------------------------------------------------------- -ENABLE_RECAPTCHA=false -RECAPTCHA_SITE_KEY=your-recaptcha-site-key -RECAPTCHA_SECRET_KEY=your-recaptcha-secret-key - -# 
----------------------------------------------------------------------------- -# Contact Form -# ----------------------------------------------------------------------------- -CONTACT_RECIPIENT_EMAIL=contact@decoding-us.com - -# ----------------------------------------------------------------------------- -# Docker Registry (Production CI/CD) -# ----------------------------------------------------------------------------- -DOCKER_REGISTRY= -IMAGE_TAG=latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e75a503e..873f93ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,47 +2,70 @@ name: CI on: push: - branches: [ "main" ] + branches: [main, rust-rewrite-foundation] pull_request: - branches: [ "main" ] permissions: contents: read +concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 + jobs: - build: + check: + name: build · clippy · test runs-on: ubuntu-latest + # The Rust workspace lives in rust/; the repo root holds only docs + scripts. + defaults: + run: + working-directory: rust + + # PostGIS-enabled Postgres for the DB-backed tests (migrations need the + # postgis / citext / pgcrypto extensions). Tests create ephemeral databases. 
+ services: + postgres: + image: postgis/postgis:16-3.4 + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: dev + POSTGRES_DB: decodingus + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U postgres" + --health-interval 10s + --health-timeout 5s + --health-retries 10 + + env: + DATABASE_URL: postgres://postgres:dev@localhost:5432/decodingus?sslmode=disable + APP_SECRET: devdevdevdevdevdevdevdevdevdevdev steps: - - uses: actions/checkout@v4 - - - name: Set up JDK 21 - uses: actions/setup-java@v4 - with: - java-version: '21' - distribution: 'temurin' - - - name: Setup SBT - uses: sbt/setup-sbt@v1 - - - name: Cache SBT - uses: actions/cache@v4 - with: - path: | - ~/.sbt - ~/.ivy2/cache - ~/.cache/coursier - key: ${{ runner.os }}-sbt-${{ hashFiles('**/build.sbt', '**/plugins.sbt') }} - restore-keys: | - ${{ runner.os }}-sbt- - - - name: Run Tests with Coverage - run: sbt clean coverage test coverageReport - - # Optional: Upload coverage report to an artifact if you want to inspect it manually - - name: Upload Coverage Report - uses: actions/upload-artifact@v4 - if: always() - with: - name: coverage-report - path: target/scala-3.3.6/scoverage-report + - uses: actions/checkout@v4 + + - name: Install Rust (stable) + uses: dtolnay/rust-toolchain@stable + with: + components: clippy + + - name: Cache cargo build + uses: Swatinem/rust-cache@v2 + with: + workspaces: rust + + - name: Build + run: cargo build --workspace --locked + + # Strict lint on shipping code (lib + bins). Test-target lints are not gated; + # `cargo test` below still compiles them. 
+ - name: Clippy + run: cargo clippy --workspace --locked -- -D warnings + + - name: Test + run: cargo test --workspace --locked diff --git a/.gitignore b/.gitignore index bde06aac..ec86e798 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ target # Docker volumes postgres_data/ + +# macOS +.DS_Store diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index c9ad18ab..00000000 --- a/Dockerfile +++ /dev/null @@ -1,49 +0,0 @@ -# ============================================================================= -# DecodingUs Application Dockerfile -# ============================================================================= -# Build: sbt stage && docker build -t decodingus . -# Run: docker run -p 9000:9000 --env-file .env decodingus -# ============================================================================= - -FROM eclipse-temurin:21-jre-jammy - -# Labels for container metadata -LABEL org.opencontainers.image.title="DecodingUs" -LABEL org.opencontainers.image.description="Collaborative platform for genetic genealogy and population research" -LABEL org.opencontainers.image.source="https://github.com/decodingus/decodingus" - -# Install curl for healthchecks -RUN apt-get update && apt-get install -y --no-install-recommends curl \ - && rm -rf /var/lib/apt/lists/* - -# Create non-root user for security -RUN groupadd -r decodingus && useradd -r -g decodingus decodingus - -# Set working directory -WORKDIR /app - -# Copy the pre-built application -COPY --chown=decodingus:decodingus target/universal/stage /app - -# Make scripts executable -RUN chmod +x /app/bin/decodingus - -# Switch to non-root user -USER decodingus - -# Expose the port the app runs on -EXPOSE 9000 - -# Health check -HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ - CMD curl -f http://localhost:9000/health || exit 1 - -# JVM tuning for container environments -ENV JAVA_OPTS="-XX:+UseContainerSupport \ - -XX:MaxRAMPercentage=75.0 \ - -XX:+UseG1GC \ - 
-XX:+ExitOnOutOfMemoryError \ - -Djava.security.egd=file:/dev/./urandom" - -# Command to run the application -CMD ["bin/decodingus"] \ No newline at end of file diff --git a/PROJECT_ANALYSIS.md b/PROJECT_ANALYSIS.md deleted file mode 100644 index e09ffe2e..00000000 --- a/PROJECT_ANALYSIS.md +++ /dev/null @@ -1,94 +0,0 @@ -# Project Analysis: DecodingUs - -This document provides a comprehensive analysis of the DecodingUs project, a Scala-based web application. It is intended to guide developers in understanding the architecture, extending its features, and improving test coverage. - -## 1. Project Overview - -DecodingUs is a modern web application built with Scala 3 and the Play Framework. It serves as a platform for genomic data analysis, with features related to haplogroups, biosamples, and scientific publications. The application exposes a REST API, serves web pages, and runs background jobs for data processing. - -## 2. Technology Stack - -The project leverages a modern technology stack: - -- **Backend Framework:** [Play Framework](https://www.playframework.com/) -- **Language:** [Scala 3](https://www.scala-lang.org/) -- **Database:** PostgreSQL -- **Database Access:** [Slick](https://scala-slick.org/) -- **API Definition:** [Tapir](https://tapir.softwaremill.com/) for OpenAPI/Swagger documentation -- **Asynchronous Jobs:** [Apache Pekko](https://pekko.apache.org/) with `pekko-quartz-scheduler` -- **Dependency Injection:** [Guice](https://github.com/google/guice) -- **Testing:** [ScalaTest](https://www.scalatest.org/) with `scalatestplus-play` -- **Build Tool:** [sbt](https://www.scala-sbt.org/) -- **Frontend Interactivity:** [HTMX](https://htmx.org/) for HATEOAS on HTML pages - -## 3. Project Structure - -The project follows a standard Play application layout with some key directories: - -- **`app/`**: Contains the core application source code. - - **`app/api/`**: Tapir endpoint definitions. 
These define the API structure for OpenAPI generation but do not contain business logic. - - **`app/controllers/`**: Play controllers that handle HTTP requests. Some controllers implement the logic for the API endpoints defined in `app/api/`. - - **`app/services/`**: The business logic layer. Controllers delegate to services to perform operations. - - **`app/repositories/`**: The data access layer, responsible for all database interactions using Slick. - - **`app/models/`**: Domain models, API request/response objects, and Slick table definitions. - - **`app/modules/`**: Guice modules for dependency injection and application startup lifecycle. - - **`app/actors/`**: Pekko actors for concurrent and background processing tasks. -- **`conf/`**: Configuration files. - - **`application.conf`**: The main configuration file for the application, including database connections, module loading, and scheduler settings. - - **`routes`**: The main routing file that maps HTTP requests to controller actions. -- **`test/`**: Contains automated tests. -- **`build.sbt`**: The sbt build definition file, where all project dependencies are managed. - -## 4. API Architecture: A Hybrid Approach - -The project uses a hybrid approach for its API: - -1. **Declarative Endpoints with Tapir:** The `app/api/` directory contains endpoint definitions using Tapir. These definitions describe the API's shape (URL, methods, inputs, outputs) and are used to generate a unified OpenAPI specification. -2. **Implementation in Play Controllers:** The actual logic for handling API requests is implemented in standard Play controllers located in `app/controllers/`. -3. **Routing:** The `conf/routes` file maps API paths (e.g., `/api/...`) to `controllers.ApiRouter`, which serves the Swagger UI. The same routes file also directs requests to the appropriate Play controllers that contain the business logic. 
- -This separation allows for clear API documentation while leveraging the familiar Play Framework controller pattern for implementation. - -## 5. Frontend Technologies - -The project utilizes [HTMX](https://htmx.org/) to enhance the interactivity of HTML pages by enabling HATEOAS (Hypermedia as the Engine of Application State) principles. This approach allows for dynamic updates to parts of the page without full page reloads, using HTML attributes to trigger AJAX requests and swap content. This minimizes the need for extensive client-side JavaScript frameworks for common interactive patterns. - -## 6. Application Lifecycle - -The application's startup and lifecycle are managed by Guice modules in the `app/modules/` directory. - -- **`StartupModule.scala`**: This module eagerly binds a `StartupService`. -- **`StartupService`**: This service is responsible for initializing the application on startup. A key task it performs is seeding the database with essential data, such as importing haplogroup trees via the `TreeInitializationService`. -- **`Scheduler.scala`**: This module configures and schedules background jobs using the Pekko Quartz Scheduler. Job schedules are defined in `conf/application.conf`. - -## 7. Testing Guide - -Tests are located in the `test/` directory and are written using ScalaTest. - -### Running Tests - -To run the existing test suite, execute the following command in your sbt shell: - -```bash -sbt test -``` - -### Adding New Tests - -When adding new features, it is crucial to add corresponding tests. - -- **Controller Tests:** For new controllers, add a new spec file in `test/controllers/`. Use `scalatestplus-play` to help create a test application instance and make requests to your controller actions. -- **Service Tests:** For new services, create a new spec file in a corresponding package under `test/`. Mock any repository dependencies to isolate the business logic for unit testing. 
-- **Repository Tests:** For repository tests, you may need an in-memory or test database to verify database queries. - -## 8. Development Guide: Adding a New Feature - -Here is a step-by-step guide to adding a new feature (e.g., a new API endpoint): - -1. **Define the Endpoint (Tapir):** If it's a new API endpoint, first define its structure in a new file within `app/api/`. This makes it part of the OpenAPI specification. -2. **Add Route:** Add a new entry in the `conf/routes` file to route the new URL to a new controller action. -3. **Create Controller:** Create a new controller in `app/controllers/` or add a new action to an existing one. This controller will handle the HTTP request. -4. **Implement Service Logic:** Create a new service in `app/services/` to contain the business logic. The controller should call this service. -5. **Implement Repository Logic:** If the feature requires database access, create a new repository in `app/repositories/` or add a new query method to an existing one. The service will use this repository. -6. **Add Models:** Define any new data structures (case classes) needed for the API request/response or for the database schema in the `app/models/` directory. -7. **Write Tests:** Add new tests for the controller, service, and repository to ensure the feature works correctly and is protected against future regressions. diff --git a/README.md b/README.md index 5da205ac..4e43e895 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,149 @@ # Decoding Us -A collaborative platform for genetic genealogy and population research, bridging community efforts with secure, AT Protocol-powered Edge computing. + +A collaborative platform for genetic genealogy and population research, bridging +community efforts with secure, AT Protocol–powered Edge computing. 
+ ## Site Information + [Decoding-Us.com](https://decoding-us.com/) ## Overview -Decoding Us is the central web application designed to empower genetic genealogists and researchers by: +Decoding Us is the **AppView** — the central web application for the Decoding Us +federation. It maintains a curated Y-DNA and mitochondrial-DNA phylogenetic +catalog, contextualizes publicly available academic samples within it, and +aggregates privacy-preserving summaries contributed across the network. It is a +read/coordination surface and a **broker**: it aggregates and reports, it does +not hold raw genomes or perform the heavy genetic analysis — that happens at the +Edge (the [Navigator](https://github.com/JamesKane/decodingus-navigator) +companion app), on hardware the participant controls. -* Facilitating collaborative development of Y-DNA and mtDNA haplogroup trees, leveraging de-identified data from Edge computing participants within a Pan Genome framework. This allows for more comprehensive and accurate analysis than traditional linear assemblies. -* Enabling discovery of genetic relatives through privacy-preserving IBD segment matching across the network of users. -* Providing a secure hub for sharing and refining population research insights derived from distributed genetic data. +This repository is a **Rust rewrite** of the platform, which originally ran on +Scala 3 / Play Framework. That legacy codebase has been removed — the Rust app +under [`rust/`](rust/) is now the platform, and the data cutover is verified +end-to-end against a real production dump. See [`rust/README.md`](rust/README.md) +and [`rust/STATUS.md`](rust/STATUS.md) for the detailed, living status. ## Purpose -Decoding Us serves as a next-generation citizen science platform for population research, specifically engineered to connect and empower individuals contributing to genetic genealogy. 
It bridges the gap between individual genomic data (processed securely via companion Edge computing software) and global research efforts. A core objective is to collaboratively build out highly resolved haplogroup trees, and to this end, the platform integrates data from a curated list of academic research papers by showing where these public samples fit within the experimental trees being constructed by the community. Direct links back to original sequencing data in repositories like the European Nucleotide Archive (ENA) are provided for full transparency. +Decoding Us is a next-generation citizen-science platform for population research. +It connects individual genomic data — processed securely by companion Edge +software — to global research efforts, and collaboratively builds highly resolved +haplogroup trees. It integrates a curated list of academic papers by showing where +those public samples fall within the experimental trees the community is building, +with direct links back to the original sequencing data in repositories such as the +European Nucleotide Archive (ENA). + +Built on decentralized personal-data principles, it leverages the AT Protocol and +Personal Data Servers (PDS) so sensitive genomic data stays under the user's +control. The AppView itself holds **no personal data**: it works with +de-identified call signatures and aggregated, privacy-preserving insights, while +identity-bearing data and genetic comparison move directly Edge-to-Edge over an +encrypted channel that the AppView only brokers. + +A Pan-Genome approach underlies the science — moving beyond single reference +genomes toward a more inclusive, accurate representation of human genetic +diversity for genealogical and population study. 
+ +## Architecture + +Decoding Us is a federation of three cooperating parts: + +- **AppView (this repo, Rust).** The public read surface + JSON API, the curated + Y/mt haplotree with versioning/merge tooling, the curator suite, and the + federated-reporting mirror. It is a single static binary backed by PostgreSQL. +- **Navigator (Edge).** A companion application that runs on the participant's own + machine, processes raw reads (BAM/CRAM) locally, and publishes only anonymized + computed summaries to the participant's PDS — never raw data. +- **Shared crates** (`decodingus-shared`). Pure domain types, the AT Protocol + identity/crypto + OAuth client, and the genomics coordinate/parse library, + consumed by both the AppView and Navigator. + +**Privacy stance:** the AppView is a pure broker. The anonymized reporting mirror +drops donor PII at ingest; identity-bearing data (names, ancestor records, +kit↔identity linkage) and genetic comparison (IBD) are exchanged **Edge-to-Edge +over an encrypted, consent-gated channel** that the AppView coordinates but cannot +read. The collaboration/IBD layer that builds on this is designed in +[`documents/planning/`](documents/planning/) (the `d1`–`d5` specs + roadmap). -Built on the principles of decentralized personal data management, it leverages the AT Protocol and Personal Data Servers (PDS) to ensure that sensitive genomic data remains under the user's control. By focusing on de-identified call signatures and IBD segment discovery within a Pan Genome context, Decoding Us enables the community to expand genetic relative networks, all while upholding the highest standards of data privacy and security. The Pan Genome approach moves beyond single reference genomes, providing a more inclusive and accurate representation of human genetic diversity for genealogical and population studies. 
## Key Features -* Collaborative Haplogroup Tree Resolution: Tools for community-driven refinement and expansion of Y-DNA and mtDNA haplogroup trees based on de-identified call signatures. -* Academic Data Integration & Contextualization: Visualize publicly available academic samples within the experimental haplogroup trees built by the Decoding Us community, with direct links to original sequencing data in public archives (e.g., ENA). -* Privacy-Preserving Genetic Relative Discovery: Functionality to discover and connect with other users based on matching IBD segments, enhancing genealogical research. -* Secure Data Interaction: A web interface for interacting with and visualizing insights derived from de-identified data processed on secure Edge nodes. -* Research Collaboration Tools: Features to facilitate discussion, sharing, and joint analysis among genealogists and researchers. +**Available now:** + +- **De-novo haplogroup trees.** Y-DNA and mtDNA phylogenies built from genotypes + (IQ-TREE maximum-likelihood + ancestral-state reconstruction), with per-branch + defining SNPs matched to the variant catalog, public samples placed as leaves, + temporal versioning, and change-set review. +- **Variant catalog & naming authority.** A universal per-site variant model with + the `DU` naming authority, alias/coordinate search, and CSV/GFF3 export, kept in + sync by a YBrowse GFF3 ingestion pipeline (~3M variants). +- **Academic data integration.** Publications and their public biosamples shown in + context, with links to ENA; a public "suggest a paper" flow backed by OpenAlex. +- **Federated reporting.** Anonymized population coverage, ancestry, and + haplogroup-distribution reports aggregated from network-published summaries. +- **Public JSON API** with OpenAPI 3 / Swagger UI, plus a full curator toolset. 
-## Technologies +**On the roadmap** (designed; see `documents/planning/`): -- [Scala 3](https://www.scala-lang.org/) - Scalable programming language -- [Play Framework](https://www.playframework.com/) - Web framework for Scala -- [HTMX](https://htmx.org/) - HTML extension for modern web applications -- [PostgreSQL](https://www.postgresql.org/) - Relational database management system -- [Docker](https://www.docker.com/) - Containerization platform -- [AWS](https://aws.amazon.com/) - Cloud computing platform +- **Privacy-preserving genetic-relative discovery** via Edge-to-Edge IBD segment + matching, brokered (not read) by the AppView. +- **Collaborative research projects** — attributed, scoped assertions over a + PII-free research-subject registry, with admin-team ACLs and an encrypted + peer-to-peer channel for any identity-bearing exchange. + +## Technology Stack + +- [Rust](https://www.rust-lang.org/) — a single static binary, no JVM +- [Axum](https://github.com/tokio-rs/axum) + [tokio](https://tokio.rs/) — async web stack +- [Askama](https://github.com/rinja-rs/askama) — compile-time typed templates +- [HTMX](https://htmx.org/) + Bootstrap 5 — HATEOAS-first frontend +- [SQLx](https://github.com/launchbadge/sqlx) + [PostgreSQL](https://www.postgresql.org/) / [PostGIS](https://postgis.net/) +- [AT Protocol](https://atproto.com/) — decentralized identity + OAuth (PKCE / DPoP) +- Deploys as a single static binary; Apple `container` runs the local PostGIS for Docker-less dev + +## Repository Layout + +``` +rust/ the Rust AppView (workspace) — see rust/README.md + crates/du-db SQLx data layer + versioning/merge/graft/naming engines + crates/du-web Axum app: routes, Askama templates, i18n, auth, JSON API + crates/du-jobs scheduled jobs + the Jetstream reporting-mirror consumer + crates/du-external OpenAlex / ENA / NCBI / AWS clients + crates/du-migrate data-cutover ETL + the de-novo tree builder + migrations/ the redesigned PostgreSQL schema +documents/ architecture, 
planning specs (incl. d1–d5), proposals +scripts/ deploy-time helpers (maintenance splash page) +``` + +Shared crates live in the sibling [`decodingus-shared`](https://github.com/JamesKane/decodingus-shared) +repo and are pulled in as pinned git dependencies. + +## Getting Started + +Full setup, run, test, ETL, and deploy instructions live in +[`rust/README.md`](rust/README.md). The short version: + +```sh +cd rust +eval "$(./scripts/test-db.sh up)" # local PostGIS (Apple container; no Docker) +DATABASE_URL=... APP_SECRET=<32+ chars> cargo run -p du-web # serves on :9000 +``` ## Note on Data Processing & Privacy -Sensitive genetic computing and direct handling of raw sequencing data are performed securely on the user's chosen -environment (local network or leased virtual private server) via companion Edge computing software. This utilizes AT -Protocol and Personal Data Servers (PDS) to maintain individual data sovereignty. Decoding Us, the web application, -operates exclusively with de-identified call signatures and aggregated, privacy-preserving insights to facilitate -collaborative research and network building. + +Sensitive genetic computation and direct handling of raw sequencing data are +performed on the participant's own environment (local network or leased VPS) by +companion Edge software, using the AT Protocol and Personal Data Servers to +maintain data sovereignty. The Decoding Us AppView operates exclusively on +de-identified call signatures and aggregated, privacy-preserving insights; any +identity-bearing exchange between participants is end-to-end encrypted and merely +brokered by the AppView, which never sees the plaintext. ## Related Projects -[DecodingUs - Navigator](https://github.com/JamesKane/decodingus-navigator) - A companion Edge computing application for Decoding Us. -[DecodingUs - Nexus](https://github.com/JamesKane/decodingus-nexus) - A horizontal scaling BGS (Big Graph Service) for the Decoding Us Federation. 
+- [DecodingUs — Navigator](https://github.com/JamesKane/decodingus-navigator) — the companion Edge computing application. +- [decodingus-shared](https://github.com/JamesKane/decodingus-shared) — shared Rust crates (domain, AT Protocol, genomics). ## [![License](https://img.shields.io/badge/License-BSD_3--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) -This project is licensed under the BSD 3-Clause License. See the [LICENSE](LICENSE) file for details. \ No newline at end of file +This project is licensed under the BSD 3-Clause License. See the [LICENSE](LICENSE) file for details. diff --git a/app/actions/ApiSecurityAction.scala b/app/actions/ApiSecurityAction.scala deleted file mode 100644 index 972589d9..00000000 --- a/app/actions/ApiSecurityAction.scala +++ /dev/null @@ -1,15 +0,0 @@ -package actions - -import play.api.libs.json.Reads -import play.api.mvc.* - -/** - * A trait that defines an action builder for handling secure API requests, - * including support for JSON payloads and custom authentication mechanisms. - * - * ApiSecurityAction extends ActionBuilder, providing a foundation for constructing actions - * with additional security layers and JSON processing capabilities. 
- */ -trait ApiSecurityAction extends ActionBuilder[Request, AnyContent] { - def jsonAction[A](implicit reader: Reads[A]): ActionBuilder[Request, A] -} \ No newline at end of file diff --git a/app/actions/AuthenticatedAction.scala b/app/actions/AuthenticatedAction.scala deleted file mode 100644 index f909fe74..00000000 --- a/app/actions/AuthenticatedAction.scala +++ /dev/null @@ -1,89 +0,0 @@ -package actions - -import models.domain.user.User -import play.api.mvc._ -import services.AuthService - -import java.util.UUID -import javax.inject.Inject -import scala.concurrent.{ExecutionContext, Future} - -// Custom Request type that holds the authenticated User -class AuthenticatedRequest[A](val user: User, request: Request[A]) extends WrappedRequest[A](request) - -/** - * AuthenticatedAction builder. - * Checks for a session "userId" and fetches the user. - */ -class AuthenticatedAction @Inject()( - val parser: BodyParsers.Default, - val authService: AuthService, - val userRepository: repositories.UserRepository - )(implicit val executionContext: ExecutionContext) - extends ActionBuilder[AuthenticatedRequest, AnyContent] - with ActionRefiner[Request, AuthenticatedRequest] { - - override protected def refine[A](request: Request[A]): Future[Either[Result, AuthenticatedRequest[A]]] = { - request.session.get("userId") match { - case Some(userIdStr) => - try { - val userId = UUID.fromString(userIdStr) - userRepository.findById(userId).map { - case Some(user) if user.isActive => Right(new AuthenticatedRequest(user, request)) - case _ => Left(Results.Redirect(controllers.routes.AuthController.login).withNewSession) - } - } catch { - case _: IllegalArgumentException => - Future.successful(Left(Results.Redirect(controllers.routes.AuthController.login).withNewSession)) - } - case None => - Future.successful(Left(Results.Redirect(controllers.routes.AuthController.login))) - } - } -} - -/** - * RoleAction builder factory. 
- * Allows requiring specific roles on top of authentication. - */ -class RoleAction @Inject()(authService: AuthService)(implicit ec: ExecutionContext) { - - def apply(requiredRoles: String*): ActionFilter[AuthenticatedRequest] = new ActionFilter[AuthenticatedRequest] { - override protected def executionContext: ExecutionContext = ec - - override protected def filter[A](request: AuthenticatedRequest[A]): Future[Option[Result]] = { - request.user.id match { - case Some(userId) => - authService.hasAnyRole(userId, requiredRoles).map { hasRole => - if (hasRole) None - else Some(Results.Forbidden("You do not have the required permissions to access this resource.")) - } - case None => - Future.successful(Some(Results.Forbidden("You do not have the required permissions to access this resource."))) - } - } - } -} - -/** - * PermissionAction builder factory. - * Allows requiring specific permissions on top of authentication. - */ -class PermissionAction @Inject()(authService: AuthService)(implicit ec: ExecutionContext) { - - def apply(permission: String): ActionFilter[AuthenticatedRequest] = new ActionFilter[AuthenticatedRequest] { - override protected def executionContext: ExecutionContext = ec - - override protected def filter[A](request: AuthenticatedRequest[A]): Future[Option[Result]] = { - request.user.id match { - case Some(userId) => - authService.hasPermission(userId, permission).map { hasPermission => - if (hasPermission) None - else Some(Results.Forbidden("You do not have the required permissions to access this resource.")) - } - case None => - Future.successful(Some(Results.Forbidden("You do not have the required permissions to access this resource."))) - } - } - } -} diff --git a/app/actions/DevelopmentSecureApiAction.scala b/app/actions/DevelopmentSecureApiAction.scala deleted file mode 100644 index bd7fca3f..00000000 --- a/app/actions/DevelopmentSecureApiAction.scala +++ /dev/null @@ -1,65 +0,0 @@ -package actions - -import org.apache.pekko.stream.Materializer 
-import play.api.libs.json.{JsError, Json, Reads} -import play.api.mvc.* -import play.api.mvc.Results.BadRequest - -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -/** - * A development-mode implementation of the `ApiSecurityAction` trait, providing an unsecured - * means to handle API requests that supports JSON payload parsing and processing. - * - * This class is designed for use in development environments where full security measures are not - * required. It bypasses authentication mechanisms and directly allows the handling of requests. - * - * @param controllerComponents Controller components for configuration and auxiliary services - * @param defaultParser Default body parser for handling request payloads - * @param executionContext The implicit `ExecutionContext` for asynchronous operations - * @param materializer The stream `Materializer` for managing Play's asynchronous streams - */ -@Singleton -class DevelopmentSecureApiAction @Inject()( - val controllerComponents: ControllerComponents, - val defaultParser: BodyParsers.Default - )(implicit val executionContext: ExecutionContext, val materializer: Materializer) extends ApiSecurityAction with JsonValidation { - - override def parser: BodyParser[AnyContent] = defaultParser - - /** - * Constructs an `ActionBuilder` for handling requests with JSON payloads, allowing the payload to be - * validated and parsed into a specified type `A`. - * - * This method sets up the necessary JSON body parser and validation mechanism using the provided - * implicit `Reads[A]`. If the JSON validation fails, an appropriate error response is returned. 
- * - * @param reader an implicit JSON `Reads[A]` type class, used to validate and parse the JSON body into type `A` - * @return an `ActionBuilder` configured to process requests with JSON payloads, validating and parsing them into type `A` - */ - override def jsonAction[A](implicit reader: Reads[A]): ActionBuilder[Request, A] = { - new ActionBuilder[Request, A] { - override def parser: BodyParser[A] = jsonBodyParser[A] - - override protected def executionContext: ExecutionContext = DevelopmentSecureApiAction.this.executionContext - - override def invokeBlock[B](request: Request[B], block: Request[B] => Future[Result]): Future[Result] = { - block(request) - } - } - } - - /** - * Overrides the `invokeBlock` method to execute a block of request handling logic. - * This implementation directly invokes the provided block without any additional - * preprocessing or filtering, making it suitable for development purposes. - * - * @param request the incoming HTTP request of type `A` - * @param block a function that processes the HTTP request and returns a `Future[Result]` - * @return a `Future[Result]` produced by executing the provided block - */ - override def invokeBlock[A](request: Request[A], block: Request[A] => Future[Result]): Future[Result] = { - block(request) - } -} \ No newline at end of file diff --git a/app/actions/JsonValidation.scala b/app/actions/JsonValidation.scala deleted file mode 100644 index a0fb989b..00000000 --- a/app/actions/JsonValidation.scala +++ /dev/null @@ -1,33 +0,0 @@ -package actions - -import org.apache.pekko.stream.Materializer -import play.api.libs.json.{JsError, Json, Reads} -import play.api.mvc.Results.BadRequest -import play.api.mvc.* - -import scala.concurrent.ExecutionContext - -/** - * A trait providing common JSON validation and parsing capabilities for ActionBuilders. - * This helps to reduce boilerplate and ensure consistent JSON error responses across different API actions. 
- */ -trait JsonValidation { - def controllerComponents: ControllerComponents - implicit def executionContext: ExecutionContext - implicit def materializer: Materializer - - /** - * Constructs an `ActionBuilder` for handling requests with JSON payloads, allowing the payload to be - * validated and parsed into a specified type `A`. - * - * This method sets up the necessary JSON body parser and validation mechanism using the provided - * implicit `Reads[A]`. If the JSON validation fails, an appropriate error response is returned. - * - * @param reader an implicit JSON `Reads[A]` type class, used to validate and parse the JSON body into type `A` - * @return an `ActionBuilder` configured to process requests with JSON payloads, validating and parsing them into type `A` - */ - protected def jsonBodyParser[A](implicit reader: Reads[A]): BodyParser[A] = - controllerComponents.parsers.json.validate( - _.validate[A].asEither.left.map(e => BadRequest(Json.obj("message" -> JsError.toJson(e)))) - ) -} diff --git a/app/actions/PdsAuthAction.scala b/app/actions/PdsAuthAction.scala deleted file mode 100644 index 98cc4635..00000000 --- a/app/actions/PdsAuthAction.scala +++ /dev/null @@ -1,101 +0,0 @@ -package actions - -import jakarta.inject.{Inject, Singleton} -import models.domain.pds.PdsNode -import org.apache.pekko.stream.Materializer -import play.api.Logging -import play.api.libs.json.{Json, Reads} -import play.api.mvc.* -import repositories.PdsNodeRepository -import services.PdsSignatureVerifier - -import scala.concurrent.{ExecutionContext, Future} - -case class PdsAuthRequest[A](pdsNode: PdsNode, request: Request[A]) extends WrappedRequest[A](request) - -@Singleton -class PdsAuthAction @Inject()( - val parser: BodyParsers.Default, - val controllerComponents: ControllerComponents, - signatureVerifier: PdsSignatureVerifier, - nodeRepo: PdsNodeRepository - )(implicit val executionContext: ExecutionContext, val materializer: Materializer) - extends ActionBuilder[PdsAuthRequest, 
AnyContent] with JsonValidation with Logging { - - private val DidHeader = "X-PDS-DID" - private val SignatureHeader = "X-PDS-Signature" - private val TimestampHeader = "X-PDS-Timestamp" - private val NonceHeader = "X-PDS-Nonce" - - def jsonAction[A](implicit reader: Reads[A]): ActionBuilder[PdsAuthRequest, A] = { - new ActionBuilder[PdsAuthRequest, A] { - override def parser: BodyParser[A] = jsonBodyParser[A] - - override protected def executionContext: ExecutionContext = PdsAuthAction.this.executionContext - - override def invokeBlock[B](request: Request[B], block: PdsAuthRequest[B] => Future[Result]): Future[Result] = { - PdsAuthAction.this.invokeBlock(request, block) - } - } - } - - override def invokeBlock[A](request: Request[A], block: PdsAuthRequest[A] => Future[Result]): Future[Result] = { - val headers = request.headers - - (headers.get(DidHeader), headers.get(SignatureHeader), headers.get(TimestampHeader)) match { - case (Some(did), Some(signature), Some(timestamp)) => - authenticateRequest(request, did, signature, timestamp, headers.get(NonceHeader), block) - case _ => - Future.successful(Results.Unauthorized( - Json.obj("error" -> "Missing required authentication headers (X-PDS-DID, X-PDS-Signature, X-PDS-Timestamp)") - )) - } - } - - private def authenticateRequest[A]( - request: Request[A], - did: String, - signature: String, - timestamp: String, - nonce: Option[String], - block: PdsAuthRequest[A] => Future[Result] - ): Future[Result] = { - if (!signatureVerifier.isTimestampValid(timestamp)) { - return Future.successful(Results.Unauthorized( - Json.obj("error" -> "Request timestamp expired or invalid") - )) - } - - if (nonce.exists(n => !signatureVerifier.checkAndRecordNonce(n))) { - return Future.successful(Results.Unauthorized( - Json.obj("error" -> "Nonce already used") - )) - } - - nodeRepo.findByDid(did).flatMap { - case None => - Future.successful(Results.Unauthorized( - Json.obj("error" -> s"PDS node not registered: $did") - )) - case 
Some(node) => - val bodyHash = signatureVerifier.hashBody(request) - val signingInput = signatureVerifier.buildSigningInput( - request.method, request.path, timestamp, bodyHash, nonce - ) - - signatureVerifier.verifySignature(did, signingInput, signature).flatMap { - case true => - logger.debug(s"PDS request authenticated for $did") - block(PdsAuthRequest(node, request)) - case false => - Future.successful(Results.Unauthorized( - Json.obj("error" -> "Invalid signature") - )) - }.recover { - case e: Exception => - logger.error(s"Error during PDS authentication for $did: ${e.getMessage}", e) - Results.InternalServerError(Json.obj("error" -> "Authentication error")) - } - } - } -} diff --git a/app/actions/ProductionSecureApiAction.scala b/app/actions/ProductionSecureApiAction.scala deleted file mode 100644 index 892a0cbd..00000000 --- a/app/actions/ProductionSecureApiAction.scala +++ /dev/null @@ -1,81 +0,0 @@ -package actions - -import filters.ApiKeyFilter -import org.apache.pekko.stream.Materializer -import play.api.libs.json.{JsError, Json} -import play.api.libs.streams.Accumulator -import play.api.mvc.* -import play.api.mvc.Results.BadRequest - -import javax.inject.Inject -import scala.concurrent.{ExecutionContext, Future} - -/** - * A secure action builder implementation for handling API requests in a production environment. - * - * ProductionSecureApiAction combines API key validation, JSON payload handling, and modularized - * request processing via the ActionBuilder pattern. It ensures that all incoming API requests - * are validated and handled securely. - * - * @constructor Creates a new instance of ProductionSecureApiAction. - * @param apiKeyFilter The filter responsible for validating API keys in incoming requests. - * @param defaultParser The default body parser for parsing HTTP requests. - * @param controllerComponents Components used for constructing the controller's actions and responses. 
- * @param executionContext The ExecutionContext for handling asynchronous operations. - * @param materializer The Materializer used for Play's stream processing. - */ -class ProductionSecureApiAction @Inject()( - apiKeyFilter: ApiKeyFilter, - val defaultParser: BodyParsers.Default, - val controllerComponents: ControllerComponents - )(implicit val executionContext: ExecutionContext, val materializer: Materializer) extends ApiSecurityAction with JsonValidation { - - override def parser: BodyParser[AnyContent] = defaultParser - - /** - * Constructs an `ActionBuilder` for handling JSON requests and parsing their bodies into a specified type `A`, - * with support for API key filtering and JSON validation. - * - * This method ensures that incoming requests are authenticated using an API key filter. - * It also validates the request body as JSON and attempts to parse it as type `A` using the implicit `Reads[A]`. - * If the API key is missing/invalid or the JSON validation fails, appropriate error responses are generated. 
- * - * @param reader an implicit JSON `Reads[A]` type class, used to validate and parse the JSON body into type `A` - * @return a configured `ActionBuilder` that processes JSON requests as type `A` and applies API key authentication - */ - def jsonAction[A](implicit reader: play.api.libs.json.Reads[A]): ActionBuilder[Request, A] = { - new ActionBuilder[Request, A] { - override def parser: BodyParser[A] = BodyParser { requestHeader => - val jsonValidatedParser = jsonBodyParser[A] - - val accumulator = apiKeyFilter.filter(requestHeader).map { - case Some(result) => Accumulator.done(Left(result)) - case None => jsonValidatedParser(requestHeader) - } - - Accumulator.flatten(accumulator) - } - - override protected def executionContext: ExecutionContext = ProductionSecureApiAction.this.executionContext - - override def invokeBlock[B](request: Request[B], block: Request[B] => Future[Result]): Future[Result] = { - block(request) - } - } - } - - /** - * Invokes the provided block of request handling logic after applying the API key filter to the incoming request. - * If the request passes authentication, the block is executed. Otherwise, an appropriate error response is returned. 
- * - * @param request the incoming HTTP request of type `A` - * @param block a function that processes the authenticated HTTP request and returns a `Future[Result]` - * @return a `Future[Result]` representing the HTTP response, either from the API key filter or the provided block - */ - override def invokeBlock[A](request: Request[A], block: Request[A] => Future[Result]): Future[Result] = { - apiKeyFilter.filter(request).flatMap { - case Some(result) => Future.successful(result) - case None => block(request) - } - } -} \ No newline at end of file diff --git a/app/actors/GenomicStudyUpdateActor.scala b/app/actors/GenomicStudyUpdateActor.scala deleted file mode 100644 index d7645ffc..00000000 --- a/app/actors/GenomicStudyUpdateActor.scala +++ /dev/null @@ -1,94 +0,0 @@ -package actors - -import models.domain.publications.{PublicationGenomicStudy, StudySource} -import org.apache.pekko.actor.Actor -import org.apache.pekko.stream.scaladsl.{Sink, Source} -import org.apache.pekko.stream.{Materializer, ThrottleMode} -import repositories.{BiosampleRepository, GenomicStudyRepository, PublicationBiosampleRepository, PublicationGenomicStudyRepository} -import services.GenomicStudyService - -import javax.inject.Inject -import scala.concurrent.duration.* -import scala.concurrent.{ExecutionContextExecutor, Future} - -object GenomicStudyUpdateActor { - case class UpdateStudy(accession: String, source: StudySource, publicationId: Option[Int]) - - case class UpdateResult(accession: String, success: Boolean, message: String) -} - -class GenomicStudyUpdateActor @Inject()( - genomicStudyService: GenomicStudyService, - studyRepository: GenomicStudyRepository, - biosampleRepository: BiosampleRepository, - publicationStudyRepository: PublicationGenomicStudyRepository, - publicationBiosampleRepository: PublicationBiosampleRepository - ) extends Actor { - - import GenomicStudyUpdateActor.* - - implicit val materializer: Materializer = Materializer(context.system) - implicit val ec: 
ExecutionContextExecutor = context.dispatcher - - // Rate limiting configuration - private val elementsPerUnit = 1 - private val perDuration = 2.second // Increased to handle NCBI rate limits - private val maxBurst = 1 - private val throttleMode = ThrottleMode.shaping - - def receive = { - case UpdateStudy(accession, source, publicationId) => - val sender = context.sender() - - Source.single(accession) - .throttle(elementsPerUnit, perDuration, maxBurst, throttleMode) - .mapAsync(1) { acc => - updateStudy(acc, source, publicationId) - } - .runWith(Sink.head) - .foreach(result => sender ! result) - } - - private def updateStudy(accession: String, source: StudySource, publicationId: Option[Int]) = { - (for { - studyOpt <- genomicStudyService.getStudyDetails(accession) - result <- studyOpt match { - case Some(study) => - for { - savedStudy <- studyRepository.saveStudy(study) - biosamples <- genomicStudyService.getBiosamplesForStudy(accession) - savedBiosamples <- if (biosamples.nonEmpty) { - biosampleRepository.upsertMany(biosamples) - } else Future.successful(Seq.empty) - _ <- publicationId match { - case Some(pubId) => - for { - _ <- publicationStudyRepository.create(PublicationGenomicStudy( - publicationId = pubId, - studyId = savedStudy.id.get - )) - _ <- Future.sequence( - savedBiosamples.flatMap(_.id).map { biosampleId => - publicationBiosampleRepository.create( - models.domain.publications.PublicationBiosample(pubId, biosampleId) - ) - } - ) - } yield () - case None => Future.successful(()) - } - } yield UpdateResult( - accession, - true, - s"${source} study updated successfully with ${savedBiosamples.size} biosamples" + - publicationId.map(id => s" and linked to publication $id").getOrElse("") - ) - case None => - Future.successful(UpdateResult(accession, false, s"No data found in ${source}")) - } - } yield result).recover { - case e: Exception => - UpdateResult(accession, false, s"Failed to process: ${e.getMessage}") - } - } -} \ No newline at end of file diff 
--git a/app/actors/MatchDiscoveryActor.scala b/app/actors/MatchDiscoveryActor.scala deleted file mode 100644 index 237b2a82..00000000 --- a/app/actors/MatchDiscoveryActor.scala +++ /dev/null @@ -1,31 +0,0 @@ -package actors - -import jakarta.inject.Inject -import org.apache.pekko.actor.Actor -import play.api.Logging -import services.ibd.{MatchDiscoveryService, PopulationAnalysisService} - -import scala.concurrent.ExecutionContext - -object MatchDiscoveryActor { - case object RunDiscovery -} - -class MatchDiscoveryActor @Inject()( - populationAnalysisService: PopulationAnalysisService, - matchDiscoveryService: MatchDiscoveryService -)(implicit ec: ExecutionContext) extends Actor with Logging { - - import MatchDiscoveryActor.* - - override def receive: Receive = { - case RunDiscovery => - logger.info("Starting match discovery computation") - for { - overlapCount <- populationAnalysisService.computeAllOverlapScores() - suggestionCount <- matchDiscoveryService.generateSuggestions() - } yield { - logger.info(s"Match discovery complete: $overlapCount overlap scores computed, $suggestionCount suggestions generated") - } - } -} diff --git a/app/actors/PublicationDiscoveryActor.scala b/app/actors/PublicationDiscoveryActor.scala deleted file mode 100644 index de62f5cc..00000000 --- a/app/actors/PublicationDiscoveryActor.scala +++ /dev/null @@ -1,30 +0,0 @@ -package actors - -import jakarta.inject.Inject -import org.apache.pekko.actor.Actor -import play.api.Logging -import services.PublicationDiscoveryService - -import scala.concurrent.ExecutionContext - -object PublicationDiscoveryActor { - case object RunDiscovery -} - -class PublicationDiscoveryActor @Inject()( - discoveryService: PublicationDiscoveryService - )(implicit ec: ExecutionContext) extends Actor with Logging { - - import PublicationDiscoveryActor._ - - override def receive: Receive = { - case RunDiscovery => - logger.info("Received RunDiscovery message") - discoveryService.runDiscovery().map { _ => - 
logger.info("Discovery run finished successfully") - }.recover { - case e: Exception => - logger.error(s"Discovery run failed: ${e.getMessage}", e) - } - } -} diff --git a/app/actors/PublicationUpdateActor.scala b/app/actors/PublicationUpdateActor.scala deleted file mode 100644 index cb7ab7d4..00000000 --- a/app/actors/PublicationUpdateActor.scala +++ /dev/null @@ -1,139 +0,0 @@ -package actors - -import org.apache.pekko.actor.Actor -import play.api.Logging -import repositories.PublicationRepository -import services.OpenAlexService - -import scala.concurrent.duration.* -import scala.concurrent.{ExecutionContext, Future} - -// Pekko Streams imports -import org.apache.pekko.stream.scaladsl.{Sink, Source} -import org.apache.pekko.stream.{Materializer, ThrottleMode} - -/** Companion object for PublicationUpdateActor containing message types and result cases - */ -object PublicationUpdateActor { - /** Message to trigger update of all publications in the system */ - case object UpdateAllPublications - - case class UpdateSinglePublication(doi: String) - - /** Represents the result of a single publication update operation - * - * @param doi The DOI (Digital Object Identifier) of the publication that was processed - * @param success Whether the update operation was successful - * @param message A descriptive message about the result of the operation - */ - case class UpdateResult(doi: String, success: Boolean, message: String) -} - -/** Actor responsible for managing publication updates from OpenAlex API with rate limiting - * - * This actor handles the periodic updating of publication data from OpenAlex, - * implementing rate limiting to respect API constraints (10 requests per second). - * It uses Pekko Streams for efficient processing and backpressure management. 
- * - * Rate limiting is implemented using Pekko Streams throttle with: - * - 1 request per 150ms (approximately 6.67 requests per second) - * - No initial burst - * - Shaping mode for consistent spacing of requests - * - * The actor processes publications sequentially to maintain strict rate limiting - * and logs all operations for monitoring and debugging purposes. - * - * @param openAlexService Service for fetching publication data from OpenAlex - * @param publicationRepository Repository for storing and retrieving publication data - * @param ec Implicit execution context for Future operations - */ - -class PublicationUpdateActor @javax.inject.Inject()( - openAlexService: OpenAlexService, - publicationRepository: PublicationRepository - )(implicit ec: ExecutionContext) extends Actor with Logging { - - import PublicationUpdateActor.* - - // Materializer is needed to run Pekko Streams. - // It's often implicitly available in Actor contexts via context.system, but good to be explicit. - implicit val materializer: Materializer = Materializer(context.system) - - // Rate limit for OpenAlex: 10 requests per second. - // We'll aim for 1 element per 150 milliseconds to be safe. 
- private val elementsPerUnit = 1 - private val perDuration = 150.millis - private val maxBurst = 1 // No initial burst needed for continuous updates - private val throttleMode = ThrottleMode.shaping // Make pauses to adhere to rate - - override def receive: Receive = { - case UpdateAllPublications => - logger.info("PublicationUpdateActor: Starting scheduled update of all publications using Pekko Streams throttle.") - val senderRef = sender() // Capture the sender (scheduler) for potential reply - - (for { - dois <- publicationRepository.getAllDois - results <- Source(dois.toList) - .throttle(elementsPerUnit, perDuration, maxBurst, throttleMode) - .mapAsync(1) { doi => - for { - publicationOpt <- openAlexService.fetchAndMapPublicationByDOI(doi) - result <- publicationOpt match { - case Some(updatedPublication) => - (for { - _ <- publicationRepository.savePublication(updatedPublication) - _ = logger.debug(s"Updated publication for DOI: $doi") - } yield UpdateResult(doi, success = true, "Updated")) - .recover { - case e: Exception => - logger.error(s"Failed to save updated publication for DOI '$doi': ${e.getMessage}", e) - UpdateResult(doi, success = false, s"DB save error: ${e.getMessage}") - } - case None => - logger.warn(s"Could not re-fetch data for DOI: $doi from OpenAlex.") - Future.successful(UpdateResult(doi, success = false, "OpenAlex fetch failed")) - } - } yield result - } - .runWith(Sink.fold(Seq.empty[UpdateResult])((acc, elem) => acc :+ elem)) - _ = { - val successful = results.count(_.success) - val failed = results.count(!_.success) - logger.info(s"PublicationUpdateActor: Update cycle finished. Successfully updated: $successful, Failed: $failed.") - senderRef ! "Update complete" - } - } yield ()) recover { - case e: Exception => - logger.error(s"PublicationUpdateActor: Error during overall update process: ${e.getMessage}", e) - senderRef ! 
s"Update failed: ${e.getMessage}" - } - - case UpdateSinglePublication(doi) => - logger.info(s"Received request to update single publication with DOI: $doi") - - val senderRef = sender() // capture the sender - - Source.single(doi) - .throttle(elementsPerUnit, perDuration, maxBurst, throttleMode) - .mapAsync(1) { doi => - for { - publicationOpt <- openAlexService.fetchAndMapPublicationByDOI(doi) - result <- publicationOpt match { - case Some(publication) => - publicationRepository.savePublication(publication) - .map(_ => UpdateResult(doi, success = true, "Publication updated successfully")) - .recover { - case e: Exception => - logger.error(s"Failed to save publication for DOI '$doi': ${e.getMessage}", e) - UpdateResult(doi, success = false, s"Failed to save: ${e.getMessage}") - } - case None => - logger.warn(s"No publication data found for DOI: $doi") - Future.successful(UpdateResult(doi, success = false, "No data found in OpenAlex")) - } - } yield result - } - .runWith(Sink.head) - .map(result => senderRef ! result) - } -} \ No newline at end of file diff --git a/app/actors/VariantExportActor.scala b/app/actors/VariantExportActor.scala deleted file mode 100644 index cb03114d..00000000 --- a/app/actors/VariantExportActor.scala +++ /dev/null @@ -1,74 +0,0 @@ -package actors - -import org.apache.pekko.actor.Actor -import play.api.Logging -import services.{ExportResult, VariantExportService} - -import scala.concurrent.ExecutionContext -import scala.util.{Failure, Success} - -object VariantExportActor { - case object RunExport - private case class ExportComplete(result: ExportResult) -} - -/** - * Actor responsible for generating daily variant export files. - * - * This actor: - * 1. Generates a gzipped JSONL file of all variants - * 2. Writes metadata about the export - * 3. 
Handles concurrent request protection - */ -class VariantExportActor @javax.inject.Inject()( - variantExportService: VariantExportService -)(implicit ec: ExecutionContext) extends Actor with Logging { - - import VariantExportActor.* - import org.apache.pekko.actor.ActorRef - - override def receive: Receive = idle - - private def idle: Receive = { - case RunExport => - logger.info("VariantExportActor: Starting variant export generation") - val senderRef = sender() - - context.become(running(senderRef)) - - variantExportService.generateExport().onComplete { - case Success(result) => - if (result.success) { - logger.info(s"VariantExportActor: Export completed - ${result.variantCount} variants, ${result.fileSizeBytes / 1024 / 1024}MB") - } else { - logger.error(s"VariantExportActor: Export failed - ${result.error.getOrElse("Unknown error")}") - } - self ! ExportComplete(result) - case Failure(ex) => - logger.error(s"VariantExportActor: Export failed unexpectedly - ${ex.getMessage}", ex) - self ! ExportComplete(ExportResult( - success = false, - variantCount = 0, - fileSizeBytes = 0, - generationTimeMs = 0, - error = Some(ex.getMessage) - )) - } - } - - private def running(originalSender: ActorRef): Receive = { - case RunExport => - logger.warn("VariantExportActor: Export already in progress, ignoring request") - sender() ! ExportResult( - success = false, - variantCount = 0, - fileSizeBytes = 0, - generationTimeMs = 0, - error = Some("Export already in progress") - ) - - case ExportComplete(result) => - originalSender ! 
result - context.become(idle) - } -} diff --git a/app/actors/YBrowseVariantUpdateActor.scala b/app/actors/YBrowseVariantUpdateActor.scala deleted file mode 100644 index 8b1e64c9..00000000 --- a/app/actors/YBrowseVariantUpdateActor.scala +++ /dev/null @@ -1,147 +0,0 @@ -package actors - -import config.GenomicsConfig -import org.apache.pekko.actor.Actor -import play.api.Logging -import services.genomics.YBrowseVariantIngestionService - -import java.io.{BufferedInputStream, FileOutputStream} -import java.net.{HttpURLConnection, URI} -import java.nio.file.Files -import scala.concurrent.{ExecutionContext, Future} -import scala.util.{Failure, Success, Try} - -object YBrowseVariantUpdateActor { - case object RunUpdate - case class UpdateResult(success: Boolean, variantsIngested: Int, message: String) - private case class UpdateComplete(result: UpdateResult) -} - -/** - * Actor responsible for downloading and ingesting Y-DNA SNP data from YBrowse. - * - * This actor: - * 1. Downloads the VCF file from ybrowse.org - * 2. Stores it at the configured local path - * 3. Triggers variant ingestion via YBrowseVariantIngestionService - */ -class YBrowseVariantUpdateActor @javax.inject.Inject()( - genomicsConfig: GenomicsConfig, - ingestionService: YBrowseVariantIngestionService -)(implicit ec: ExecutionContext) extends Actor with Logging { - - import YBrowseVariantUpdateActor.* - import org.apache.pekko.actor.ActorRef - - // Idle state - ready to accept updates - override def receive: Receive = idle - - private def idle: Receive = { - case RunUpdate => - logger.info("YBrowseVariantUpdateActor: Starting YBrowse variant update") - val senderRef = sender() - - // Switch to running state to reject concurrent requests - context.become(running(senderRef)) - - runUpdate().onComplete { - case Success(result) => - logger.info(s"YBrowseVariantUpdateActor: Update completed - ${result.message}") - self ! 
UpdateComplete(result) - case Failure(ex) => - logger.error(s"YBrowseVariantUpdateActor: Update failed - ${ex.getMessage}", ex) - self ! UpdateComplete(UpdateResult(success = false, variantsIngested = 0, s"Update failed: ${ex.getMessage}")) - } - } - - // Running state - reject new requests while update is in progress - private def running(originalSender: ActorRef): Receive = { - case RunUpdate => - logger.warn("YBrowseVariantUpdateActor: Update already in progress, rejecting request") - sender() ! UpdateResult(success = false, variantsIngested = 0, "Update already in progress. Please wait for the current update to complete.") - - case UpdateComplete(result) => - originalSender ! result - context.become(idle) - } - - private def runUpdate(): Future[UpdateResult] = { - Future { - downloadGffFile() - }.flatMap { - case Success(_) => - logger.info("GFF file downloaded successfully, starting ingestion") - ingestionService.ingestGff(genomicsConfig.ybrowseGffStoragePath).map { count => - UpdateResult(success = true, variantsIngested = count, s"Successfully ingested $count variants from GFF") - } - case Failure(ex) => - Future.successful(UpdateResult(success = false, variantsIngested = 0, s"Download failed: ${ex.getMessage}")) - } - } - - private def downloadGffFile(): Try[Unit] = Try { - val url = URI.create(genomicsConfig.ybrowseGffUrl).toURL - val targetFile = genomicsConfig.ybrowseGffStoragePath - - // Check for fresh local file (cache for 24 hours) - val cacheDuration = 24 * 60 * 60 * 1000L // 24 hours in millis - if (targetFile.exists() && (System.currentTimeMillis() - targetFile.lastModified() < cacheDuration)) { - logger.info(s"Local GFF file is fresh (< 24 hours old), skipping download: ${targetFile.getAbsolutePath}") - } else { - // Ensure parent directory exists - val parentDir = targetFile.getParentFile - if (parentDir != null && !parentDir.exists()) { - Files.createDirectories(parentDir.toPath) - logger.info(s"Created directory: ${parentDir.getAbsolutePath}") 
- } - - // Download to a temp file first, then rename (atomic operation) - val tempFile = new java.io.File(targetFile.getAbsolutePath + ".tmp") - - logger.info(s"Downloading GFF from ${genomicsConfig.ybrowseGffUrl} to ${tempFile.getAbsolutePath}") - - val connection = url.openConnection().asInstanceOf[HttpURLConnection] - connection.setRequestMethod("GET") - connection.setConnectTimeout(30000) // 30 seconds - connection.setReadTimeout(300000) // 5 minutes for large file - - try { - val responseCode = connection.getResponseCode - if (responseCode != HttpURLConnection.HTTP_OK) { - throw new RuntimeException(s"HTTP request failed with status $responseCode") - } - - val inputStream = new BufferedInputStream(connection.getInputStream) - val outputStream = new FileOutputStream(tempFile) - - try { - val buffer = new Array[Byte](8192) - var bytesRead = 0 - var totalBytes = 0L - - while ({ bytesRead = inputStream.read(buffer); bytesRead != -1 }) { - outputStream.write(buffer, 0, bytesRead) - totalBytes += bytesRead - } - - logger.info(s"Downloaded $totalBytes bytes") - } finally { - inputStream.close() - outputStream.close() - } - - // Atomic rename - if (targetFile.exists()) { - targetFile.delete() - } - if (!tempFile.renameTo(targetFile)) { - throw new RuntimeException(s"Failed to rename temp file to ${targetFile.getAbsolutePath}") - } - - logger.info(s"GFF file saved to ${targetFile.getAbsolutePath}") - } finally { - connection.disconnect() - } - } - } -} diff --git a/app/api/CoverageEndpoints.scala b/app/api/CoverageEndpoints.scala deleted file mode 100644 index b6b5bd55..00000000 --- a/app/api/CoverageEndpoints.scala +++ /dev/null @@ -1,36 +0,0 @@ -package api - -import models.domain.genomics.CoverageBenchmark -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -/** - * Defines API endpoints for coverage-related operations. - * - * This object provides Tapir endpoint definitions for retrieving coverage benchmark statistics. 
- */ -object CoverageEndpoints { - - /** - * Endpoint for retrieving coverage benchmark statistics grouped by lab, test type, and contig. - * - * Returns aggregated statistics including mean, min, max values for read length and insert size, - * along with coverage metrics and their standard deviations for calculating 95% confidence intervals. - */ - private val getBenchmarks: PublicEndpoint[Unit, String, List[CoverageBenchmark], Any] = { - endpoint - .get - .in("api" / "v1" / "coverage" / "benchmarks") - .out(jsonBody[List[CoverageBenchmark]]) - .errorOut(stringBody) - .description("Returns aggregated coverage benchmark statistics grouped by lab, test type, and contig. " + - "Standard deviation values are provided to calculate 95% confidence intervals when there is more than one sample in the group.") - .summary("Get coverage benchmark statistics") - .tag("Coverage") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - getBenchmarks - ) -} \ No newline at end of file diff --git a/app/api/FirehoseEndpoints.scala b/app/api/FirehoseEndpoints.scala deleted file mode 100644 index cb7f9a7f..00000000 --- a/app/api/FirehoseEndpoints.scala +++ /dev/null @@ -1,29 +0,0 @@ -package api - -import models.api.{BiosampleOperationResponse, ExternalBiosampleRequest} -import services.firehose.{FirehoseEvent, FirehoseResult} -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* -import play.api.libs.json.JsValue - -import java.util.UUID - -object FirehoseEndpoints { - - private val processEvent: PublicEndpoint[JsValue, String, JsValue, Any] = { - endpoint - .post - .in("api" / "firehose" / "event") - .in(jsonBody[JsValue]) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .description("Process a generic Atmosphere Lexicon event (Create/Update/Delete for any record type).") - .summary("Process Atmosphere Event") - .tag("Atmosphere Firehose") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - processEvent - ) -} diff --git 
a/app/api/GenomeRegionsEndpoints.scala b/app/api/GenomeRegionsEndpoints.scala deleted file mode 100644 index 72cf152a..00000000 --- a/app/api/GenomeRegionsEndpoints.scala +++ /dev/null @@ -1,71 +0,0 @@ -package api - -import models.api.genomics.* -import sttp.model.StatusCode -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -/** - * Tapir endpoint definitions for the Genome Regions API. - * Provides OpenAPI documentation for reference genome structural annotations. - */ -object GenomeRegionsEndpoints { - - /** - * List supported genome builds. - * GET /api/v1/genome-regions - */ - val listBuilds: PublicEndpoint[Unit, String, SupportedBuildsResponse, Any] = { - endpoint - .get - .in("api" / "v1" / "genome-regions") - .out(jsonBody[SupportedBuildsResponse]) - .errorOut(stringBody) - .description("Returns a list of supported reference genome builds and the current data version.") - .summary("List supported genome builds") - .tag("Genome Regions") - } - - /** - * Get genome regions for a specific build. - * GET /api/v1/genome-regions/{build} - */ - val getRegions: PublicEndpoint[String, GenomeRegionsError, GenomeRegionsResponse, Any] = { - endpoint - .get - .in("api" / "v1" / "genome-regions" / path[String]("build") - .description("Reference genome build (e.g., GRCh38, hg38, GRCh37, hg19, hs1, chm13)")) - .out(jsonBody[GenomeRegionsResponse]) - .errorOut( - statusCode(StatusCode.NotFound) - .and(jsonBody[GenomeRegionsError]) - ) - .description( - """Returns chromosome region data for a specific reference genome build. 
- | - |**Supported builds:** - |- GRCh38 (alias: hg38) - NCBI human reference genome assembly - |- GRCh37 (alias: hg19) - Legacy NCBI assembly - |- hs1 (aliases: chm13, T2T-CHM13) - Telomere-to-telomere assembly - | - |**Response includes:** - |- Chromosome lengths - |- Centromere positions - |- Telomere positions - |- Cytoband annotations (Giemsa staining) - |- Y-chromosome specific regions (PAR, XTR, ampliconic, etc.) - |- STR marker positions with verification status - | - |**Caching:** - |Responses are cached for 7 days. Use the ETag header for conditional requests. - |""".stripMargin) - .summary("Get genome regions by build") - .tag("Genome Regions") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - listBuilds, - getRegions - ) -} diff --git a/app/api/HaplogroupEndpoints.scala b/app/api/HaplogroupEndpoints.scala deleted file mode 100644 index e7027696..00000000 --- a/app/api/HaplogroupEndpoints.scala +++ /dev/null @@ -1,47 +0,0 @@ -package api - -import models.api.* -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -import java.time.ZonedDateTime - -object HaplogroupEndpoints { - - given Schema[ZonedDateTime] = Schema.string.map((str: String) => - try Some(ZonedDateTime.parse(str)) - catch case _: Exception => None - )(_.toString) - - private val getYTreeEnpoint: PublicEndpoint[Option[String], String, List[SubcladeDTO], Any] = { - endpoint - .get - .in( - "api" / "v1" / "y-tree" / - query[Option[String]]("rootHaplogroup") // <--- Changed from path to query - .description("The name of the subclade to use as root") - ) - .out(jsonBody[List[SubcladeDTO]]) - .errorOut(stringBody) - .description("Returns a list of YDNA tree nodes, defining variants and date of last update.") - .summary("Returns a list of YDNA tree nodes") - .tag("Haplogroups") - } - - private val getMTreeEnpoint: PublicEndpoint[Option[String], String, List[SubcladeDTO], Any] = { - endpoint - .get - .in("api" / "v1" / "mt-tree" / 
query[Option[String]]("rootHaplogroup").description("The name of the subclade to use as root")) - .out(jsonBody[List[SubcladeDTO]]) - .errorOut(stringBody) - .description("Returns a list of mtDNA tree nodes, defining variants and date of last update.") - .summary("Returns a list of mtDNA tree nodes") - .tag("Haplogroups") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - getMTreeEnpoint, - getYTreeEnpoint - ) -} diff --git a/app/api/IbdRelayEndpoints.scala b/app/api/IbdRelayEndpoints.scala deleted file mode 100644 index 704e558c..00000000 --- a/app/api/IbdRelayEndpoints.scala +++ /dev/null @@ -1,35 +0,0 @@ -package api - -import play.api.libs.json.JsValue -import sttp.tapir.* -import sttp.tapir.json.play.* - -object IbdRelayEndpoints { - - private val createSession: PublicEndpoint[JsValue, String, JsValue, Any] = { - endpoint - .post - .in("api" / "v1" / "ibd" / "relay" / "session") - .in(jsonBody[JsValue]) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Create a relay session for IBD data exchange") - .description("Requires mutual consent. 
Returns sessionId for WebSocket connection.") - .tag("IBD Relay") - } - - private val getSessionStatus: PublicEndpoint[String, String, JsValue, Any] = { - endpoint - .get - .in("api" / "v1" / "ibd" / "relay" / "session" / path[String]("sessionId")) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Check relay session status") - .tag("IBD Relay") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - createSession, - getSessionStatus - ) -} diff --git a/app/api/MatchDiscoveryEndpoints.scala b/app/api/MatchDiscoveryEndpoints.scala deleted file mode 100644 index e2723f36..00000000 --- a/app/api/MatchDiscoveryEndpoints.scala +++ /dev/null @@ -1,58 +0,0 @@ -package api - -import play.api.libs.json.JsValue -import sttp.tapir.* -import sttp.tapir.json.play.* - -object MatchDiscoveryEndpoints { - - private val getSuggestions: PublicEndpoint[(Option[String], Int, String), String, JsValue, Any] = { - endpoint - .get - .in("api" / "v1" / "discovery" / "suggestions") - .in(query[Option[String]]("type").description("Filter by suggestion type")) - .in(query[Int]("limit").default(20).description("Max results")) - .in(query[String]("sampleGuid").description("Target sample GUID")) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Get match suggestions for a sample") - .tag("Match Discovery") - } - - private val dismissSuggestion: PublicEndpoint[Long, String, JsValue, Any] = { - endpoint - .post - .in("api" / "v1" / "discovery" / "suggestions" / path[Long]("id") / "dismiss") - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Dismiss a match suggestion") - .tag("Match Discovery") - } - - private val getPopulationBreakdown: PublicEndpoint[String, String, JsValue, Any] = { - endpoint - .get - .in("api" / "v1" / "discovery" / "population" / path[String]("sampleGuid")) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Get population breakdown for a sample") - .tag("Match Discovery") - } - - private val getPopulationOverlap: 
PublicEndpoint[(String, String), String, JsValue, Any] = { - endpoint - .get - .in("api" / "v1" / "discovery" / "population" / "overlap" / path[String]("guid1") / path[String]("guid2")) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Get population overlap score between two samples") - .tag("Match Discovery") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - getSuggestions, - dismissSuggestion, - getPopulationBreakdown, - getPopulationOverlap - ) -} diff --git a/app/api/MatchRequestEndpoints.scala b/app/api/MatchRequestEndpoints.scala deleted file mode 100644 index 8292670b..00000000 --- a/app/api/MatchRequestEndpoints.scala +++ /dev/null @@ -1,80 +0,0 @@ -package api - -import play.api.libs.json.JsValue -import sttp.tapir.* -import sttp.tapir.json.play.* - -object MatchRequestEndpoints { - - private val createRequest: PublicEndpoint[JsValue, String, JsValue, Any] = { - endpoint - .post - .in("api" / "v1" / "matches" / "request") - .in(jsonBody[JsValue]) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Create a match request") - .tag("Match Requests") - } - - private val getPendingRequests: PublicEndpoint[String, String, JsValue, Any] = { - endpoint - .get - .in("api" / "v1" / "matches" / "requests" / "pending") - .in(query[String]("sampleGuid").description("Sample GUID to check pending requests for")) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Get pending match requests for a sample") - .tag("Match Requests") - } - - private val getSentRequests: PublicEndpoint[Unit, String, JsValue, Any] = { - endpoint - .get - .in("api" / "v1" / "matches" / "requests" / "sent") - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Get sent match requests") - .tag("Match Requests") - } - - private val cancelRequest: PublicEndpoint[String, String, JsValue, Any] = { - endpoint - .post - .in("api" / "v1" / "matches" / "requests" / path[String]("uri") / "cancel") - .out(jsonBody[JsValue]) - .errorOut(stringBody) - 
.summary("Cancel a match request") - .tag("Match Requests") - } - - private val submitConsent: PublicEndpoint[JsValue, String, JsValue, Any] = { - endpoint - .post - .in("api" / "v1" / "matches" / "consent") - .in(jsonBody[JsValue]) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Submit match consent") - .tag("Match Requests") - } - - private val getConsentStatus: PublicEndpoint[String, String, JsValue, Any] = { - endpoint - .get - .in("api" / "v1" / "matches" / "consent" / "status" / path[String]("requestUri")) - .out(jsonBody[JsValue]) - .errorOut(stringBody) - .summary("Get consent status for a match request") - .tag("Match Requests") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - createRequest, - getPendingRequests, - getSentRequests, - cancelRequest, - submitConsent, - getConsentStatus - ) -} diff --git a/app/api/PDSRegistrationEndpoints.scala b/app/api/PDSRegistrationEndpoints.scala deleted file mode 100644 index fdfb1a17..00000000 --- a/app/api/PDSRegistrationEndpoints.scala +++ /dev/null @@ -1,33 +0,0 @@ -package api - -import models.PDSRegistration -import play.api.libs.json.{Format, Json} -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -// --- DTOs (Data Transfer Objects) --- -case class PdsRegistrationRequest( - did: String, - handle: String, - pdsUrl: String, - rToken: String - ) - -object PdsRegistrationRequest { - implicit val format: Format[PdsRegistrationRequest] = Json.format[PdsRegistrationRequest] -} - -object PDSRegistrationEndpoints { - - val registerPdsEndpoint: PublicEndpoint[PdsRegistrationRequest, String, PDSRegistration, Any] = - endpoint.post - .in("api" / "registerPDS") - .name("Register PDS") - .description("Registers a new PDS (Personal Data Server) with the system.") - .in(jsonBody[PdsRegistrationRequest]) - .out(jsonBody[PDSRegistration]) - .errorOut(stringBody) - - val all = List(registerPdsEndpoint) -} diff --git a/app/api/ProjectEndpoints.scala 
b/app/api/ProjectEndpoints.scala deleted file mode 100644 index a85a97e0..00000000 --- a/app/api/ProjectEndpoints.scala +++ /dev/null @@ -1,55 +0,0 @@ -package api - -import models.api.{ProjectRequest, ProjectResponse} -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -import java.util.UUID - -object ProjectEndpoints { - - private val createProject: PublicEndpoint[ProjectRequest, String, ProjectResponse, Any] = { - endpoint - .post - .in("api" / "projects") - .in(jsonBody[ProjectRequest]) - .out(jsonBody[ProjectResponse]) - .errorOut(stringBody) - .description("Creates a new Project. (Deprecated: Use /api/firehose/event)") - .summary("Create Project (Legacy)") - .tag("Projects") - .deprecated() - } - - private val updateProject: PublicEndpoint[(String, ProjectRequest), String, ProjectResponse, Any] = { - endpoint - .put - .in("api" / "projects" / path[String]("atUri")) - .in(jsonBody[ProjectRequest]) - .out(jsonBody[ProjectResponse]) - .errorOut(stringBody) - .description("Updates an existing Project using Optimistic Locking (via atCid). (Deprecated: Use /api/firehose/event)") - .summary("Update Project (Legacy)") - .tag("Projects") - .deprecated() - } - - private val deleteProject: PublicEndpoint[String, String, Unit, Any] = { - endpoint - .delete - .in("api" / "projects" / path[String]("atUri")) - .out(statusCode(sttp.model.StatusCode.NoContent)) - .errorOut(stringBody) - .description("Soft deletes a Project. 
(Deprecated: Use /api/firehose/event)") - .summary("Delete Project (Legacy)") - .tag("Projects") - .deprecated() - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - createProject, - updateProject, - deleteProject - ) -} diff --git a/app/api/ReferenceEndpoints.scala b/app/api/ReferenceEndpoints.scala deleted file mode 100644 index 632fa7c4..00000000 --- a/app/api/ReferenceEndpoints.scala +++ /dev/null @@ -1,43 +0,0 @@ -package api - -import models.api.{BiosampleWithOrigin, PublicationWithEnaStudiesAndSampleCount} -import play.api.libs.json.* -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -import java.time.LocalDate - -object ReferenceEndpoints { - given Schema[LocalDate] = Schema.string.map((str: String) => - try Some(LocalDate.parse(str)) - catch case _: Exception => None - )(_.toString) - - val getReferenceDetailsEndpoint: PublicEndpoint[Unit, String, List[PublicationWithEnaStudiesAndSampleCount], Any] = { - endpoint - .get - .in("api" / "v1" / "references" / "details") - .out(jsonBody[List[PublicationWithEnaStudiesAndSampleCount]]) - .errorOut(stringBody) - .description("Returns a list of reference details, including publication information, ENA studies, and sample counts.") - .summary("Retrieve details for references") - .tag("References") - } - - val getReferenceBiosamplesEndpoint: PublicEndpoint[Int, String, List[BiosampleWithOrigin], Any] = { - endpoint - .get - .in("api" / "v1" / "references" / "details" / path[Int]("publicationId") / "biosamples") - .out(jsonBody[List[BiosampleWithOrigin]]) - .errorOut(stringBody) - .description("Returns a list of biosamples associated with a specific publication.") - .summary("Retrieve biosamples for a publication") - .tag("References") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - getReferenceDetailsEndpoint, - getReferenceBiosamplesEndpoint - ) -} diff --git a/app/api/SampleEndpoints.scala b/app/api/SampleEndpoints.scala deleted file mode 100644 index 
77a2dd16..00000000 --- a/app/api/SampleEndpoints.scala +++ /dev/null @@ -1,23 +0,0 @@ -package api - -import models.api.SampleWithStudies -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -object SampleEndpoints { - private val getSamplesWithStudies: PublicEndpoint[Unit, String, List[SampleWithStudies], Any] = { - endpoint - .get - .in("api" / "v1" / "biosample" / "studies") - .out(jsonBody[List[SampleWithStudies]]) - .errorOut(stringBody) - .description("Returns a list of samples with their associated studies and haplogroup assignments") - .summary("Get samples with studies and haplogroup assignments") - .tag("Biosamples") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - getSamplesWithStudies - ) -} \ No newline at end of file diff --git a/app/api/SequencerEndpoints.scala b/app/api/SequencerEndpoints.scala deleted file mode 100644 index 2c6ab4db..00000000 --- a/app/api/SequencerEndpoints.scala +++ /dev/null @@ -1,39 +0,0 @@ -package api - -import models.api.{SequencerLabInfo, SequencerLabInstrumentsResponse} -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -object SequencerEndpoints { - - private val getLabByInstrumentId: PublicEndpoint[String, String, SequencerLabInfo, Any] = { - endpoint - .get - .in("api" / "v1" / "sequencer" / "lab") - .in(query[String]("instrument_id") - .description("The unique instrument ID from BAM/CRAM read headers (e.g., 'A00123')") - .example("A00123")) - .out(jsonBody[SequencerLabInfo]) - .errorOut(stringBody) - .description("Returns sequencing lab information for a given instrument ID extracted from BAM/CRAM headers") - .summary("Get lab info by instrument ID") - .tag("Sequencer") - } - - private val getAllLabInstruments: PublicEndpoint[Unit, String, SequencerLabInstrumentsResponse, Any] = { - endpoint - .get - .in("api" / "v1" / "sequencer" / "lab-instruments") - .out(jsonBody[SequencerLabInstrumentsResponse]) - .errorOut(stringBody) - 
.description("Returns all lab-instrument associations") - .summary("Get all lab-instrument associations") - .tag("Sequencer") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - getLabByInstrumentId, - getAllLabInstruments - ) -} diff --git a/app/api/VariantEndpoints.scala b/app/api/VariantEndpoints.scala deleted file mode 100644 index 4c388605..00000000 --- a/app/api/VariantEndpoints.scala +++ /dev/null @@ -1,136 +0,0 @@ -package api - -import models.api.* -import services.ExportMetadata -import sttp.tapir.* -import sttp.tapir.generic.auto.* -import sttp.tapir.json.play.* - -/** - * Tapir endpoint definitions for the public Variant API. - * - * Provides searchable access to the variant database with pagination. - * Response format is designed to be forward-compatible with the - * proposed variant_v2 schema (see variant-schema-simplification.md). - */ -object VariantEndpoints { - - /** - * Search variants by name, rsId, or alias. - * - * GET /api/v1/variants?query=M269&page=1&pageSize=25 - */ - val searchVariants: PublicEndpoint[(Option[String], Int, Int), String, VariantSearchResponse, Any] = { - endpoint - .get - .in("api" / "v1" / "variants") - .in(query[Option[String]]("query").description("Search term (name, rsId, or alias). Leave empty to browse all.")) - .in(query[Int]("page").default(1).description("Page number (1-based)")) - .in(query[Int]("pageSize").default(25).validate(Validator.inRange(1, 100)).description("Items per page (max 100)")) - .out(jsonBody[VariantSearchResponse]) - .errorOut(stringBody) - .description( - """Search the variant database by name, rsId, or alias. 
- | - |Results include: - |- Canonical name (primary identifier) - |- Coordinates across reference assemblies (GRCh37, GRCh38, hs1) - |- Per-assembly alleles (handles strand differences between assemblies) - |- All known aliases grouped by source - |- Defining haplogroup (if this variant defines a branch) - | - |**Note**: The same variant name may appear multiple times if it represents - |parallel mutations in different lineages (e.g., L21 in R1b vs L21 in I2). - |""".stripMargin) - .summary("Search variants") - .tag("Variants") - } - - /** - * Get a single variant by ID. - * - * GET /api/v1/variants/:id - */ - val getVariantById: PublicEndpoint[Int, String, PublicVariantDTO, Any] = { - endpoint - .get - .in("api" / "v1" / "variants" / path[Int]("variantId").description("Variant ID")) - .out(jsonBody[PublicVariantDTO]) - .errorOut(stringBody) - .description("Get detailed information for a specific variant by its ID.") - .summary("Get variant by ID") - .tag("Variants") - } - - /** - * Get all variants defining a specific haplogroup. - * - * GET /api/v1/haplogroups/:name/variants - */ - val getVariantsByHaplogroup: PublicEndpoint[String, String, Seq[PublicVariantDTO], Any] = { - endpoint - .get - .in("api" / "v1" / "haplogroups" / path[String]("haplogroupName").description("Haplogroup name (e.g., R-M269)") / "variants") - .out(jsonBody[Seq[PublicVariantDTO]]) - .errorOut(stringBody) - .description("Get all variants that define a specific haplogroup.") - .summary("Get variants by haplogroup") - .tag("Variants") - } - - /** - * Download full variant export (gzipped JSONL). - * - * GET /api/v1/variants/export - * - * Note: This endpoint returns a binary file download, not JSON. - */ - val downloadExport: PublicEndpoint[Unit, String, String, Any] = { - endpoint - .get - .in("api" / "v1" / "variants" / "export") - .out(header[String]("Content-Type")) - .errorOut(stringBody) - .description( - """Download the complete variant database as a gzipped JSONL file. 
- | - |This export is regenerated daily at 4 AM UTC and includes all variants - |in the full PublicVariantDTO format. The file is typically 100-200MB compressed. - | - |Each line is a JSON object representing one variant group. - | - |**Response:** Binary file (application/gzip) - | - |**Use cases:** - |- Edge App novel variant annotation - |- Offline analysis - |- Data synchronization - |""".stripMargin) - .summary("Download variant export") - .tag("Variants") - } - - /** - * Get metadata about the current export file. - * - * GET /api/v1/variants/export/metadata - */ - val exportMetadata: PublicEndpoint[Unit, String, ExportMetadata, Any] = { - endpoint - .get - .in("api" / "v1" / "variants" / "export" / "metadata") - .out(jsonBody[ExportMetadata]) - .errorOut(stringBody) - .description("Get metadata about the current variant export file, including generation time and variant count.") - .summary("Get export metadata") - .tag("Variants") - } - - val all: List[PublicEndpoint[_, _, _, _]] = List( - searchVariants, - getVariantById, - getVariantsByHaplogroup, - downloadExport, - exportMetadata - ) -} diff --git a/app/config/AWSSecretsConfig.scala b/app/config/AWSSecretsConfig.scala deleted file mode 100644 index 6fe38e17..00000000 --- a/app/config/AWSSecretsConfig.scala +++ /dev/null @@ -1,13 +0,0 @@ -package config - -import play.api.Configuration -import software.amazon.awssdk.regions.Region - -import javax.inject.{Inject, Singleton} - -@Singleton -class AWSSecretsConfig @Inject()(configuration: Configuration) { - val region: Region = Region.of(configuration.get[String]("aws.region")) - val apiKeySecretName: String = configuration.get[String]("aws.secrets.apiKey.name") - val userEncryptionKeySecretName: String = configuration.get[String]("aws.secrets.userEncryptionKey.name") -} \ No newline at end of file diff --git a/app/config/FeatureFlags.scala b/app/config/FeatureFlags.scala deleted file mode 100644 index eb9dba55..00000000 --- a/app/config/FeatureFlags.scala 
+++ /dev/null @@ -1,25 +0,0 @@ -package config - -import jakarta.inject.{Inject, Singleton} -import play.api.Configuration - -/** - * Configuration wrapper for feature flags. - * Allows features to be enabled/disabled via application.conf. - */ -@Singleton -class FeatureFlags @Inject()(config: Configuration) { - - private val featuresConfig = config.getOptional[Configuration]("features").getOrElse(Configuration.empty) - - /** - * Show branch age estimates (Formed/TMRCA dates) on tree nodes. - * Disabled by default until age data is populated. - */ - val showBranchAgeEstimates: Boolean = featuresConfig.getOptional[Boolean]("tree.showBranchAgeEstimates").getOrElse(false) - - /** - * Show the alternative "Block Layout" (ytree.net style) for the tree. - */ - val showVerticalTree: Boolean = featuresConfig.getOptional[Boolean]("tree.showVerticalTree").getOrElse(false) -} diff --git a/app/config/GenomicsConfig.scala b/app/config/GenomicsConfig.scala deleted file mode 100644 index c2a56359..00000000 --- a/app/config/GenomicsConfig.scala +++ /dev/null @@ -1,53 +0,0 @@ -package config - -import jakarta.inject.{Inject, Singleton} -import play.api.Configuration - -import java.io.File - -/** - * Configuration wrapper for genomics-related settings. 
- */ -@Singleton -class GenomicsConfig @Inject()(config: Configuration) { - - private val genomicsConfig = config.get[Configuration]("genomics") - - val supportedReferences: Seq[String] = genomicsConfig.get[Seq[String]]("references.supported") - - val referenceAliases: Map[String, String] = genomicsConfig.getOptional[Map[String, String]]("references.aliases").getOrElse(Map.empty) - - val fastaPaths: Map[String, File] = genomicsConfig.get[Map[String, String]]("references.fasta_paths").map { - case (genome, path) => genome -> new File(path) - } - - // YBrowse configuration - val ybrowseGffUrl: String = genomicsConfig.get[String]("ybrowse.gff_url") - val ybrowseGffStoragePath: File = new File(genomicsConfig.get[String]("ybrowse.gff_storage_path")) - - // HipSTR configuration - val hipstrUrl: String = genomicsConfig.get[String]("hipstr.url") - val hipstrStoragePath: File = new File(genomicsConfig.get[String]("hipstr.storage_path")) - - /** - * Retrieves the path to a liftover chain file for a given source and target genome. - * - * @param source The source reference genome (e.g., "GRCh38"). - * @param target The target reference genome (e.g., "GRCh37"). - * @return An Option containing the File if the chain is configured and exists, otherwise None. - */ - def getLiftoverChainFile(source: String, target: String): Option[File] = { - val key = s"$source->$target" - genomicsConfig.getOptional[String](s"liftover.chains.\"$key\"").map(new File(_)) - } - - /** - * Resolves a genome name to its canonical form using the aliases configuration. - * - * @param name The input genome name. - * @return The canonical name if an alias exists, otherwise the input name. 
- */ - def resolveReferenceName(name: String): String = { - referenceAliases.getOrElse(name, name) - } -} diff --git a/app/controllers/ApiRouter.scala b/app/controllers/ApiRouter.scala deleted file mode 100644 index b2d6a7b5..00000000 --- a/app/controllers/ApiRouter.scala +++ /dev/null @@ -1,43 +0,0 @@ -package controllers - -import org.apache.pekko.stream.Materializer -import play.api.mvc.* -import play.api.routing.{Router, SimpleRouter} -import sttp.apispec.openapi.Info -import sttp.tapir.* -import sttp.tapir.server.play.PlayServerInterpreter -import sttp.tapir.swagger.bundle.SwaggerInterpreter - -import javax.inject.* -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class ApiRouter @Inject()(cc: ControllerComponents, configuration: play.api.Configuration) - (implicit ec: ExecutionContext, mat: Materializer) - extends SimpleRouter { - - // Create OpenAPI info object - private val apiInfo = Info( - title = "Decoding-Us API", - version = "1.0.0", - description = Some("API for accessing Decoding-Us data") - ) - - // Swagger docs - private val swaggerEndpoints = - SwaggerInterpreter().fromEndpoints[Future]( - endpoints = _root_.api.ReferenceEndpoints.all ++ _root_.api.HaplogroupEndpoints.all - ++ _root_.api.SampleEndpoints.all ++ _root_.api.CoverageEndpoints.all - ++ _root_.api.SequencerEndpoints.all ++ _root_.api.FirehoseEndpoints.all - ++ _root_.api.ProjectEndpoints.all ++ _root_.api.VariantEndpoints.all - ++ _root_.api.GenomeRegionsEndpoints.all - ++ _root_.api.MatchDiscoveryEndpoints.all ++ _root_.api.MatchRequestEndpoints.all - ++ _root_.api.IbdRelayEndpoints.all, - info = apiInfo - ) - - // Combine all endpoints ensuring Swagger endpoints come first - private val serverEndpoints = swaggerEndpoints ::: Nil - - override def routes: Router.Routes = PlayServerInterpreter().toRoutes(serverEndpoints) -} diff --git a/app/controllers/AuthController.scala b/app/controllers/AuthController.scala deleted file mode 100644 index 9efbbafc..00000000 --- 
a/app/controllers/AuthController.scala +++ /dev/null @@ -1,83 +0,0 @@ -package controllers - -import jakarta.inject.{Inject, Singleton} -import org.webjars.play.WebJarsUtil -import play.api.Logging -import play.api.data.Form -import play.api.data.Forms.* -import play.api.i18n.I18nSupport -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import services.{AuthService, LoginRateLimiter} - -import scala.concurrent.{ExecutionContext, Future} - -case class LoginData(handle: String, appPassword: String) - -@Singleton -class AuthController @Inject()( - val controllerComponents: ControllerComponents, - authService: AuthService, - userRoleRepository: repositories.UserRoleRepository, - rateLimiter: LoginRateLimiter - )(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) extends BaseController with I18nSupport with Logging { - - val loginForm = Form( - mapping( - "handle" -> nonEmptyText(maxLength = 255), - "appPassword" -> nonEmptyText(maxLength = 255) - )(LoginData.apply)(data => Some((data.handle, data.appPassword))) - ) - - def login: Action[AnyContent] = Action { implicit request => - Ok(views.html.auth.login(loginForm)) - } - - def authenticate: Action[AnyContent] = Action.async { implicit request => - val clientIp = request.headers.get("X-Real-IP").getOrElse(request.remoteAddress) - - if (!rateLimiter.isAllowed(clientIp)) { - Future.successful( - Redirect(routes.AuthController.login) - .flashing("error" -> "Too many login attempts. 
Please try again later.") - ) - } else { - loginForm.bindFromRequest().fold( - formWithErrors => Future.successful(BadRequest(views.html.auth.login(formWithErrors))), - data => { - authService.login(data.handle, data.appPassword).flatMap { - case Some(user) => - rateLimiter.recordSuccess(clientIp) - // Fetch roles to store in session for UI logic - userRoleRepository.getUserRoles(user.id.get).map { roles => - val roleString = roles.mkString(",") - val displayName = user.displayName.orElse(user.handle).getOrElse("User") - Redirect(routes.HomeController.index()) - .withSession( - "userId" -> user.id.get.toString, - "userRoles" -> roleString, - "userDisplayName" -> displayName - ) - .flashing("success" -> s"Welcome back, $displayName!") - } - case None => - rateLimiter.recordFailure(clientIp) - Future.successful( - Redirect(routes.AuthController.login) - .flashing("error" -> "Invalid handle or password.") - ) - } - } - ) - } - } - - def logout: Action[AnyContent] = Action { implicit request => - Redirect(routes.AuthController.login) - .withNewSession - .flashing("success" -> "You have been logged out.") - } - - def showAppPasswordHelp(): Action[AnyContent] = Action { implicit request => - Ok(views.html.auth.appPasswordHelp()) - } -} diff --git a/app/controllers/BaseCuratorController.scala b/app/controllers/BaseCuratorController.scala deleted file mode 100644 index 0ce852f9..00000000 --- a/app/controllers/BaseCuratorController.scala +++ /dev/null @@ -1,40 +0,0 @@ -package controllers - -import actions.{AuthenticatedAction, AuthenticatedRequest, PermissionAction} -import play.api.mvc.{ActionBuilder, AnyContent} - -/** - * Base trait for curator controllers providing common functionality. - * - * Extract common patterns like permission-based action composition and - * curator ID extraction to reduce duplication across curator controllers. - */ -trait BaseCuratorController { - - /** - * Inject these in the implementing controller. 
- */ - protected def authenticatedAction: AuthenticatedAction - protected def permissionAction: PermissionAction - - /** - * Permission-based action composition. - * Combines authentication with permission checking. - * - * Usage: - * {{{ - * def myAction = withPermission("tree.version.view").async { implicit request => - * // action implementation - * } - * }}} - */ - protected def withPermission(permission: String): ActionBuilder[AuthenticatedRequest, AnyContent] = - authenticatedAction andThen permissionAction(permission) - - /** - * Extract curator ID from authenticated request. - * Uses email if available, otherwise falls back to user ID. - */ - protected def curatorId(request: AuthenticatedRequest[?]): String = - request.user.email.getOrElse(request.user.id.map(_.toString).getOrElse("unknown")) -} diff --git a/app/controllers/BiosampleController.scala b/app/controllers/BiosampleController.scala deleted file mode 100644 index 68716050..00000000 --- a/app/controllers/BiosampleController.scala +++ /dev/null @@ -1,87 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.Inject -import models.api.{BiosampleUpdate, BiosampleView} -import models.domain.genomics.Biosample -import play.api.libs.json.{JsValue, Json} -import play.api.mvc.{AbstractController, Action, AnyContent, ControllerComponents} -import services.BiosampleDomainService - -import scala.concurrent.{ExecutionContext, Future} - -/** - * The BiosampleController class provides HTTP endpoints for managing biosample-related - * operations, including updating biosamples, retrieving biosamples with studies, - * and searching biosamples by alias or accession. - * - * @constructor Creates a new instance of the BiosampleController class. 
- * @param cc the controller components used for handling HTTP requests and responses - * @param secureApi an action builder for processing secure API requests - * @param biosampleDomainService the facade service for all biosample operations - * @param ec the implicit execution context for handling asynchronous operations - */ -class BiosampleController @Inject()( - cc: ControllerComponents, - secureApi: ApiSecurityAction, - biosampleDomainService: BiosampleDomainService - )(implicit ec: ExecutionContext) extends AbstractController(cc) { - - /** - * Updates the details of a biosample based on the provided identifier and update information. - * - * @param id the identifier of the biosample to be updated - * @return an asynchronous action producing a JSON response: - * - On success, returns the updated biosample in the response. - * - On failure, returns an error message indicating the issue. - */ - def updateBiosample(id: Int): Action[JsValue] = Action.async(parse.json) { request => - secureApi.invokeBlock(request, { secureRequest => - request.body.validate[BiosampleUpdate].fold( - errors => Future.successful(BadRequest(Json.obj("error" -> "Invalid request format"))), - update => { - biosampleDomainService.updateBiosample(id, update).map { - case Right(biosample) => Ok(Json.toJson(biosample)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - ) - }) - } - - /** - * Retrieves all biosamples along with their associated studies. - * - * The method fetches data for all biosamples and enriches them with the corresponding - * study information. The data is retrieved from the `biosampleRepository` and returned - * as a JSON response. 
- * - * @return an asynchronous action resulting in an HTTP JSON response containing - * a list of biosamples with their associated studies - */ - def getSamplesWithStudies: Action[AnyContent] = Action.async { - biosampleDomainService.findAllWithStudies().map { - samples => - Ok(Json.toJson(samples)) - } - } - - /** - * Searches for a biosample based on the provided alias or accession identifier. - * - * The method queries the `biosampleRepository` to fetch a biosample associated with the given - * alias or accession. If found, it returns a JSON response containing the biosample details. - * Otherwise, it returns an HTTP 404 response indicating the biosample was not found. - * - * @param query the alias or accession identifier used to search for the biosample - * @return an asynchronous action resulting in an HTTP JSON response: - * - On success, returns a JSON representation of the matched biosample. - * - On failure, returns a 404 response with an error message. - */ - def findByAliasOrAccession(query: String): Action[AnyContent] = Action.async { - biosampleDomainService.findByAliasOrAccession(query).map { - case Some((biosample, specimenDonor)) => Ok(Json.toJson(BiosampleView.fromDomain(biosample, specimenDonor))) - case None => NotFound(Json.obj("error" -> "Biosample not found")) - } - } -} \ No newline at end of file diff --git a/app/controllers/BiosampleDataController.scala b/app/controllers/BiosampleDataController.scala deleted file mode 100644 index 79faa669..00000000 --- a/app/controllers/BiosampleDataController.scala +++ /dev/null @@ -1,67 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.{PublicationInfo, SequenceDataInfo} -import play.api.libs.json.Json -import play.api.mvc.{Action, BaseController, ControllerComponents} -import services.BiosampleDomainService - -import java.util.UUID -import scala.concurrent.ExecutionContext - -/** - * A controller that manages operations 
related to biosample data. This includes - * functionalities for adding sequence data and linking publications to specific - * biosamples. All API endpoints are secured using the SecureApiAction. - * - * @constructor Creates an instance of BiosampleDataController. - * @param controllerComponents Standard Play framework controller components. - * @param secureApi A custom SecureApiAction used to enforce authentication and JSON validation. - * @param biosampleDomainService The facade service for all biosample operations. - * @param ec An implicit ExecutionContext for asynchronous operations. - */ -@Singleton -class BiosampleDataController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - biosampleDomainService: BiosampleDomainService - )(implicit ec: ExecutionContext) extends BaseController { - - /** - * Adds sequencing data to a specified biosample. - * - * This method handles an HTTP request to associate sequencing dataset metadata - * with a particular biosample identified by the given sample GUID. - * The request body contains `SequenceDataInfo`, which includes detailed sequencing-related information - * such as platform name, read length, and test type. Authentication and JSON validation are enforced - * using the `SecureApiAction`. - * - * @param sampleGuid The unique identifier (UUID) of the biosample to which the sequence data will be added. - * @return An `Action` that asynchronously performs the operation and produces - * a `SequenceDataInfo`, returning an HTTP response indicating success or failure. - */ - def addSequenceData(sampleGuid: UUID): Action[SequenceDataInfo] = - secureApi.jsonAction[SequenceDataInfo].async { request => - biosampleDomainService.addSequenceData(sampleGuid, request.body).map { _ => - Ok(Json.toJson(ApiResponse("success"))) - } - } - - /** - * Links a publication to a specific biosample identified by the given GUID. 
- * - * This method processes a request containing `PublicationInfo` to associate it - * with the biosample corresponding to the provided GUID. The operation is performed - * asynchronously, and upon completion, a success response is returned. - * - * @param sampleGuid The unique identifier (UUID) of the biosample to which the publication will be linked. - * @return An `Action` that performs the linking operation and produces a result containing `PublicationInfo`. - */ - def linkPublication(sampleGuid: UUID): Action[PublicationInfo] = - secureApi.jsonAction[PublicationInfo].async { request => - biosampleDomainService.linkPublication(sampleGuid, request.body).map { _ => - Ok(Json.toJson(ApiResponse("success"))) - } - } -} \ No newline at end of file diff --git a/app/controllers/BiosampleMapController.scala b/app/controllers/BiosampleMapController.scala deleted file mode 100644 index e0e76f84..00000000 --- a/app/controllers/BiosampleMapController.scala +++ /dev/null @@ -1,35 +0,0 @@ -package controllers - -import models.dal.domain.genomics.BiosamplesTable -import org.webjars.play.WebJarsUtil -import play.api.i18n.I18nSupport -import play.api.libs.json.* -import play.api.mvc.* -import repositories.BiosampleRepository - -import javax.inject.* -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class BiosampleMapController @Inject()( - val controllerComponents: ControllerComponents, - biosampleRepository: BiosampleRepository - )(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) extends BaseController with I18nSupport { - - def mapView() = Action { implicit request: Request[AnyContent] => - Ok(views.html.biosamples.map()) - } - - def geoData() = Action.async { implicit request => - biosampleRepository.getAllGeoLocations.map { locations => - val geoJson = locations.map { case (point, count) => - Json.obj( - "lat" -> point.getY, - "lng" -> point.getX, - "count" -> count - ) - } - Ok(Json.toJson(geoJson)) - } - } -} \ No newline at end of file diff 
--git a/app/controllers/BiosampleOriginalHaplogroupController.scala b/app/controllers/BiosampleOriginalHaplogroupController.scala deleted file mode 100644 index 7413e641..00000000 --- a/app/controllers/BiosampleOriginalHaplogroupController.scala +++ /dev/null @@ -1,85 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.Inject -import models.api.{BiosampleOriginalHaplogroupUpdate, BiosampleOriginalHaplogroupView} -import models.domain.genomics.OriginalHaplogroupEntry -import play.api.Logging -import play.api.libs.json.{JsValue, Json} -import play.api.mvc.{AbstractController, Action, AnyContent, ControllerComponents} -import repositories.{BiosampleOriginalHaplogroupRepository, BiosampleRepository} - -import scala.concurrent.{ExecutionContext, Future} - -class BiosampleOriginalHaplogroupController @Inject()( - cc: ControllerComponents, - secureApi: ApiSecurityAction, - haplogroupRepository: BiosampleOriginalHaplogroupRepository, - biosampleRepository: BiosampleRepository - )(implicit ec: ExecutionContext) extends AbstractController(cc) with Logging { - - def updateOrCreateHaplogroup(biosampleId: Int, publicationId: Int): Action[JsValue] = - Action.async(parse.json) { request => - secureApi.invokeBlock(request, { secureRequest => - request.body.validate[BiosampleOriginalHaplogroupUpdate].fold( - errors => Future.successful(BadRequest(Json.obj("error" -> "Invalid request format"))), - update => { - if (update.originalYHaplogroup.isEmpty && - update.originalMtHaplogroup.isEmpty && - update.notes.isEmpty) { - Future.successful(BadRequest(Json.obj("error" -> "No valid fields to update"))) - } else { - (for { - biosampleExists <- biosampleRepository.findById(biosampleId) - if biosampleExists.isDefined - existing <- haplogroupRepository.findByBiosampleAndPublication(biosampleId, publicationId) - entry = existing match { - case Some(e) => - e.copy( - yHaplogroupResult = update.originalYHaplogroup.orElse(e.yHaplogroupResult), - 
mtHaplogroupResult = update.originalMtHaplogroup.orElse(e.mtHaplogroupResult), - notes = update.notes.orElse(e.notes) - ) - case None => - OriginalHaplogroupEntry( - publicationId = publicationId, - yHaplogroupResult = update.originalYHaplogroup, - mtHaplogroupResult = update.originalMtHaplogroup, - notes = update.notes - ) - } - _ <- haplogroupRepository.upsert(biosampleId, entry) - } yield Ok(Json.toJson(BiosampleOriginalHaplogroupView.fromEntry(biosampleId, entry)))).recover { - case _: NoSuchElementException => - NotFound(Json.obj("error" -> "Biosample not found")) - case e: Exception => - logger.error("Error updating haplogroup", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - } - ) - }) - } - - def getHaplogroup(biosampleId: Int, publicationId: Int): Action[AnyContent] = - secureApi.async { request => - haplogroupRepository.findByBiosampleAndPublication(biosampleId, publicationId).map { - case Some(entry) => Ok(Json.toJson(BiosampleOriginalHaplogroupView.fromEntry(biosampleId, entry))) - case None => NotFound(Json.obj("error" -> "Haplogroup assignment not found")) - } - } - - def deleteHaplogroup(biosampleId: Int, publicationId: Int): Action[AnyContent] = - secureApi.async { request => - haplogroupRepository.findByBiosampleAndPublication(biosampleId, publicationId).flatMap { - case Some(_) => - haplogroupRepository.delete(biosampleId, publicationId).map { - case true => NoContent - case false => InternalServerError(Json.obj("error" -> "Failed to delete haplogroup assignment")) - } - case None => - Future.successful(NotFound(Json.obj("error" -> "Haplogroup assignment not found"))) - } - } -} diff --git a/app/controllers/BiosamplePublicationController.scala b/app/controllers/BiosamplePublicationController.scala deleted file mode 100644 index 2c37aa62..00000000 --- a/app/controllers/BiosamplePublicationController.scala +++ /dev/null @@ -1,74 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import 
jakarta.inject.{Inject, Singleton} -import models.api.BiosamplePublicationLinkRequest -import play.api.libs.json.Json -import play.api.mvc.{AbstractController, Action, ControllerComponents} -import services.BiosampleDomainService - -import scala.concurrent.ExecutionContext - -/** - * Controller for managing the linkage between biosamples and publications. - * - * This controller provides an endpoint for associating a biosample with a publication - * based on their respective identifiers. The operation ensures proper validation of - * input data and handles potential errors during the linking process. - * - * @constructor Creates a new instance of BiosamplePublicationController. - * @param cc Controller components used for handling requests and responses. - * @param secureApi Security action for validating and processing secure API requests. - * @param biosampleDomainService The facade service for all biosample operations. - * @param ec Execution context for asynchronous operations. - */ -@Singleton -class BiosamplePublicationController @Inject()( - cc: ControllerComponents, - secureApi: ApiSecurityAction, - biosampleDomainService: BiosampleDomainService - )(implicit ec: ExecutionContext) extends AbstractController(cc) { - - /** - * Links a biosample to a publication by processing a request containing their identifiers. - * - * This method accepts a JSON request body of type `BiosamplePublicationLinkRequest`. - * It triggers the `biosamplePublicationService` to establish a link between the specified - * biosample and publication. If successfully linked, a `201 Created` response is returned - * with the details of the link. In case of errors, appropriate HTTP error responses are returned: - * - `BadRequest` for invalid input or missing resources. - * - `InternalServerError` for unexpected server errors or data integrity issues. 
- * - * @return An asynchronous Play Framework Action that processes the request, performs the linking - * operation, and produces a JSON-based HTTP response with success or error details. - */ - def linkBiosampleToPublication: Action[BiosamplePublicationLinkRequest] = - secureApi.jsonAction[BiosamplePublicationLinkRequest].async { request => - biosampleDomainService.linkBiosampleToPublication( - request.body.sampleAccession, - request.body.publicationDoi - ).map { link => - Created(Json.obj( - "message" -> "Biosample successfully linked to publication", - "publicationId" -> link.publicationId, - "biosampleId" -> link.biosampleId - )) - }.recover { - case e: IllegalArgumentException => - BadRequest(Json.obj( - "error" -> "Invalid request", - "message" -> e.getMessage - )) - case e: IllegalStateException => - InternalServerError(Json.obj( - "error" -> "Data integrity error", - "message" -> e.getMessage - )) - case e: Exception => - InternalServerError(Json.obj( - "error" -> "Internal server error", - "message" -> "Failed to link biosample to publication" - )) - } - } -} \ No newline at end of file diff --git a/app/controllers/BiosampleReportController.scala b/app/controllers/BiosampleReportController.scala deleted file mode 100644 index 6c8b57e8..00000000 --- a/app/controllers/BiosampleReportController.scala +++ /dev/null @@ -1,51 +0,0 @@ -package controllers - -import play.api.libs.json.Json -import play.api.mvc.{Action, AnyContent, AbstractController, MessagesControllerComponents} -import play.api.i18n._ // Add this import -import services.BiosampleDomainService - -import javax.inject.{Inject, Singleton} -import scala.concurrent.ExecutionContext - -/** - * Controller responsible for handling operations related to biosample reports. - * - * @param cc Controller components used to handle HTTP-related features. - * @param biosampleDomainService The facade service for all biosample operations. - * @param ec ExecutionContext for handling asynchronous operations. 
- */ -@Singleton -class BiosampleReportController @Inject()(cc: MessagesControllerComponents, biosampleDomainService: BiosampleDomainService)(implicit ec: ExecutionContext) extends AbstractController(cc) with I18nSupport { // Modified - // override protected def controllerComponents: ControllerComponents = cc // Not needed when extending AbstractController directly - - /** - * Generates an HTML view of the biosample report for a specific publication. - * Supports pagination to display data across multiple pages. - * - * @param publicationId The ID of the publication for which the biosample report is generated. - * @param page Optional page number indicating which page of the report to retrieve. - * @return An asynchronous Action that renders the HTML view of the biosample report. - */ - def getBiosampleReportHTML(publicationId: Int, page: Option[Int]): Action[AnyContent] = Action.async { implicit request => - val currentPage = page.getOrElse(1) - val pageSize = request.queryString.get("pageSize").flatMap(_.headOption).flatMap(_.toIntOption).getOrElse(100) - - biosampleDomainService.getPaginatedBiosampleData(publicationId, currentPage, pageSize).map { paginatedResult => - Ok(views.html.biosampleReport(paginatedResult, publicationId)) - } - } - - /** - * Retrieves biosample data for a specific publication and returns it in JSON format. - * - * @param publicationId The ID of the publication for which biosample data will be retrieved. - * @return An asynchronous Action that returns the biosample data as a JSON response. 
- */ - def getBiosampleReportJSON(publicationId: Int): Action[AnyContent] = Action.async { implicit request => - biosampleDomainService.getBiosampleData(publicationId).map { biosamples => - val jsonResponse = Json.toJson(biosamples) - Ok(jsonResponse).as(play.api.http.MimeTypes.JSON) - } - } -} \ No newline at end of file diff --git a/app/controllers/ContactController.scala b/app/controllers/ContactController.scala deleted file mode 100644 index 43134319..00000000 --- a/app/controllers/ContactController.scala +++ /dev/null @@ -1,174 +0,0 @@ -package controllers - -import com.nappin.play.recaptcha.{RecaptchaVerifier, WidgetHelper} -import models.domain.support.{ContactMessage, MessageStatus} -import models.forms.Contact -import org.webjars.play.WebJarsUtil -import play.api.i18n.I18nSupport -import play.api.mvc.* -import play.api.{Configuration, Environment, Logging} -import repositories.ContactMessageRepository -import services.EmailService - -import java.time.LocalDateTime -import java.util.UUID -import javax.inject.* -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class ContactController @Inject()( - val controllerComponents: ControllerComponents, - contactMessageRepository: ContactMessageRepository, - emailService: EmailService, - config: Configuration, - verifier: RecaptchaVerifier, - env: Environment, - contactView: views.html.contact -)(implicit ec: ExecutionContext, widgetHelper: WidgetHelper, webJarsUtil: WebJarsUtil) - extends BaseController with I18nSupport with Logging { - - private val recipientEmail = config.get[String]("contact.recipient.email") - private val serviceEmail = "info@decoding-us.com" - private val isProd = env.mode == play.api.Mode.Prod - private val botRegex = "(?i)bot|crawl|spider|curl|wget|python|httpclient".r - - def show: Action[AnyContent] = Action { implicit request: Request[AnyContent] => - val isAuthenticated = request.session.get("userId").isDefined - Ok(contactView(Contact.form, isProd, isAuthenticated)) - } - 
- def submit(): Action[AnyContent] = Action.async { implicit request => - val clientIpAddress = request.headers.get("X-Real-IP").getOrElse(request.remoteAddress) - val userIdOpt = request.session.get("userId").map(UUID.fromString) - val isAuthenticated = userIdOpt.isDefined - - // Bot detection - val userAgentOpt = request.headers.get("User-Agent") - if (userAgentOpt.isEmpty || userAgentOpt.exists(agent => botRegex.findFirstMatchIn(agent).isDefined)) { - logger.warn(s"Submission blocked due to suspicious user agent: $userAgentOpt from IP: $clientIpAddress") - Future.successful( - Redirect(routes.ContactController.show()) - .flashing("error" -> "Submission rejected.") - ) - } else { - // For anonymous users, validate with reCAPTCHA in production - val formValidation = if (isProd && !isAuthenticated) { - verifier.bindFromRequestAndVerify(Contact.form) - } else { - Future.successful(Contact.form.bindFromRequest()) - } - - formValidation.flatMap { - case form if form.hasErrors => - Future.successful(BadRequest(contactView(form, isProd, isAuthenticated))) - - case form => form.get match { - case contact if contact.phoneNumber.nonEmpty => - // Honeypot field is filled - likely spam - logger.warn(s"Spam attempt detected from IP: $clientIpAddress") - Future.successful( - Redirect(routes.ContactController.show()) - .flashing("success" -> "Thank you for your message. 
We'll get back to you soon!") - ) - - case contact => - val now = LocalDateTime.now() - val ipHash = utils.IpAddressUtils.hashIpAddress(clientIpAddress) - - val message = ContactMessage( - id = None, - userId = userIdOpt, - senderName = if (isAuthenticated) None else Some(contact.name), - senderEmail = if (isAuthenticated) None else Some(contact.email), - subject = contact.subject, - message = contact.message, - status = MessageStatus.New, - ipAddressHash = Some(ipHash), - userAgent = userAgentOpt, - createdAt = now, - updatedAt = now - ) - - contactMessageRepository.create(message).map { created => - logger.info(s"Contact message ${created.id.get} created from ${userIdOpt.map(_.toString).getOrElse(contact.email)}") - - // Send notification email to admins - sendAdminNotification(created, contact) - - Redirect(routes.ContactController.show()) - .flashing("success" -> "Thank you for your message. We'll get back to you soon!") - }.recover { case e: Exception => - logger.error(s"Failed to save contact message", e) - InternalServerError(contactView(Contact.form, isProd, isAuthenticated)) - .flashing("error" -> "Sorry, there was a problem sending your message. Please try again later.") - } - } - } - } - } - - /** - * Show messages for authenticated user (their own message history). - * Also marks all messages as viewed to clear the notification badge. - */ - def myMessages: Action[AnyContent] = Action.async { implicit request => - request.session.get("userId").map(UUID.fromString) match { - case Some(userId) => - for { - _ <- contactMessageRepository.updateUserLastViewedAll(userId) - messages <- contactMessageRepository.findByUserId(userId) - } yield Ok(views.html.support.myMessages(messages)) - case None => - Future.successful(Redirect(routes.AuthController.login).flashing("error" -> "Please log in to view your messages.")) - } - } - - /** - * HTMX endpoint: Get unread reply count badge for authenticated user. 
- */ - def userMessageBadge: Action[AnyContent] = Action.async { implicit request => - request.session.get("userId").map(UUID.fromString) match { - case Some(userId) => - contactMessageRepository.countUnreadRepliesForUser(userId).map { count => - Ok(views.html.partials.messageBadge(count)) - } - case None => - Future.successful(Ok(views.html.partials.messageBadge(0))) - } - } - - /** Strip CR/LF to prevent email header injection */ - private def sanitizeForEmail(s: String): String = - s.replaceAll("[\\r\\n]", " ").trim - - private def sendAdminNotification(message: ContactMessage, contact: Contact.ContactDTO): Unit = { - val senderInfo = message.userId match { - case Some(uid) => s"Authenticated User (ID: $uid)" - case None => s"${sanitizeForEmail(contact.name)} <${sanitizeForEmail(contact.email)}>" - } - - emailService.sendEmail( - to = Seq(recipientEmail), - from = serviceEmail, - subject = s"[DecodingUs Contact] ${sanitizeForEmail(contact.subject)}", - body = - s""" - |New contact message received: - | - |From: $senderInfo - |Subject: ${sanitizeForEmail(contact.subject)} - | - |Message: - |${contact.message} - | - |--- - |Message ID: ${message.id.getOrElse("N/A")} - |View in admin panel: /admin/messages/${message.id.getOrElse("")} - |""".stripMargin - ) match { - case Right(_) => logger.info(s"Admin notification sent for message ${message.id}") - case Left(error) => logger.error(s"Failed to send admin notification: $error") - } - } - -} diff --git a/app/controllers/CookieConsentController.scala b/app/controllers/CookieConsentController.scala deleted file mode 100644 index 6a9afa15..00000000 --- a/app/controllers/CookieConsentController.scala +++ /dev/null @@ -1,107 +0,0 @@ -package controllers - -import jakarta.inject.{Inject, Singleton} -import models.domain.auth.CookieConsent -import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import 
repositories.CookieConsentRepository - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -case class ConsentStatusResponse(hasConsent: Boolean, policyVersion: String) - -object ConsentStatusResponse { - implicit val format: OFormat[ConsentStatusResponse] = Json.format[ConsentStatusResponse] -} - -@Singleton -class CookieConsentController @Inject()( - val controllerComponents: ControllerComponents, - cookieConsentRepository: CookieConsentRepository -)(implicit ec: ExecutionContext) extends BaseController with Logging { - - private val ConsentCookieName = "cookie_consent" - private val ConsentSessionKey = "consent_session_id" - - /** - * Check if the current user/session has given consent. - * Returns JSON with consent status. - */ - def checkConsent: Action[AnyContent] = Action.async { implicit request => - val policyVersion = CookieConsent.CurrentPolicyVersion - - // Check if user is logged in - request.session.get("userId").map(UUID.fromString) match { - case Some(userId) => - cookieConsentRepository.hasValidConsent(userId, policyVersion).map { hasConsent => - Ok(Json.toJson(ConsentStatusResponse(hasConsent, policyVersion))) - } - case None => - // Check session-based consent - request.session.get(ConsentSessionKey) match { - case Some(sessionId) => - cookieConsentRepository.hasValidConsentBySession(sessionId, policyVersion).map { hasConsent => - Ok(Json.toJson(ConsentStatusResponse(hasConsent, policyVersion))) - } - case None => - // Also check cookie for returning visitors - request.cookies.get(ConsentCookieName) match { - case Some(cookie) if cookie.value == policyVersion => - Future.successful(Ok(Json.toJson(ConsentStatusResponse(hasConsent = true, policyVersion)))) - case _ => - Future.successful(Ok(Json.toJson(ConsentStatusResponse(hasConsent = false, policyVersion)))) - } - } - } - } - - /** - * Record cookie consent acceptance. 
- */ - def acceptConsent: Action[AnyContent] = Action.async { implicit request => - val policyVersion = CookieConsent.CurrentPolicyVersion - val now = LocalDateTime.now() - val userAgent = request.headers.get("User-Agent") - val ipHash = utils.IpAddressUtils.hashIpAddress(request.remoteAddress) - - // Get or create session ID for anonymous users - val sessionId = request.session.get(ConsentSessionKey).getOrElse(UUID.randomUUID().toString) - - // Check if user is logged in - val userId = request.session.get("userId").map(UUID.fromString) - - val consent = CookieConsent( - id = None, - userId = userId, - sessionId = if (userId.isEmpty) Some(sessionId) else None, - ipAddressHash = Some(ipHash), - consentGiven = true, - consentTimestamp = now, - policyVersion = policyVersion, - userAgent = userAgent, - createdAt = now - ) - - cookieConsentRepository.create(consent).map { _ => - logger.info(s"Cookie consent recorded for ${userId.map(_.toString).getOrElse(s"session:$sessionId")}") - - Ok(Json.obj("success" -> true, "message" -> "Consent recorded")) - .withSession(request.session + (ConsentSessionKey -> sessionId)) - .withCookies( - play.api.mvc.Cookie( - name = ConsentCookieName, - value = policyVersion, - maxAge = Some(365 * 24 * 60 * 60), // 1 year - httpOnly = false // Needs to be readable by JS for banner logic - ) - ) - }.recover { case e: Exception => - logger.error("Failed to record cookie consent", e) - InternalServerError(Json.obj("success" -> false, "message" -> "Failed to record consent")) - } - } - -} diff --git a/app/controllers/CoverageController.scala b/app/controllers/CoverageController.scala deleted file mode 100644 index 82fd6cb7..00000000 --- a/app/controllers/CoverageController.scala +++ /dev/null @@ -1,102 +0,0 @@ -package controllers - -import jakarta.inject.Singleton -import models.domain.genomics.CoverageBenchmark -import org.webjars.play.WebJarsUtil -import play.api.i18n.I18nSupport -import play.api.libs.json.Json -import play.api.mvc.* -import 
repositories.CoverageRepository - -import javax.inject.Inject -import scala.concurrent.ExecutionContext - -/** - * Controller responsible for managing endpoints related to coverage information and benchmarks. - * - * @param controllerComponents Controller components used for handling HTTP-related functionality. - * @param coverageRepository Repository for accessing coverage benchmark data. - * @param webJarsUtil Utility for managing WebJars in Play framework. - * @param ec Execution context for asynchronous operations. - */ -@Singleton -class CoverageController @Inject()( - val controllerComponents: ControllerComponents, - coverageRepository: CoverageRepository - )(using webJarsUtil: WebJarsUtil, ec: ExecutionContext) extends BaseController with I18nSupport { - - /** - * Handles an HTTP GET request to render the coverage benchmarks page. - * - * @return an Action that, when executed, returns an HTTP OK response containing the rendered HTML for the - * coverage benchmarks view. - */ - def index(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.coverage()) - } - - /** - * Handles an HTTP GET request to return benchmark information in JSON format. - * - * Returns aggregated coverage statistics grouped by lab, test type, and contig. - * The standard deviation values are provided to calculate 95% confidence intervals - * when there is more than one sample in the group. - * - * @return An HTTP OK response containing a JSON array of coverage benchmark objects. - */ - def apiBenchmarks(): Action[AnyContent] = Action.async { implicit request: Request[AnyContent] => - coverageRepository.getBenchmarkStatistics.map { benchmarks => - Ok(Json.toJson(benchmarks)) - } - } - - /** - * Handles an HTTP GET request to retrieve a list of sequencing labs in JSON format. - * - * This method queries the `coverageRepository` to fetch all sequencing labs, - * sorts them by their name, and maps their details into a JSON response. 
- * It returns an HTTP OK response containing a JSON array of sequencing lab objects. - * - * @return An asynchronous Action that produces an HTTP OK response with the lab data serialized into JSON. - */ - def labs = Action.async { implicit request => - coverageRepository.getAllLabs.map { labs => - Ok(Json.toJson(labs)) - } - } - - /** - * Handles an HTTP GET request to fetch benchmarks for a specific lab. - * The method queries the `coverageRepository` to retrieve the coverage benchmark data - * corresponding to the given lab and renders it as an HTML fragment. - * - * @param labId The unique identifier of the sequencing lab for which coverage benchmark data is requested. - * @return An asynchronous action that produces an HTTP OK response containing the rendered HTML fragment - * with the benchmarks data. - */ - def benchmarksByLab(labId: Int) = Action.async { implicit request => - coverageRepository.getBenchmarksByLab(labId).map { benchmarks => - Ok(views.html.fragments.coverageBenchmarks(benchmarks, None)) - } - } - - /** - * Handles an HTTP GET request to fetch benchmark details for a specific lab. - * The method queries the `coverageRepository` to retrieve all labs and the specific - * benchmarks corresponding to the provided lab ID, and renders it as an HTML fragment - * displaying the benchmark details along with lab information. - * - * @param labId The unique identifier of the sequencing lab for which benchmark details are requested. - * @return An asynchronous action that produces an HTTP OK response containing the rendered HTML fragment - * with detailed benchmark data and lab information. 
- */ - def benchmarksByLabWithDetails(labId: Int) = Action.async { implicit request => - for { - labs <- coverageRepository.getAllLabs - benchmarks <- coverageRepository.getBenchmarksByLab(labId) - } yield { - val lab = labs.find(_.id.contains(labId)) - Ok(views.html.fragments.coverageBenchmarks(benchmarks, lab)) - } - } -} \ No newline at end of file diff --git a/app/controllers/CuratorController.scala b/app/controllers/CuratorController.scala deleted file mode 100644 index 85f31c27..00000000 --- a/app/controllers/CuratorController.scala +++ /dev/null @@ -1,915 +0,0 @@ -package controllers - -import actions.{AuthenticatedAction, AuthenticatedRequest, PermissionAction} -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.domain.genomics.{MutationType, NamingStatus, PointVariantCoordinates, VariantAliases, VariantV2} -import models.domain.haplogroups.Haplogroup -import org.webjars.play.WebJarsUtil -import play.api.Logging -import play.api.data.Form -import play.api.data.Forms.* -import play.api.libs.json.Json -import play.api.i18n.I18nSupport -import play.api.mvc.* -import repositories.{GenbankContigRepository, HaplogroupCoreRepository, HaplogroupVariantRepository, VariantV2Repository} -import services.{CuratorAuditService, TreeRestructuringService, TreeVersioningService} -import services.genomics.YBrowseVariantIngestionService - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -case class HaplogroupFormData( - name: String, - lineage: Option[String], - description: Option[String], - haplogroupType: String, - source: String, - confidenceLevel: String, - formedYbp: Option[Int], - formedYbpLower: Option[Int], - formedYbpUpper: Option[Int], - tmrcaYbp: Option[Int], - tmrcaYbpLower: Option[Int], - tmrcaYbpUpper: Option[Int], - ageEstimateSource: Option[String] -) - -case class CreateHaplogroupFormData( - name: String, - lineage: Option[String], - description: Option[String], - haplogroupType: 
String, - source: String, - confidenceLevel: String, - parentId: Option[Int], - createAboveRoot: Boolean -) - -case class VariantFormData( - refGenome: String, - contig: String, - position: Int, - referenceAllele: String, - alternateAllele: String, - variantType: String, - rsId: Option[String], - commonName: Option[String] -) - -case class SplitBranchFormData( - name: String, - lineage: Option[String], - description: Option[String], - source: String, - confidenceLevel: String, - variantIds: Seq[Int], - childIds: Seq[Int] -) - -case class ReparentFormData( - newParentId: Int, - source: String, - reason: Option[String] -) - -@Singleton -class CuratorController @Inject()( - val controllerComponents: ControllerComponents, - protected val authenticatedAction: AuthenticatedAction, - protected val permissionAction: PermissionAction, - haplogroupRepository: HaplogroupCoreRepository, - variantV2Repository: VariantV2Repository, - haplogroupVariantRepository: HaplogroupVariantRepository, - genbankContigRepository: GenbankContigRepository, - auditService: CuratorAuditService, - treeRestructuringService: TreeRestructuringService, - variantIngestionService: YBrowseVariantIngestionService, - treeVersioningService: TreeVersioningService -)(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) - extends BaseController with I18nSupport with Logging with BaseCuratorController { - - // Forms - private val haplogroupForm: Form[HaplogroupFormData] = Form( - mapping( - "name" -> nonEmptyText(1, 100), - "lineage" -> optional(text(maxLength = 500)), - "description" -> optional(text(maxLength = 2000)), - "haplogroupType" -> nonEmptyText.verifying("Invalid type", t => HaplogroupType.fromString(t).isDefined), - "source" -> nonEmptyText(1, 100), - "confidenceLevel" -> nonEmptyText(1, 50), - "formedYbp" -> optional(number), - "formedYbpLower" -> optional(number), - "formedYbpUpper" -> optional(number), - "tmrcaYbp" -> optional(number), - "tmrcaYbpLower" -> optional(number), - "tmrcaYbpUpper" 
-> optional(number), - "ageEstimateSource" -> optional(text(maxLength = 100)) - )(HaplogroupFormData.apply)(h => Some((h.name, h.lineage, h.description, h.haplogroupType, h.source, h.confidenceLevel, h.formedYbp, h.formedYbpLower, h.formedYbpUpper, h.tmrcaYbp, h.tmrcaYbpLower, h.tmrcaYbpUpper, h.ageEstimateSource))) - ) - - private val variantForm: Form[VariantFormData] = Form( - mapping( - "refGenome" -> nonEmptyText.verifying("Invalid reference genome", r => Seq("hs1", "GRCh38", "GRCh37").contains(r)), - "contig" -> nonEmptyText(1, 50), - "position" -> number, - "referenceAllele" -> nonEmptyText(1, 1000), - "alternateAllele" -> nonEmptyText(1, 1000), - "variantType" -> nonEmptyText.verifying("Invalid variant type", t => MutationType.fromString(t).isDefined), - "rsId" -> optional(text(maxLength = 50)), - "commonName" -> optional(text(maxLength = 100)) - )(VariantFormData.apply)(v => Some((v.refGenome, v.contig, v.position, v.referenceAllele, v.alternateAllele, v.variantType, v.rsId, v.commonName))) - ) - - private val splitBranchForm: Form[SplitBranchFormData] = Form( - mapping( - "name" -> nonEmptyText(1, 100), - "lineage" -> optional(text(maxLength = 500)), - "description" -> optional(text(maxLength = 2000)), - "source" -> nonEmptyText(1, 100), - "confidenceLevel" -> nonEmptyText(1, 50), - "variantIds" -> seq(number), - "childIds" -> seq(number) - )(SplitBranchFormData.apply)(s => Some((s.name, s.lineage, s.description, s.source, s.confidenceLevel, s.variantIds, s.childIds))) - ) - - private val reparentForm: Form[ReparentFormData] = Form( - mapping( - "newParentId" -> number, - "source" -> nonEmptyText(1, 100), - "reason" -> optional(text(maxLength = 500)) - )(ReparentFormData.apply)(r => Some((r.newParentId, r.source, r.reason))) - ) - - private val createHaplogroupFormMapping: Form[CreateHaplogroupFormData] = Form( - mapping( - "name" -> nonEmptyText(1, 100), - "lineage" -> optional(text(maxLength = 500)), - "description" -> optional(text(maxLength = 2000)), 
- "haplogroupType" -> nonEmptyText.verifying("Invalid type", t => HaplogroupType.fromString(t).isDefined), - "source" -> nonEmptyText(1, 100), - "confidenceLevel" -> nonEmptyText(1, 50), - "parentId" -> optional(number), - "createAboveRoot" -> boolean - )(CreateHaplogroupFormData.apply)(c => Some((c.name, c.lineage, c.description, c.haplogroupType, c.source, c.confidenceLevel, c.parentId, c.createAboveRoot))) - ) - - // === Dashboard === - - def dashboard: Action[AnyContent] = withPermission("haplogroup.view").async { implicit request => - for { - yCount <- haplogroupRepository.countByType(HaplogroupType.Y) - mtCount <- haplogroupRepository.countByType(HaplogroupType.MT) - variantCount <- variantV2Repository.count(None) - yActiveChangeSet <- treeVersioningService.getActiveChangeSet(HaplogroupType.Y) - mtActiveChangeSet <- treeVersioningService.getActiveChangeSet(HaplogroupType.MT) - } yield { - Ok(views.html.curator.dashboard(yCount, mtCount, variantCount, yActiveChangeSet, mtActiveChangeSet)) - } - } - - // === Haplogroups === - - def listHaplogroups(query: Option[String], hgType: Option[String], page: Int, pageSize: Int): Action[AnyContent] = - withPermission("haplogroup.view") { implicit request => - Ok(views.html.curator.haplogroups.list(query, hgType, pageSize)) - } - - def haplogroupsFragment(query: Option[String], hgType: Option[String], page: Int, pageSize: Int): Action[AnyContent] = - withPermission("haplogroup.view").async { implicit request => - val haplogroupType = hgType.flatMap(HaplogroupType.fromString) - val offset = (page - 1) * pageSize - - for { - haplogroups <- query match { - case Some(q) if q.nonEmpty => haplogroupRepository.search(q, haplogroupType, pageSize, offset) - case _ => haplogroupRepository.search("", haplogroupType, pageSize, offset) - } - totalCount <- haplogroupRepository.count(query.filter(_.nonEmpty), haplogroupType) - } yield { - val totalPages = Math.max(1, (totalCount + pageSize - 1) / pageSize) - 
Ok(views.html.curator.haplogroups.listFragment(haplogroups, query, hgType, page, totalPages, pageSize)) - } - } - - def haplogroupDetailPanel(id: Int): Action[AnyContent] = - withPermission("haplogroup.view").async { implicit request => - for { - haplogroupOpt <- haplogroupRepository.findById(id) - parentOpt <- haplogroupRepository.getParent(id) - children <- haplogroupRepository.getDirectChildren(id) - variants <- haplogroupVariantRepository.getHaplogroupVariants(id) - history <- auditService.getHaplogroupHistory(id) - } yield { - haplogroupOpt match { - case Some(haplogroup) => - Ok(views.html.curator.haplogroups.detailPanel(haplogroup, parentOpt, children, variants, history)) - case None => - NotFound("Haplogroup not found") - } - } - } - - def searchHaplogroupsJson(query: Option[String], hgType: Option[String]): Action[AnyContent] = - withPermission("haplogroup.view").async { implicit request => - import play.api.libs.json.* - val haplogroupType = hgType.flatMap(HaplogroupType.fromString) - for { - haplogroups <- haplogroupRepository.search(query.getOrElse(""), haplogroupType, 100, 0) - } yield { - val json = haplogroups.map { h => - Json.obj( - "id" -> h.id, - "name" -> h.name, - "type" -> h.haplogroupType.toString - ) - } - Ok(Json.toJson(json)) - } - } - - def createHaplogroupForm: Action[AnyContent] = - withPermission("haplogroup.create").async { implicit request => - for { - yRoots <- haplogroupRepository.findRoots(HaplogroupType.Y) - mtRoots <- haplogroupRepository.findRoots(HaplogroupType.MT) - } yield { - Ok(views.html.curator.haplogroups.createForm(createHaplogroupFormMapping, yRoots, mtRoots)) - } - } - - def createHaplogroup: Action[AnyContent] = - withPermission("haplogroup.create").async { implicit request => - createHaplogroupFormMapping.bindFromRequest().fold( - formWithErrors => { - for { - yRoots <- haplogroupRepository.findRoots(HaplogroupType.Y) - mtRoots <- haplogroupRepository.findRoots(HaplogroupType.MT) - } yield 
BadRequest(views.html.curator.haplogroups.createForm(formWithErrors, yRoots, mtRoots)) - }, - data => { - val haplogroupType = HaplogroupType.fromString(data.haplogroupType).get - val haplogroup = Haplogroup( - id = None, - name = data.name, - lineage = data.lineage, - description = data.description, - haplogroupType = haplogroupType, - revisionId = 1, - source = data.source, - confidenceLevel = data.confidenceLevel, - validFrom = LocalDateTime.now(), - validUntil = None - ) - - for { - // Validate parent selection - yRoots <- haplogroupRepository.findRoots(HaplogroupType.Y) - mtRoots <- haplogroupRepository.findRoots(HaplogroupType.MT) - existingRoots = if (haplogroupType == HaplogroupType.Y) yRoots else mtRoots - - result <- (data.parentId, data.createAboveRoot, existingRoots.nonEmpty) match { - case (None, true, true) => - // Create as NEW root above existing roots - for { - newId <- haplogroupRepository.createWithParent(haplogroup, None, "curator-create-above-root") - createdHaplogroup = haplogroup.copy(id = Some(newId._1)) - // Re-parent all existing roots to become children of the new root - _ <- Future.traverse(existingRoots.flatMap(_.id)) { oldRootId => - haplogroupRepository.updateParent(oldRootId, newId._1, "curator-create-above-root") - } - _ <- auditService.logHaplogroupCreate( - request.user.id.get, - createdHaplogroup, - Some(s"Created as new root above existing root(s): ${existingRoots.map(_.name).mkString(", ")}") - ) - } yield { - Redirect(routes.CuratorController.listHaplogroups(None, None, 1, 20)) - .flashing("success" -> s"Haplogroup '${data.name}' created as new root. Previous root(s) are now children.") - } - - case (None, false, true) => - // Trying to create a new root when one already exists without the flag - val errorForm = createHaplogroupFormMapping.fill(data).withGlobalError( - s"A root haplogroup already exists for ${haplogroupType}. Select a parent (leaf), use 'Create above existing root', or use Split to create a subclade." 
- ) - Future.successful(BadRequest(views.html.curator.haplogroups.createForm(errorForm, yRoots, mtRoots))) - - case (Some(parentId), _, _) => - // Validate parent exists and is of the same type - haplogroupRepository.findById(parentId).flatMap { - case Some(parent) if parent.haplogroupType != haplogroupType => - val errorForm = createHaplogroupFormMapping.fill(data).withGlobalError( - s"Parent haplogroup type (${parent.haplogroupType}) must match the new haplogroup type (${haplogroupType})" - ) - Future.successful(BadRequest(views.html.curator.haplogroups.createForm(errorForm, yRoots, mtRoots))) - - case Some(_) => - // Create with parent (leaf) - for { - newId <- haplogroupRepository.createWithParent(haplogroup, Some(parentId), "curator-create") - createdHaplogroup = haplogroup.copy(id = Some(newId._1)) - _ <- auditService.logHaplogroupCreate(request.user.id.get, createdHaplogroup, Some("Created as leaf via curator interface")) - } yield { - Redirect(routes.CuratorController.listHaplogroups(None, None, 1, 20)) - .flashing("success" -> s"Haplogroup '${data.name}' created successfully as child of parent") - } - - case None => - val errorForm = createHaplogroupFormMapping.fill(data).withGlobalError("Selected parent haplogroup not found") - Future.successful(BadRequest(views.html.curator.haplogroups.createForm(errorForm, yRoots, mtRoots))) - } - - case (None, _, false) => - // Create as new root (no existing roots for this type) - for { - newId <- haplogroupRepository.createWithParent(haplogroup, None, "curator-create") - createdHaplogroup = haplogroup.copy(id = Some(newId._1)) - _ <- auditService.logHaplogroupCreate(request.user.id.get, createdHaplogroup, Some("Created as root via curator interface")) - } yield { - Redirect(routes.CuratorController.listHaplogroups(None, None, 1, 20)) - .flashing("success" -> s"Haplogroup '${data.name}' created successfully as root") - } - } - } yield result - } - ) - } - - def editHaplogroupForm(id: Int): Action[AnyContent] = - 
withPermission("haplogroup.update").async { implicit request => - haplogroupRepository.findById(id).map { - case Some(haplogroup) => - val formData = HaplogroupFormData( - name = haplogroup.name, - lineage = haplogroup.lineage, - description = haplogroup.description, - haplogroupType = haplogroup.haplogroupType.toString, - source = haplogroup.source, - confidenceLevel = haplogroup.confidenceLevel, - formedYbp = haplogroup.formedYbp, - formedYbpLower = haplogroup.formedYbpLower, - formedYbpUpper = haplogroup.formedYbpUpper, - tmrcaYbp = haplogroup.tmrcaYbp, - tmrcaYbpLower = haplogroup.tmrcaYbpLower, - tmrcaYbpUpper = haplogroup.tmrcaYbpUpper, - ageEstimateSource = haplogroup.ageEstimateSource - ) - Ok(views.html.curator.haplogroups.editForm(id, haplogroupForm.fill(formData))) - case None => - NotFound("Haplogroup not found") - } - } - - def updateHaplogroup(id: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - haplogroupRepository.findById(id).flatMap { - case Some(oldHaplogroup) => - haplogroupForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(views.html.curator.haplogroups.editForm(id, formWithErrors))) - }, - data => { - val updatedHaplogroup = oldHaplogroup.copy( - name = data.name, - lineage = data.lineage, - description = data.description, - source = data.source, - confidenceLevel = data.confidenceLevel, - formedYbp = data.formedYbp, - formedYbpLower = data.formedYbpLower, - formedYbpUpper = data.formedYbpUpper, - tmrcaYbp = data.tmrcaYbp, - tmrcaYbpLower = data.tmrcaYbpLower, - tmrcaYbpUpper = data.tmrcaYbpUpper, - ageEstimateSource = data.ageEstimateSource - ) - - for { - updated <- haplogroupRepository.update(updatedHaplogroup) - _ <- if (updated) { - auditService.logHaplogroupUpdate(request.user.id.get, oldHaplogroup, updatedHaplogroup, Some("Updated via curator interface")) - } else { - Future.successful(()) - } - } yield { - if (updated) { - 
Redirect(routes.CuratorController.listHaplogroups(None, None, 1, 20)) - .flashing("success" -> s"Haplogroup '${data.name}' updated successfully") - } else { - BadRequest("Failed to update haplogroup") - } - } - } - ) - case None => - Future.successful(NotFound("Haplogroup not found")) - } - } - - def deleteHaplogroup(id: Int): Action[AnyContent] = - withPermission("haplogroup.delete").async { implicit request => - haplogroupRepository.findById(id).flatMap { - case Some(haplogroup) => - for { - deleted <- haplogroupRepository.softDelete(id, "curator-deletion") - _ <- if (deleted) { - auditService.logHaplogroupDelete(request.user.id.get, haplogroup, Some("Soft-deleted via curator interface")) - } else { - Future.successful(()) - } - } yield { - if (deleted) { - Ok("Deleted").withHeaders("HX-Trigger" -> "haplogroupDeleted") - } else { - BadRequest("Failed to delete haplogroup") - } - } - case None => - Future.successful(NotFound("Haplogroup not found")) - } - } - - // === Variants === - - def listVariants(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = - withPermission("variant.view") { implicit request => - Ok(views.html.curator.variants.list(query, pageSize)) - } - - def variantsFragment(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = - withPermission("variant.view").async { implicit request => - val offset = (page - 1) * pageSize - for { - (variants, totalCount) <- variantV2Repository.searchPaginated(query.getOrElse(""), offset, pageSize) - } yield { - val totalPages = Math.max(1, (totalCount + pageSize - 1) / pageSize) - Ok(views.html.curator.variants.listFragment(variants, query, page, totalPages, pageSize, totalCount)) - } - } - - def variantDetailPanel(id: Int): Action[AnyContent] = - withPermission("variant.view").async { implicit request => - for { - variantOpt <- variantV2Repository.findById(id) - haplogroups <- haplogroupVariantRepository.getHaplogroupsByVariant(id) - history <- 
auditService.getVariantHistory(id) - } yield { - variantOpt match { - case Some(variant) => - Ok(views.html.curator.variants.detailPanel(variant, haplogroups, history)) - case None => - NotFound("Variant not found") - } - } - } - - def createVariantForm: Action[AnyContent] = - withPermission("variant.create") { implicit request => - Ok(views.html.curator.variants.createForm(variantForm)) - } - - def createVariant: Action[AnyContent] = - withPermission("variant.create").async { implicit request => - variantForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(views.html.curator.variants.createForm(formWithErrors))) - }, - data => { - val coordinates = Json.obj( - data.refGenome -> Json.toJson(PointVariantCoordinates( - contig = data.contig, - position = data.position, - ref = data.referenceAllele.toUpperCase, - alt = data.alternateAllele.toUpperCase - )) - ) - - val aliases = (data.commonName, data.rsId) match { - case (Some(name), Some(rs)) => - Json.toJson(VariantAliases(commonNames = Seq(name), rsIds = Seq(rs))) - case (Some(name), None) => - Json.toJson(VariantAliases(commonNames = Seq(name))) - case (None, Some(rs)) => - Json.toJson(VariantAliases(rsIds = Seq(rs))) - case _ => - Json.obj() - } - - val variant = VariantV2( - canonicalName = data.commonName, - mutationType = MutationType.fromStringOrDefault(data.variantType), - namingStatus = if (data.commonName.isDefined) NamingStatus.Named else NamingStatus.Unnamed, - aliases = aliases, - coordinates = coordinates - ) - - for { - createdId <- variantV2Repository.create(variant) - createdVariant = variant.copy(variantId = Some(createdId)) - _ <- auditService.logVariantCreate(request.user.id.get, createdVariant, Some("Created via curator interface")) - } yield { - Redirect(routes.CuratorController.listVariants(None, 1, 20)) - .flashing("success" -> s"Variant ${createdVariant.displayName} created successfully") - } - } - ) - } - - def editVariantForm(id: Int): Action[AnyContent] = - 
withPermission("variant.update").async { implicit request => - variantV2Repository.findById(id).map { - case Some(variant) => - // Get the primary reference genome coordinates (prefer hs1) - val refGenome = variant.availableReferences.find(_ == "hs1") - .orElse(variant.availableReferences.headOption) - .getOrElse("hs1") - - val coords = variant.getCoordinates(refGenome) - val contig = coords.flatMap(c => (c \ "contig").asOpt[String]).getOrElse("") - val position = coords.flatMap(c => (c \ "position").asOpt[Int]).getOrElse(0) - val ref = coords.flatMap(c => (c \ "ref").asOpt[String]).getOrElse("") - val alt = coords.flatMap(c => (c \ "alt").asOpt[String]).getOrElse("") - - val filledForm = variantForm.fill(VariantFormData( - refGenome = refGenome, - contig = contig, - position = position, - referenceAllele = ref, - alternateAllele = alt, - variantType = variant.mutationType.dbValue, - rsId = variant.rsIds.headOption, - commonName = variant.canonicalName - )) - - Ok(views.html.curator.variants.editForm(id, filledForm, s"$refGenome:$contig")) - - case None => - NotFound("Variant not found") - } - } - - def updateVariant(id: Int): Action[AnyContent] = - withPermission("variant.update").async { implicit request => - variantForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(views.html.curator.variants.editForm(id, formWithErrors, ""))) - }, - data => { - variantV2Repository.findById(id).flatMap { - case None => - Future.successful(NotFound("Variant not found")) - - case Some(existing) => - // Update editable fields (metadata only - coordinates are immutable after creation) - val updatedAliases = (data.commonName, data.rsId) match { - case (Some(name), Some(rs)) => - Json.toJson(VariantAliases(commonNames = Seq(name), rsIds = Seq(rs))) - case (Some(name), None) => - Json.toJson(VariantAliases(commonNames = Seq(name))) - case (None, Some(rs)) => - Json.toJson(VariantAliases(rsIds = Seq(rs))) - case _ => - existing.aliases - } - - val updated 
= existing.copy( - canonicalName = data.commonName.orElse(existing.canonicalName), - mutationType = MutationType.fromStringOrDefault(data.variantType), - namingStatus = if (data.commonName.isDefined) NamingStatus.Named else existing.namingStatus, - aliases = updatedAliases - ) - - for { - success <- variantV2Repository.update(updated) - _ <- if (success) { - auditService.logVariantUpdate(request.user.id.get, existing, updated, Some("Updated via curator interface")) - } else { - Future.successful(()) - } - } yield { - if (success) { - Redirect(routes.CuratorController.listVariants(None, 1, 20)) - .flashing("success" -> s"Variant ${updated.displayName} updated successfully") - } else { - BadRequest("Failed to update variant") - } - } - } - } - ) - } - - // Variant groups are obsolete - VariantV2 is already consolidated - def editVariantGroupForm(groupKey: String): Action[AnyContent] = - withPermission("variant.update") { implicit request => - Redirect(routes.CuratorController.listVariants(Some(groupKey), 1, 20)) - .flashing("info" -> "Variant groups have been replaced with consolidated variants. 
Edit each variant directly.") - } - - def updateVariantGroup(groupKey: String): Action[AnyContent] = - withPermission("variant.update") { implicit request => - Redirect(routes.CuratorController.listVariants(Some(groupKey), 1, 20)) - .flashing("info" -> "Variant groups have been replaced with consolidated variants.") - } - - def deleteVariant(id: Int): Action[AnyContent] = - withPermission("variant.delete").async { implicit request => - variantV2Repository.findById(id).flatMap { - case Some(variant) => - for { - deleted <- variantV2Repository.delete(id) - _ <- if (deleted) { - auditService.logVariantDelete(request.user.id.get, variant, Some("Deleted via curator interface")) - } else { - Future.successful(()) - } - } yield { - if (deleted) { - Ok("Deleted").withHeaders("HX-Trigger" -> "variantDeleted") - } else { - BadRequest("Failed to delete variant") - } - } - case None => - Future.successful(NotFound("Variant not found")) - } - } - - // === Audit === - - def auditHistory(entityType: String, entityId: Int): Action[AnyContent] = - withPermission("audit.view").async { implicit request => - val historyFuture = entityType match { - case "haplogroup" => auditService.getHaplogroupHistory(entityId) - case "variant" => auditService.getVariantHistory(entityId) - case _ => Future.successful(Seq.empty) - } - - historyFuture.map { history => - Ok(views.html.curator.audit.historyPanel(entityType, entityId, history)) - } - } - - // === Haplogroup-Variant Associations === - - def searchVariantsForHaplogroup(haplogroupId: Int, query: Option[String]): Action[AnyContent] = - withPermission("haplogroup.view").async { implicit request => - for { - haplogroupOpt <- haplogroupRepository.findById(haplogroupId) - variants <- query match { - case Some(q) if q.nonEmpty => variantV2Repository.searchByName(q) - case _ => Future.successful(Seq.empty) - } - existingVariantIds <- haplogroupVariantRepository.getHaplogroupVariants(haplogroupId).map(_.flatMap(_.variantId).toSet) - } yield { - // 
Filter out variants that are already associated - val availableVariants = variants.filterNot(v => v.variantId.exists(existingVariantIds.contains)) - - haplogroupOpt match { - case Some(haplogroup) => - Ok(views.html.curator.haplogroups.variantSearchResults(haplogroupId, haplogroup.name, query, availableVariants)) - case None => - NotFound("Haplogroup not found") - } - } - } - - def addVariantToHaplogroup(haplogroupId: Int, variantId: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - for { - hvId <- haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, variantId) - _ <- auditService.logVariantAddedToHaplogroup( - request.user.email.getOrElse(request.user.id.map(_.toString).getOrElse("unknown")), - hvId, - Some(s"Added variant $variantId to haplogroup $haplogroupId") - ) - // Fetch updated variants for display - variants <- haplogroupVariantRepository.getHaplogroupVariants(haplogroupId) - } yield { - Ok(views.html.curator.haplogroups.variantsPanel(haplogroupId, variants)) - .withHeaders("HX-Trigger" -> "variantAdded") - } - } - - def removeVariantFromHaplogroup(haplogroupId: Int, variantId: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - for { - removed <- haplogroupVariantRepository.removeVariantFromHaplogroup(haplogroupId, variantId) - // Fetch updated variants for display - variants <- haplogroupVariantRepository.getHaplogroupVariants(haplogroupId) - } yield { - if (removed > 0) { - Ok(views.html.curator.haplogroups.variantsPanel(haplogroupId, variants)) - .withHeaders("HX-Trigger" -> "variantRemoved") - } else { - BadRequest("Failed to remove variant") - } - } - } - - def haplogroupVariantHistory(haplogroupVariantId: Int): Action[AnyContent] = - withPermission("audit.view").async { implicit request => - auditService.getHaplogroupVariantHistory(haplogroupVariantId).map { history => - Ok(views.html.curator.haplogroups.variantHistoryPanel(haplogroupVariantId, 
history)) - } - } - - // === Tree Restructuring === - - def splitBranchForm(parentId: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - treeRestructuringService.getSplitPreview(parentId).map { preview => - Ok(views.html.curator.haplogroups.splitBranchForm(preview.parent, preview.variants, preview.children, splitBranchForm)) - }.recover { - case e: IllegalArgumentException => - NotFound(e.getMessage) - } - } - - def splitBranch(parentId: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - treeRestructuringService.getSplitPreview(parentId).flatMap { preview => - splitBranchForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(views.html.curator.haplogroups.splitBranchForm( - preview.parent, preview.variants, preview.children, formWithErrors - ))) - }, - data => { - val newHaplogroup = Haplogroup( - id = None, - name = data.name, - lineage = data.lineage, - description = data.description, - haplogroupType = preview.parent.haplogroupType, - revisionId = 1, - source = data.source, - confidenceLevel = data.confidenceLevel, - validFrom = LocalDateTime.now(), - validUntil = None - ) - - treeRestructuringService.splitBranch( - parentId, - newHaplogroup, - data.variantIds, - data.childIds, - request.user.id.get - ).map { newId => - Redirect(routes.CuratorController.listHaplogroups(None, None, 1, 20)) - .flashing("success" -> s"Created subclade '${data.name}' under '${preview.parent.name}'") - }.recover { - case e: IllegalArgumentException => - BadRequest(views.html.curator.haplogroups.splitBranchForm( - preview.parent, preview.variants, preview.children, - splitBranchForm.fill(data).withGlobalError(e.getMessage) - )) - } - } - ) - } - } - - def mergeConfirmForm(childId: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - treeRestructuringService.getMergePreview(childId).map { preview => - 
Ok(views.html.curator.haplogroups.mergeConfirmForm(preview)) - }.recover { - case e: IllegalArgumentException => - NotFound(e.getMessage) - } - } - - def mergeIntoParent(childId: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - treeRestructuringService.mergeIntoParent(childId, request.user.id.get).map { parentId => - Redirect(routes.CuratorController.haplogroupDetailPanel(parentId)) - .withHeaders("HX-Trigger" -> "haplogroupMerged") - }.recover { - case e: IllegalArgumentException => - BadRequest(e.getMessage) - } - } - - /** - * Show the reparent form for a haplogroup. - * Displays the current parent and allows selecting a new one. - */ - def reparentForm(id: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - for { - haplogroupOpt <- haplogroupRepository.findById(id) - currentParentOpt <- haplogroupOpt match { - case Some(hg) => haplogroupRepository.getParent(id) - case None => Future.successful(None) - } - siblings <- haplogroupOpt match { - case Some(hg) => haplogroupRepository.search("", Some(hg.haplogroupType), 10000, 0) - case None => Future.successful(Seq.empty) - } - } yield { - haplogroupOpt match { - case Some(haplogroup) => - // Filter out the haplogroup itself and its descendants to avoid cycles - val potentialParents = siblings.filterNot(_.id == haplogroup.id) - Ok(views.html.curator.haplogroups.reparentForm(haplogroup, currentParentOpt, potentialParents, reparentForm)) - case None => - NotFound("Haplogroup not found") - } - } - } - - /** - * Process the reparent form submission. - * Validates the new parent and updates the relationship. 
- */ - def reparent(id: Int): Action[AnyContent] = - withPermission("haplogroup.update").async { implicit request => - haplogroupRepository.findById(id).flatMap { - case None => - Future.successful(NotFound("Haplogroup not found")) - - case Some(haplogroup) => - reparentForm.bindFromRequest().fold( - formWithErrors => { - for { - currentParent <- haplogroupRepository.getParent(id) - siblings <- haplogroupRepository.search("", Some(haplogroup.haplogroupType), 10000, 0) - } yield { - val potentialParents = siblings.filterNot(_.id == haplogroup.id) - BadRequest(views.html.curator.haplogroups.reparentForm(haplogroup, currentParent, potentialParents, formWithErrors)) - } - }, - data => { - // Validate the new parent - haplogroupRepository.findById(data.newParentId).flatMap { - case None => - for { - currentParent <- haplogroupRepository.getParent(id) - siblings <- haplogroupRepository.search("", Some(haplogroup.haplogroupType), 10000, 0) - } yield { - val potentialParents = siblings.filterNot(_.id == haplogroup.id) - val errorForm = reparentForm.fill(data).withGlobalError("Selected parent does not exist") - BadRequest(views.html.curator.haplogroups.reparentForm(haplogroup, currentParent, potentialParents, errorForm)) - } - - case Some(newParent) if newParent.haplogroupType != haplogroup.haplogroupType => - for { - currentParent <- haplogroupRepository.getParent(id) - siblings <- haplogroupRepository.search("", Some(haplogroup.haplogroupType), 10000, 0) - } yield { - val potentialParents = siblings.filterNot(_.id == haplogroup.id) - val errorForm = reparentForm.fill(data).withGlobalError( - s"Parent haplogroup type (${newParent.haplogroupType}) must match child type (${haplogroup.haplogroupType})" - ) - BadRequest(views.html.curator.haplogroups.reparentForm(haplogroup, currentParent, potentialParents, errorForm)) - } - - case Some(newParent) if newParent.id == haplogroup.id => - for { - currentParent <- haplogroupRepository.getParent(id) - siblings <- 
haplogroupRepository.search("", Some(haplogroup.haplogroupType), 10000, 0) - } yield { - val potentialParents = siblings.filterNot(_.id == haplogroup.id) - val errorForm = reparentForm.fill(data).withGlobalError("Cannot set a haplogroup as its own parent") - BadRequest(views.html.curator.haplogroups.reparentForm(haplogroup, currentParent, potentialParents, errorForm)) - } - - case Some(newParent) => - // Check for cycles: newParent cannot be a descendant of haplogroup - haplogroupRepository.getDescendants(id).flatMap { descendants => - if (descendants.exists(_.id == newParent.id)) { - for { - currentParent <- haplogroupRepository.getParent(id) - siblings <- haplogroupRepository.search("", Some(haplogroup.haplogroupType), 10000, 0) - } yield { - val potentialParents = siblings.filterNot(_.id == haplogroup.id) - val errorForm = reparentForm.fill(data).withGlobalError( - "Cannot reparent: the selected parent is a descendant of this haplogroup (would create a cycle)" - ) - BadRequest(views.html.curator.haplogroups.reparentForm(haplogroup, currentParent, potentialParents, errorForm)) - } - } else { - for { - oldParentOpt <- haplogroupRepository.getParent(id) - _ <- haplogroupRepository.updateParent(id, data.newParentId, data.source) - _ <- auditService.logHaplogroupReparent( - request.user.id.get, - haplogroup, - oldParentOpt, - newParent, - data.reason - ) - } yield { - Redirect(routes.CuratorController.listHaplogroups(None, None, 1, 20)) - .flashing("success" -> s"${haplogroup.name} reparented under ${newParent.name}") - } - } - } - } - } - ) - } - } -} diff --git a/app/controllers/DiscoveryApiController.scala b/app/controllers/DiscoveryApiController.scala deleted file mode 100644 index 5ab8e79d..00000000 --- a/app/controllers/DiscoveryApiController.scala +++ /dev/null @@ -1,171 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.domain.discovery.ProposedBranchStatus 
-import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import services.{DiscoveryProposalService, TreeEvolutionService} - -import scala.concurrent.{ExecutionContext, Future} - -/** - * API controller for the Haplogroup Discovery system. - * Provides endpoints for listing/viewing proposals and curator actions (accept/reject). - */ -@Singleton -class DiscoveryApiController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - discoveryService: DiscoveryProposalService, - treeEvolutionService: TreeEvolutionService -)(implicit ec: ExecutionContext) extends BaseController with Logging { - - // Audit identity for API-key-authenticated actions - private val ApiCuratorId = "api-system" - - // Request DTOs - case class AcceptProposalRequest(proposedName: String, reason: Option[String] = None) - object AcceptProposalRequest { implicit val format: OFormat[AcceptProposalRequest] = Json.format } - - case class RejectProposalRequest(reason: String) - object RejectProposalRequest { implicit val format: OFormat[RejectProposalRequest] = Json.format } - - /** - * List proposals with optional filters. - * GET /api/v1/discovery/proposals?type=Y&status=READY_FOR_REVIEW - */ - def listProposals( - haplogroupType: Option[String], - status: Option[String] - ): Action[AnyContent] = secureApi.async { _ => - val hgType = haplogroupType.flatMap(parseHaplogroupType) - val pbStatus = status.flatMap(ProposedBranchStatus.fromString) - - discoveryService.listProposals(hgType, pbStatus).map { proposals => - Ok(Json.obj( - "proposals" -> proposals, - "total" -> proposals.size - )) - }.recover { - case e: Exception => - logger.error(s"Error listing proposals: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Get proposal details with variants, evidence, and audit trail. 
- * GET /api/v1/discovery/proposals/:id - */ - def getProposalDetails(id: Int): Action[AnyContent] = secureApi.async { _ => - discoveryService.getProposalDetails(id).map { - case Some(details) => Ok(Json.toJson(details)) - case None => NotFound(Json.obj("error" -> s"Proposal $id not found")) - }.recover { - case e: Exception => - logger.error(s"Error getting proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Start review of a proposal. - * POST /api/v1/discovery/proposals/:id/start-review - */ - def startReview(id: Int): Action[AnyContent] = - secureApi.async { request => - discoveryService.startReview(id, ApiCuratorId).map { proposal => - Ok(Json.toJson(proposal)) - }.recover { - case e: NoSuchElementException => - NotFound(Json.obj("error" -> e.getMessage)) - case e: IllegalStateException => - BadRequest(Json.obj("error" -> e.getMessage)) - case e: Exception => - logger.error(s"Error starting review for proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Accept a proposal. - * POST /api/v1/discovery/proposals/:id/accept - */ - def acceptProposal(id: Int): Action[AcceptProposalRequest] = - secureApi.jsonAction[AcceptProposalRequest].async { request => - val body = request.body - discoveryService.acceptProposal(id, ApiCuratorId, body.proposedName, body.reason).map { proposal => - Ok(Json.toJson(proposal)) - }.recover { - case e: NoSuchElementException => - NotFound(Json.obj("error" -> e.getMessage)) - case e: IllegalStateException => - BadRequest(Json.obj("error" -> e.getMessage)) - case e: Exception => - logger.error(s"Error accepting proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Reject a proposal. 
- * POST /api/v1/discovery/proposals/:id/reject - */ - def rejectProposal(id: Int): Action[RejectProposalRequest] = - secureApi.jsonAction[RejectProposalRequest].async { request => - val body = request.body - discoveryService.rejectProposal(id, ApiCuratorId, body.reason).map { proposal => - Ok(Json.toJson(proposal)) - }.recover { - case e: NoSuchElementException => - NotFound(Json.obj("error" -> e.getMessage)) - case e: IllegalStateException => - BadRequest(Json.obj("error" -> e.getMessage)) - case e: Exception => - logger.error(s"Error rejecting proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Promote an accepted proposal to the canonical haplogroup tree. - * POST /api/v1/discovery/proposals/:id/promote - */ - def promoteProposal(id: Int): Action[AnyContent] = - secureApi.async { request => - treeEvolutionService.promoteProposal(id, ApiCuratorId).map { result => - Ok(Json.toJson(result)) - }.recover { - case e: NoSuchElementException => - NotFound(Json.obj("error" -> e.getMessage)) - case e: IllegalStateException => - BadRequest(Json.obj("error" -> e.getMessage)) - case e: Exception => - logger.error(s"Error promoting proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Get audit trail for a proposal. 
- * GET /api/v1/discovery/proposals/:id/audit - */ - def getAuditTrail(id: Int): Action[AnyContent] = secureApi.async { _ => - discoveryService.getAuditTrail(id).map { actions => - Ok(Json.toJson(actions)) - }.recover { - case e: Exception => - logger.error(s"Error getting audit trail for proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - private def parseHaplogroupType(s: String): Option[HaplogroupType] = s.toUpperCase match { - case "Y" => Some(HaplogroupType.Y) - case "MT" => Some(HaplogroupType.MT) - case _ => None - } -} diff --git a/app/controllers/ExternalBiosampleController.scala b/app/controllers/ExternalBiosampleController.scala deleted file mode 100644 index d32fdbaf..00000000 --- a/app/controllers/ExternalBiosampleController.scala +++ /dev/null @@ -1,119 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.ExternalBiosampleRequest -import play.api.libs.json.{Json, OFormat} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import services.* - -import scala.concurrent.ExecutionContext - -case class ApiResponse(status: String) - -object ApiResponse { - implicit val format: OFormat[ApiResponse] = Json.format[ApiResponse] -} - -/** - * Controller for managing external biosample operations, such as creating biosamples. - * - * This controller handles HTTP actions related to external biosamples and interacts with the - * `BiosampleDomainService` to perform operations such as creating a biosample with provided data. - * - * Key functionalities include securing endpoints via `SecureApiAction` and handling JSON payloads - * representing external biosample requests. - * - * @param controllerComponents The Play Framework `ControllerComponents` for handling requests and responses. - * @param secureApi The `SecureApiAction` responsible for securing access to this controller's endpoints. 
- * @param biosampleDomainService The facade service for all biosample operations. - * @param ec An implicit `ExecutionContext` for handling asynchronous operations. - */ -@Singleton -class ExternalBiosampleController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - biosampleDomainService: BiosampleDomainService - )(implicit ec: ExecutionContext) extends BaseController { - - /** - * Handles an HTTP request to create an external biosample. - * - * This method processes a JSON payload of type `ExternalBiosampleRequest` and invokes the `externalBiosampleService` - * to create a new biosample with the provided data. Upon successful creation, it returns a `201 Created` HTTP response - * containing the GUID of the newly created biosample in JSON format. - * - * @return An asynchronous `Action` that expects a JSON request body of type `ExternalBiosampleRequest` - * and responds with the GUID of the created biosample in JSON format. - */ - def create: Action[ExternalBiosampleRequest] = secureApi.jsonAction[ExternalBiosampleRequest].async { request => - biosampleDomainService.createExternalBiosample(request.body).map { guid => - Created(Json.obj( - "status" -> "success", - "guid" -> guid - )) - }.recover { - case e: DuplicateAccessionException => - Conflict(Json.obj( - "error" -> "Duplicate accession", - "message" -> e.getMessage - )) - - case e: InvalidCoordinatesException => - BadRequest(Json.obj( - "error" -> "Invalid coordinates", - "message" -> e.getMessage - )) - - case e: SequenceDataValidationException => - BadRequest(Json.obj( - "error" -> "Invalid sequence data", - "message" -> e.getMessage - )) - - case e: PublicationLinkageException => - BadRequest(Json.obj( - "error" -> "Publication linkage failed", - "message" -> e.getMessage - )) - - case e: BiosampleServiceException => - BadRequest(Json.obj( - "error" -> "Validation error", - "message" -> e.getMessage - )) - - case e: Exception => - InternalServerError(Json.obj( 
- "error" -> "Internal server error", - "message" -> "An unexpected error occurred while processing the request" - )) - } - } - - /** - * Handles an HTTP request to delete an external biosample by its accession. - * - * This method processes a request to delete a biosample identified by its unique accession. - * The request must include the `citizenDid` to verify ownership and prevent collisions. - * Upon successful deletion, it returns a `204 No Content` HTTP response. - * If the biosample is not found or the DID does not match, it returns a `404 Not Found` response. - * - * @param accession The unique accession of the biosample to be deleted. - * @param citizenDid The DID of the citizen who owns the biosample. - * @return An asynchronous `Action` that responds with `204 No Content`, `404 Not Found`, - * or `500 Internal Server Error` in case of an unexpected error. - */ - def delete(accession: String, citizenDid: String): Action[AnyContent] = secureApi.async { - biosampleDomainService.deleteBiosample(accession, citizenDid).map { - case true => NoContent - case false => NotFound(Json.obj("error" -> "Biosample not found", "message" -> s"Biosample with accession '$accession' and DID '$citizenDid' not found or mismatch.")) - }.recover { - case e: Exception => - InternalServerError(Json.obj( - "error" -> "Internal server error", - "message" -> s"An unexpected error occurred while attempting to delete biosample with accession '$accession'" - )) - } - } -} \ No newline at end of file diff --git a/app/controllers/FirehoseController.scala b/app/controllers/FirehoseController.scala deleted file mode 100644 index e101613a..00000000 --- a/app/controllers/FirehoseController.scala +++ /dev/null @@ -1,43 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import play.api.libs.json.Json -import play.api.mvc.{Action, BaseController, ControllerComponents} -import services.firehose.{AtmosphereEventHandler, FirehoseEvent, 
FirehoseResult} // Removed individual event imports - -import scala.concurrent.{ExecutionContext, Future} -import play.api.libs.json.JsValue - -@Singleton -class FirehoseController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - atmosphereEventHandler: AtmosphereEventHandler - )(implicit ec: ExecutionContext) extends BaseController { - - def processEvent: Action[JsValue] = secureApi.jsonAction[JsValue].async { request => - val json = request.body - - // Use the discriminator-based JSON Reads for FirehoseEvent - val event: Option[FirehoseEvent] = json.validate[FirehoseEvent].asOpt - - event match { - case Some(e) => - atmosphereEventHandler.handle(e).map { - case FirehoseResult.Success(_, _, guid, msg) => - Ok(Json.obj("status" -> "success", "message" -> msg, "guid" -> guid)) - case FirehoseResult.Conflict(_, msg) => - Conflict(Json.obj("error" -> msg)) - case FirehoseResult.NotFound(uri) => - NotFound(Json.obj("error" -> s"Not found: $uri")) - case FirehoseResult.ValidationError(_, msg) => - BadRequest(Json.obj("error" -> msg)) - case FirehoseResult.Error(_, msg, _) => - InternalServerError(Json.obj("error" -> msg)) - } - case None => - Future.successful(BadRequest(Json.obj("error" -> "Unknown or invalid event structure"))) - } - } -} \ No newline at end of file diff --git a/app/controllers/GenomeRegionsApiController.scala b/app/controllers/GenomeRegionsApiController.scala deleted file mode 100644 index a5a21eae..00000000 --- a/app/controllers/GenomeRegionsApiController.scala +++ /dev/null @@ -1,82 +0,0 @@ -package controllers - -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.SupportedBuildsResponse -import play.api.libs.json.Json -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import services.GenomeRegionsService - -import scala.concurrent.ExecutionContext -import scala.concurrent.duration.* - -/** - * Public API controller for genome region data. 
- * Provides reference genome structural annotations including centromeres, - * telomeres, cytobands, and Y-chromosome specific regions. - */ -@Singleton -class GenomeRegionsApiController @Inject()( - val controllerComponents: ControllerComponents, - genomeRegionsService: GenomeRegionsService -)(implicit ec: ExecutionContext) extends BaseController { - - private val CacheMaxAge = 7.days.toSeconds - - /** - * List supported genome builds. - * GET /api/v1/genome-regions - */ - def listBuilds(): Action[AnyContent] = Action { - val response = SupportedBuildsResponse( - supportedBuilds = genomeRegionsService.getSupportedBuilds, - version = "2024.12.1" // Default version - ) - Ok(Json.toJson(response)) - } - - /** - * Get genome regions for a specific build. - * GET /api/v1/genome-regions/:build - * - * Supports ETag-based caching for efficient client-side caching. - */ - def getRegions(build: String): Action[AnyContent] = Action.async { request => - // First get the ETag for this build - genomeRegionsService.getETag(build).flatMap { etagOpt => - val clientEtag = request.headers.get("If-None-Match") - - // Check if client already has current version - (etagOpt, clientEtag) match { - case (Some(serverEtag), Some(requestEtag)) if serverEtag == requestEtag => - // Client has current version - return 304 Not Modified - scala.concurrent.Future.successful( - NotModified.withHeaders( - "ETag" -> serverEtag, - "Cache-Control" -> s"public, max-age=$CacheMaxAge" - ) - ) - - case _ => - // Need to return full response - genomeRegionsService.getRegions(build).map { - case Right(response) => - val headers = Seq( - "Cache-Control" -> s"public, max-age=$CacheMaxAge", - "Vary" -> "Accept-Encoding" - ) ++ etagOpt.map("ETag" -> _) - - Ok(Json.toJson(response)).withHeaders(headers*) - - case Left(error) => - NotFound(Json.toJson(error)) - } - } - }.recover { - case e: Exception => - InternalServerError(Json.obj( - "error" -> "Internal error", - "message" -> "Failed to load region data" - 
)) - } - } -} diff --git a/app/controllers/GenomeRegionsApiManagementController.scala b/app/controllers/GenomeRegionsApiManagementController.scala deleted file mode 100644 index 5ac3104f..00000000 --- a/app/controllers/GenomeRegionsApiManagementController.scala +++ /dev/null @@ -1,90 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.* -import play.api.Logging -import play.api.libs.json.Json -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import services.GenomeRegionsManagementService - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Private API controller for managing genome regions. - * Secured with X-API-Key authentication. - * - * API changes are logged as "system" user in the audit log. - */ -@Singleton -class GenomeRegionsApiManagementController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - managementService: GenomeRegionsManagementService, - ingestionService: services.genomics.GenomeRegionIngestionService -)(implicit ec: ExecutionContext) extends BaseController with Logging { - - // ============================================================================ - // GenomeRegion Endpoints - // ============================================================================ - - def bootstrap(): Action[AnyContent] = secureApi.async { _ => - logger.info("API: Bootstrapping genome regions from CHM13v2.0 sources") - ingestionService.bootstrap().map { _ => - Ok(Json.obj("message" -> "Genome regions bootstrapping completed successfully")) - } - } - - def listRegions(regionType: Option[String], build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = secureApi.async { _ => - managementService.listRegions(regionType, build, page, pageSize).map { response => - Ok(Json.toJson(response)) - } - } - - def getRegion(id: Int): Action[AnyContent] = secureApi.async { _ => - 
managementService.getRegion(id).map { - case Some(region) => Ok(Json.toJson(region)) - case None => NotFound(Json.obj("error" -> "Region not found")) - } - } - - def createRegion(): Action[CreateGenomeRegionRequest] = - secureApi.jsonAction[CreateGenomeRegionRequest].async { request => - logger.info(s"API: Creating genome region") - managementService.createRegion(request.body, None).map { - case Right(region) => Created(Json.toJson(region)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def updateRegion(id: Int): Action[UpdateGenomeRegionRequest] = - secureApi.jsonAction[UpdateGenomeRegionRequest].async { request => - logger.info(s"API: Updating genome region $id") - managementService.updateRegion(id, request.body, None).map { - case Right(region) => Ok(Json.toJson(region)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def deleteRegion(id: Int): Action[AnyContent] = secureApi.async { _ => - logger.info(s"API: Deleting genome region $id") - managementService.deleteRegion(id, None).map { - case Right(_) => NoContent - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - private val MaxBulkRegions = 1000 - - def bulkCreateRegions(): Action[BulkCreateGenomeRegionsRequest] = - secureApi.jsonAction[BulkCreateGenomeRegionsRequest].async { request => - if (request.body.regions.size > MaxBulkRegions) { - Future.successful(BadRequest(Json.obj("error" -> s"Bulk create limited to $MaxBulkRegions regions per request"))) - } else { - logger.info(s"API: Bulk creating ${request.body.regions.size} genome regions") - managementService.bulkCreateRegions(request.body, None).map { response => - Ok(Json.toJson(response)) - } - } - } -} \ No newline at end of file diff --git a/app/controllers/GenomeRegionsCuratorController.scala b/app/controllers/GenomeRegionsCuratorController.scala deleted file mode 100644 index 9a47d339..00000000 --- a/app/controllers/GenomeRegionsCuratorController.scala +++ /dev/null @@ -1,240 +0,0 
@@ -package controllers - -import actions.{AuthenticatedAction, AuthenticatedRequest, PermissionAction} -import config.GenomicsConfig -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.* -import models.domain.genomics.GenbankContig -import org.webjars.play.WebJarsUtil -import play.api.Logging -import play.api.data.Form -import play.api.data.Forms.* -import play.api.i18n.I18nSupport -import play.api.libs.json.Json -import play.api.mvc.* -import repositories.GenbankContigRepository -import services.GenomeRegionsManagementService - -import scala.concurrent.{ExecutionContext, Future} - -// Form data classes -case class GenomeRegionFormData( - genbankContigId: Int, - regionType: String, - name: Option[String], - startPos: Long, - endPos: Long, - modifier: Option[BigDecimal] -) - -/** - * UI Controller for managing genome regions. - * Uses session-based authentication with permission checks. - */ -@Singleton -class GenomeRegionsCuratorController @Inject()( - val controllerComponents: ControllerComponents, - protected val authenticatedAction: AuthenticatedAction, - protected val permissionAction: PermissionAction, - managementService: GenomeRegionsManagementService, - genbankContigRepository: GenbankContigRepository, - genomicsConfig: GenomicsConfig -)(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) - extends BaseController with I18nSupport with Logging with BaseCuratorController { - - // Forms - private val genomeRegionForm: Form[GenomeRegionFormData] = Form( - mapping( - "genbankContigId" -> number, - "regionType" -> nonEmptyText(1, 30), - "name" -> optional(text(maxLength = 50)), - "startPos" -> longNumber(min = 0), - "endPos" -> longNumber(min = 0), - "modifier" -> optional(bigDecimal(3, 2)) - )(GenomeRegionFormData.apply)(g => Some((g.genbankContigId, g.regionType, g.name, g.startPos, g.endPos, g.modifier))) - ) - - // ============================================================================ - // GenomeRegion UI Endpoints - // 
============================================================================ - - def listRegions(regionType: Option[String], build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = - withPermission("genome_region.view").async { implicit request => - for { - response <- managementService.listRegions(regionType, build, page, pageSize) - } yield { - val totalPages = Math.max(1, (response.total + pageSize - 1) / pageSize) - Ok(views.html.curator.genomeregions.list(response.regions, build, page, totalPages, pageSize, response.total, genomicsConfig.supportedReferences)) - } - } - - def regionsFragment(regionType: Option[String], build: Option[String], page: Int = 1, pageSize: Int = 25): Action[AnyContent] = - withPermission("genome_region.view").async { implicit request => - for { - response <- managementService.listRegions(regionType, build, page, pageSize) - } yield { - val totalPages = Math.max(1, (response.total + pageSize - 1) / pageSize) - Ok(views.html.curator.genomeregions.listFragment(response.regions, build, page, totalPages, pageSize, response.total)) - } - } - - def regionDetailPanel(id: Int): Action[AnyContent] = - withPermission("genome_region.view").async { implicit request => - managementService.getRegion(id).map { - case Some(region) => Ok(views.html.curator.genomeregions.detailPanel(region)) - case None => NotFound("Region not found") - } - } - - def createRegionForm: Action[AnyContent] = - withPermission("genome_region.create").async { implicit request => - getContigsForForm.map { contigs => - Ok(views.html.curator.genomeregions.createForm(genomeRegionForm, contigs, genomicsConfig.supportedReferences)) - } - } - - def createRegion: Action[AnyContent] = - withPermission("genome_region.create").async { implicit request => - genomeRegionForm.bindFromRequest().fold( - formWithErrors => { - getContigsForForm.map { contigs => - BadRequest(views.html.curator.genomeregions.createForm(formWithErrors, contigs, 
genomicsConfig.supportedReferences)) - } - }, - formData => { - // Resolve contig to get build name and common name - genbankContigRepository.findById(formData.genbankContigId).flatMap { - case Some(contig) => - val build = contig.referenceGenome.getOrElse("unknown") - val contigName = contig.commonName.getOrElse("unknown") - - val createRequest = CreateGenomeRegionRequest( - regionType = formData.regionType, - name = formData.name, - coordinates = Map(build -> RegionCoordinateDto(contigName, formData.startPos, formData.endPos)), - properties = formData.modifier.map(m => Json.obj("modifier" -> m)) - ) - - managementService.createRegion(createRequest, request.user.id).map { - case Right(_) => - Redirect(routes.GenomeRegionsCuratorController.listRegions(None, None, 1, 25)) - .flashing("success" -> "Genome region created successfully") - case Left(error) => - getContigsForFormSync.map { contigs => - BadRequest(views.html.curator.genomeregions.createForm( - genomeRegionForm.fill(formData).withGlobalError(error), - contigs, - genomicsConfig.supportedReferences - )) - }.getOrElse(BadRequest(error)) - } - case None => - Future.successful(BadRequest("Invalid contig ID")) - } - } - ) - } - - def editRegionForm(id: Int): Action[AnyContent] = - withPermission("genome_region.update").async { implicit request => - for { - regionOpt <- managementService.getRegion(id) - contigs <- getContigsForForm - } yield regionOpt match { - case Some(region) => - // Try to map back to form data using the first coordinate found (limitation of this UI) - val (build, coord) = region.coordinates.headOption.getOrElse("unknown" -> RegionCoordinateDto("", 0, 0)) - // We need a genbankContigId for the form dropdown. - // This is tricky without a reverse lookup or storing it. - // For now, we might leave it 0 or try to find it in the list of contigs if possible. - // Or just pick the first contig that matches name and build. 
- val contigId = contigs.find(c => c.commonName.contains(coord.contig) && c.referenceGenome.contains(build)) - .flatMap(_.id).getOrElse(0) - - val modifier = (region.properties \ "modifier").asOpt[BigDecimal] - - val formData = GenomeRegionFormData( - contigId, - region.regionType, - region.name, - coord.start, - coord.end, - modifier - ) - Ok(views.html.curator.genomeregions.editForm(id, genomeRegionForm.fill(formData), contigs, genomicsConfig.supportedReferences)) - case None => - NotFound("Region not found") - } - } - - def updateRegion(id: Int): Action[AnyContent] = - withPermission("genome_region.update").async { implicit request => - genomeRegionForm.bindFromRequest().fold( - formWithErrors => { - getContigsForForm.map { contigs => - BadRequest(views.html.curator.genomeregions.editForm(id, formWithErrors, contigs, genomicsConfig.supportedReferences)) - } - }, - formData => { - genbankContigRepository.findById(formData.genbankContigId).flatMap { - case Some(contig) => - val build = contig.referenceGenome.getOrElse("unknown") - val contigName = contig.commonName.getOrElse("unknown") - - val updateRequest = UpdateGenomeRegionRequest( - regionType = Some(formData.regionType), - name = formData.name, - // Merging coordinates is complex. This simplistic update might overwrite other builds' coordinates - // if the service replaces the map. The Service logic currently REPLACES if provided. - // To support multi-build editing, the UI needs to change. - // For now, we assume single-build editing flow. 
- coordinates = Some(Map(build -> RegionCoordinateDto(contigName, formData.startPos, formData.endPos))), - properties = formData.modifier.map(m => Json.obj("modifier" -> m)) - ) - - managementService.updateRegion(id, updateRequest, request.user.id).map { - case Right(_) => - Redirect(routes.GenomeRegionsCuratorController.listRegions(None, None, 1, 25)) - .flashing("success" -> "Genome region updated successfully") - case Left(error) => - getContigsForFormSync.map { contigs => - BadRequest(views.html.curator.genomeregions.editForm( - id, - genomeRegionForm.fill(formData).withGlobalError(error), - contigs, - genomicsConfig.supportedReferences - )) - }.getOrElse(BadRequest(error)) - } - case None => Future.successful(BadRequest("Invalid Contig")) - } - } - ) - } - - def deleteRegion(id: Int): Action[AnyContent] = - withPermission("genome_region.delete").async { implicit request => - managementService.deleteRegion(id, request.user.id).map { - case Right(_) => - Ok("").withHeaders("HX-Trigger" -> "regionDeleted") - case Left(error) => - BadRequest(error) - } - } - - // ============================================================================ - // Helper Methods - // ============================================================================ - - private def getContigsForForm: Future[Seq[GenbankContig]] = { - // Get all contigs - they're pre-filtered by reference genome in the repository - genbankContigRepository.getAll.map { contigs => - contigs.filter(c => c.referenceGenome.exists(genomicsConfig.supportedReferences.contains)) - } - } - - private def getContigsForFormSync: Option[Seq[GenbankContig]] = { - // This is a fallback for sync error handling - not ideal but simple - None - } -} \ No newline at end of file diff --git a/app/controllers/GenomicsAdminController.scala b/app/controllers/GenomicsAdminController.scala deleted file mode 100644 index c4be6e00..00000000 --- a/app/controllers/GenomicsAdminController.scala +++ /dev/null @@ -1,105 +0,0 @@ -package 
controllers - -import actors.YBrowseVariantUpdateActor -import actors.YBrowseVariantUpdateActor.{RunUpdate, UpdateResult} -import actions.{AuthenticatedAction, RoleAction} -import jakarta.inject.{Inject, Named, Singleton} -import org.apache.pekko.actor.ActorRef -import org.apache.pekko.pattern.ask -import org.apache.pekko.util.Timeout -import org.webjars.play.WebJarsUtil -import play.api.Logging -import play.api.i18n.I18nSupport -import play.api.libs.json.{Json, OWrites} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} - -import scala.concurrent.duration.* -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class GenomicsAdminController @Inject()( - val controllerComponents: ControllerComponents, - authenticatedAction: AuthenticatedAction, - roleAction: RoleAction, - @Named("ybrowse-variant-update-actor") ybrowseUpdateActor: ActorRef, - hipstrService: services.genomics.HipStrReferenceIngestionService, - regionIngestionService: services.genomics.GenomeRegionIngestionService -)(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) extends BaseController with Logging with I18nSupport { - - implicit val timeout: Timeout = Timeout(10.minutes) - - implicit val updateResultWrites: OWrites[UpdateResult] = Json.writes[UpdateResult] - - private def AdminAction = authenticatedAction andThen roleAction("Admin") - - /** - * Admin dashboard for genomics operations. - */ - def dashboard(): Action[AnyContent] = AdminAction.async { implicit request => - Future.successful(Ok(views.html.admin.genomics.dashboard())) - } - - /** - * Trigger on-demand HipSTR reference update. 
- */ - def triggerHipStrUpdate(): Action[AnyContent] = AdminAction.async { implicit request => - logger.info(s"Admin ${request.user.id.get} triggered HipSTR reference update") - - // Run in background - hipstrService.bootstrap().onComplete { - case scala.util.Success(count) => logger.info(s"HipSTR update completed: $count variants") - case scala.util.Failure(e) => logger.error(s"HipSTR update failed", e) - } - - Future.successful(Ok(Json.obj("message" -> "HipSTR update started"))) - } - - /** - * Trigger on-demand Genome Regions bootstrap. - */ - def triggerRegionsBootstrap(): Action[AnyContent] = AdminAction.async { implicit request => - logger.info(s"Admin ${request.user.id.get} triggered Genome Regions bootstrap") - - // Run in background - regionIngestionService.bootstrap().onComplete { - case scala.util.Success(_) => logger.info(s"Genome Regions bootstrap completed successfully") - case scala.util.Failure(e) => logger.error(s"Genome Regions bootstrap failed", e) - } - - Future.successful(Ok(Json.obj("message" -> "Regions bootstrap started"))) - } - - /** - * Trigger on-demand YBrowse variant update. - * Only accessible by users with Admin role. - * Fire-and-forget: returns immediately, job runs in background. - */ - def triggerYBrowseUpdate(): Action[AnyContent] = AdminAction.async { implicit request => - logger.info(s"Admin ${request.user.id.get} triggered YBrowse variant update") - - // Use a short timeout just to check if job started or was rejected - implicit val shortTimeout: Timeout = Timeout(5.seconds) - - (ybrowseUpdateActor ? RunUpdate).mapTo[UpdateResult].map { result => - if (result.message.contains("already in progress")) { - Ok(Json.toJson(result)) - } else if (result.success) { - Ok(Json.toJson(result)) - } else { - InternalServerError(Json.toJson(result)) - } - }.recover { - case _: org.apache.pekko.pattern.AskTimeoutException => - Ok(Json.obj( - "success" -> true, - "variantsIngested" -> 0, - "message" -> "Update started. 
Monitor server logs for progress." - )) - case ex: Exception => - logger.error("YBrowse update request failed", ex) - InternalServerError(Json.obj( - "error" -> "An internal error occurred while starting the update." - )) - } - } -} diff --git a/app/controllers/HaplogroupTreeMergeController.scala b/app/controllers/HaplogroupTreeMergeController.scala deleted file mode 100644 index 18dafdcc..00000000 --- a/app/controllers/HaplogroupTreeMergeController.scala +++ /dev/null @@ -1,123 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.haplogroups.* -import play.api.Logging -import play.api.libs.json.Json -import play.api.mvc.{Action, BaseController, ControllerComponents} -import services.HaplogroupTreeMergeService - -import scala.concurrent.{ExecutionContext, Future} - -/** - * API controller for haplogroup tree merge operations. - * Secured with X-API-Key authentication. - * - * Endpoints: - * - POST /api/v1/manage/haplogroups/merge - Full tree merge - * - POST /api/v1/manage/haplogroups/merge/subtree - Subtree merge under anchor - * - POST /api/v1/manage/haplogroups/merge/preview - Preview merge without changes - */ -@Singleton -class HaplogroupTreeMergeController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - mergeService: HaplogroupTreeMergeService -)(implicit ec: ExecutionContext) extends BaseController with Logging { - - /** - * Merge a full haplogroup tree, replacing the existing tree for the given type. 
- * - * Request body: TreeMergeRequest - * - haplogroupType: "Y" or "MT" - * - sourceTree: Nested PhyloNodeInput tree structure - * - sourceName: Attribution source (e.g., "ytree.net", "ISOGG") - * - priorityConfig: Optional source priority ordering - * - conflictStrategy: Optional conflict resolution strategy - * - dryRun: If true, simulates merge without applying changes - */ - def mergeFullTree(): Action[TreeMergeRequest] = - secureApi.jsonAction[TreeMergeRequest].async { request => - logger.info(s"API: Full tree merge for ${request.body.haplogroupType} from ${request.body.sourceName}" + - (if (request.body.dryRun) " (dry run)" else "")) - - // Initiate the merge in the background - mergeService.mergeFullTree(request.body).onComplete { - case scala.util.Success(response) => - logger.info(s"Background full tree merge completed for ${request.body.haplogroupType} with success: ${response.success}. Stats: ${response.statistics}") - if (!response.success) { - logger.error(s"Background full tree merge completed with errors for ${request.body.haplogroupType}: ${response.errors}") - } - case scala.util.Failure(e) => - logger.error(s"Background full tree merge failed for ${request.body.haplogroupType}: ${e.getMessage}", e) - }(ec) // Ensure onComplete runs on the correct execution context - - // Immediately return an Accepted response - Future.successful(Accepted(Json.obj( - "status" -> "Processing", - "message" -> s"Full tree merge for ${request.body.haplogroupType} initiated and is running in the background. Check logs for details." - ))) - } - - /** - * Merge a subtree under a specific anchor haplogroup. 
- * - * Request body: SubtreeMergeRequest - * - haplogroupType: "Y" or "MT" - * - anchorHaplogroupName: Name of the haplogroup to merge under - * - sourceTree: Nested PhyloNodeInput tree structure - * - sourceName: Attribution source - * - priorityConfig: Optional source priority ordering - * - conflictStrategy: Optional conflict resolution strategy - * - dryRun: If true, simulates merge without applying changes - */ - def mergeSubtree(): Action[SubtreeMergeRequest] = - secureApi.jsonAction[SubtreeMergeRequest].async { request => - logger.info(s"API: Subtree merge under ${request.body.anchorHaplogroupName} " + - s"for ${request.body.haplogroupType} from ${request.body.sourceName}" + - (if (request.body.dryRun) " (dry run)" else "")) - - // Initiate the merge in the background - mergeService.mergeSubtree(request.body).onComplete { - case scala.util.Success(response) => - logger.info(s"Background subtree merge completed for ${request.body.haplogroupType} under ${request.body.anchorHaplogroupName} with success: ${response.success}. Stats: ${response.statistics}") - if (!response.success) { - logger.error(s"Background subtree merge completed with errors for ${request.body.haplogroupType} under ${request.body.anchorHaplogroupName}: ${response.errors}") - } - case scala.util.Failure(e) => - logger.error(s"Background subtree merge failed for ${request.body.haplogroupType} under ${request.body.anchorHaplogroupName}: ${e.getMessage}", e) - }(ec) // Ensure onComplete runs on the correct execution context - - // Immediately return an Accepted response - Future.successful(Accepted(Json.obj( - "status" -> "Processing", - "message" -> s"Subtree merge for ${request.body.haplogroupType} under ${request.body.anchorHaplogroupName} initiated and is running in the background. Check logs for details." - ))) - } - - /** - * Preview a merge operation without applying changes. 
- * - * Request body: MergePreviewRequest - * - haplogroupType: "Y" or "MT" - * - anchorHaplogroupName: Optional anchor for subtree preview - * - sourceTree: Nested PhyloNodeInput tree structure - * - sourceName: Attribution source - * - priorityConfig: Optional source priority ordering - */ - def previewMerge(): Action[MergePreviewRequest] = - secureApi.jsonAction[MergePreviewRequest].async { request => - logger.info(s"API: Preview merge for ${request.body.haplogroupType} from ${request.body.sourceName}" + - request.body.anchorHaplogroupName.map(a => s" under $a").getOrElse("")) - - mergeService.previewMerge(request.body).map { response => - Ok(Json.toJson(response)) - }.recover { case e: Exception => - logger.error(s"Merge preview failed: ${e.getMessage}", e) - InternalServerError(Json.obj( - "error" -> "Preview operation failed" - )) - } - } -} diff --git a/app/controllers/HomeController.scala b/app/controllers/HomeController.scala deleted file mode 100644 index 008b6576..00000000 --- a/app/controllers/HomeController.scala +++ /dev/null @@ -1,202 +0,0 @@ -package controllers - -import org.webjars.play.WebJarsUtil -import play.api.* -import play.api.cache.{Cached, SyncCacheApi} -import play.api.i18n.I18nSupport -import play.api.mvc.* - -import javax.inject.* -import scala.concurrent.duration.DurationInt - -/** - * A controller for handling HTTP requests to the application's main pages. - * - * This class contains actions for rendering HTML pages for various public-facing - * sections of the application, such as the homepage, cookie usage policy, privacy - * policy, terms of service, and public API information. 
- * - * @param controllerComponents provides the base controller components required by all controllers - * @param webJarsUtil utility for managing web jar assets - */ -@Singleton -class HomeController @Inject()(val controllerComponents: ControllerComponents, - cached: Cached, - cache: SyncCacheApi - ) - (using webJarsUtil: WebJarsUtil) extends BaseController with I18nSupport { - - /** - * Create an Action to render an HTML page. - * - * The configuration in the `routes` file means that this method - * will be called when the application receives a `GET` request with - * a path of `/`. - */ - def index(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.index()) - } - - /** - * Renders the Reputation System explainer page. - */ - def reputation(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.reputation()) - } - - /** - * Renders the guide for submitting haplogroup tree data. - */ - def howToSubmitTreeData(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.content.howToSubmitTreeData()) - } - - /** - * Renders the Cookie Usage Policy page. - * - * This action handles GET requests for the cookie usage policy of the application. - * It loads and displays the static HTML content detailing the application's current, - * future, and potential use of cookies, including compliance with relevant data protection regulations. - * - * @return an action that renders the Cookie Usage Policy view as an HTML response - */ - def cookieUsage(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.cookies()) - } - - /** - * Renders the Privacy Policy page. - * - * This action handles GET requests for the privacy policy of the application. - * It loads and displays a static HTML page outlining the application's policies - * regarding data privacy and protection. 
- * - * @return an action that renders the Privacy Policy view as an HTML response - */ - def privacy(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.privacyPolicy()) - } - - /** - * Renders the Terms of Use page. - * - * This action handles GET requests for the Terms of Use of the application. - * It loads and displays the static HTML content detailing the application's terms and conditions - * for using the website and its features. - * - * @return an action that renders the Terms of Use view as an HTML response - */ - def terms(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.terms()) - } - - /** - * Renders the FAQ (Frequently Asked Questions) page. - * - * This action handles GET requests to display the FAQ section of the application. - * It loads and renders a static HTML view containing common questions and answers - * related to the application, its features, or its usage. - * - * @return an action that renders the FAQ view as an HTML response - */ - def faq(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.faq()) - } - - /** - * Generates and serves an XML sitemap for the application. The sitemap includes - * a list of predefined static routes that correspond to important pages, such as - * the homepage, cookie usage, terms, privacy policy, public API documentation, - * FAQ, and other significant sections of the website. - * - * The response is an XML document compliant with the sitemap protocol, which - * includes details like the URL location, change frequency, and priority for - * each page. It helps search engines effectively crawl and index the website's - * content. The generated sitemap is cached for 24 hours for performance optimization. - * - * @return an `EssentialAction` that produces the generated sitemap as an XML response with a "200 OK" status. 
- */ - def sitemap(): EssentialAction = cached.status(_ => "sitemap", 200, 24.hours) { - Action { implicit request => - val staticRoutes = List( - routes.HomeController.index(), - routes.HomeController.cookieUsage(), - routes.HomeController.terms(), - routes.HomeController.privacy(), - routes.HomeController.faq(), - routes.TreeController.ytree(None), - routes.TreeController.mtree(None), - routes.PublicationController.index(), - routes.CoverageController.index(), - routes.ContactController.show() - ) - - val baseUrl = s"${ - if (request.secure) { - "https" - } else { - "http" - } - }: //${request.host}" - - val apiDocsUrl = s""" - | $baseUrl/api/docs - | monthly - | 0.6 - | """.stripMargin - - val xmlContent = - """ - | - |""".stripMargin + - staticRoutes.map { route => - s""" - | ${route.absoluteURL(secure = true)} - | weekly - | 0.8 - | """.stripMargin - }.mkString("\n") + - "\n" + apiDocsUrl + - "\n" - - Ok(xmlContent).as("application/xml") - } - } - - /** - * Generates and serves the `robots.txt` file for the application. - * - * The robots.txt file provides directives to web crawlers about which parts - * of the website are accessible for crawling and indexing. It includes a - * link to the application's sitemap for search engines to discover and index - * the site's significant URLs efficiently. The response is a plain text file - * with appropriate directives for crawlers. - * - * @return an `EssentialAction` that produces the `robots.txt` file as a plain text response - * with a "200 OK" status, including a link to the XML sitemap. - */ - def robots(): EssentialAction = cached.status(_ => "robots", 200, 24.hours) { - Action { implicit request => - val sitemapUrl = routes.HomeController.sitemap().absoluteURL(secure = true) - Ok( - s"""User-agent: * - |Allow: / - | - |Sitemap: $sitemapUrl""".stripMargin - ).as("text/plain") - } - } - - /** - * Health check endpoint for load balancers and container orchestration. 
- * - * Returns a simple JSON response indicating the application is running. - * This endpoint is used by Docker health checks, Kubernetes probes, - * and load balancer health checks. - * - * @return an action that returns a 200 OK with a JSON health status - */ - def health(): Action[AnyContent] = Action { - Ok("""{"status":"ok"}""").as("application/json") - } -} diff --git a/app/controllers/IbdRelayController.scala b/app/controllers/IbdRelayController.scala deleted file mode 100644 index 55da829a..00000000 --- a/app/controllers/IbdRelayController.scala +++ /dev/null @@ -1,194 +0,0 @@ -package controllers - -import jakarta.inject.{Inject, Singleton} -import org.apache.pekko.stream.Materializer -import org.apache.pekko.stream.scaladsl.{Flow, Sink, Source} -import play.api.Logging -import play.api.libs.json.{Json, JsValue} -import play.api.mvc.* -import services.PdsSignatureVerifier -import services.ibd.{IbdRelaySessionManager, MatchDiscoveryService, RelayMessage, RelaySession} -import repositories.PdsNodeRepository - -import java.time.Instant -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class IbdRelayController @Inject()( - val controllerComponents: ControllerComponents, - sessionManager: IbdRelaySessionManager, - signatureVerifier: PdsSignatureVerifier, - nodeRepo: PdsNodeRepository, - discoveryService: MatchDiscoveryService -)(implicit ec: ExecutionContext, mat: Materializer) extends BaseController with Logging { - - /** - * WebSocket relay endpoint. - * Auth via query params: ?did=...×tamp=...&signature=...&nonce=... - * WebSocket handshake is an HTTP GET, so we can't use headers in the browser/client - * the same way as REST. Query params are the standard approach. 
- */ - def relay(sessionId: String): WebSocket = WebSocket.acceptOrResult[String, String] { requestHeader => - val did = requestHeader.getQueryString("did") - val timestamp = requestHeader.getQueryString("timestamp") - val signature = requestHeader.getQueryString("signature") - val nonce = requestHeader.getQueryString("nonce") - - (did, timestamp, signature) match { - case (Some(d), Some(ts), Some(sig)) => - authenticateAndConnect(sessionId, d, ts, sig, nonce, requestHeader) - case _ => - Future.successful(Left(Forbidden(Json.obj( - "error" -> "Missing authentication parameters (did, timestamp, signature)" - )))) - } - } - - /** - * Create a relay session when mutual consent is detected. - * Called by the Edge client after confirming mutual consent exists. - */ - case class CreateSessionRequest(matchRequestUri: String) - object CreateSessionRequest { - implicit val format: play.api.libs.json.OFormat[CreateSessionRequest] = Json.format - } - - def createSession(): Action[JsValue] = Action(parse.json).async { request => - // Auth via headers like other PDS endpoints - val headers = request.headers - val did = headers.get("X-PDS-DID") - val sig = headers.get("X-PDS-Signature") - val ts = headers.get("X-PDS-Timestamp") - - (did, sig, ts) match { - case (Some(d), Some(s), Some(t)) => - if (!signatureVerifier.isTimestampValid(t)) { - Future.successful(Unauthorized(Json.obj("error" -> "Timestamp expired"))) - } else { - val bodyHash = signatureVerifier.hashBody(request) - val signingInput = signatureVerifier.buildSigningInput("POST", request.path, t, bodyHash, headers.get("X-PDS-Nonce")) - signatureVerifier.verifySignature(d, signingInput, s).flatMap { - case false => - Future.successful(Unauthorized(Json.obj("error" -> "Invalid signature"))) - case true => - request.body.validate[CreateSessionRequest].fold( - errors => Future.successful(BadRequest(Json.obj("error" -> "Invalid request body"))), - payload => handleCreateSession(d, payload.matchRequestUri) - ) - } - } - 
case _ => - Future.successful(Unauthorized(Json.obj("error" -> "Missing auth headers"))) - } - } - - private def handleCreateSession(requesterDid: String, matchRequestUri: String): Future[Result] = { - for { - consentOpt <- discoveryService.getConsentStatus(matchRequestUri) - requestOpt <- discoveryService.getMatchRequest(matchRequestUri) - } yield { - (consentOpt, requestOpt) match { - case (None, _) | (_, None) => - NotFound(Json.obj("error" -> "Match request not found")) - case (Some(status), _) if !status.mutualConsent => - Forbidden(Json.obj("error" -> "Mutual consent not established")) - case (Some(_), Some(matchRequest)) => - // Check that the requester is actually a participant - val participantA = matchRequest.requesterDid - val participantB = matchRequest.targetDid.getOrElse("") - if (requesterDid != participantA && requesterDid != participantB) { - Forbidden(Json.obj("error" -> "Not a participant in this match request")) - } else { - sessionManager.findSessionForRequest(matchRequestUri) match { - case Some(existing) => - Ok(Json.obj( - "sessionId" -> existing.sessionId, - "expiresAt" -> existing.expiresAt.toString - )) - case None => - sessionManager.createSession(matchRequestUri, participantA, participantB) match { - case Some(session) => - Created(Json.obj( - "sessionId" -> session.sessionId, - "expiresAt" -> session.expiresAt.toString - )) - case None => - ServiceUnavailable(Json.obj("error" -> "Max concurrent sessions reached")) - } - } - } - } - } - } - - private def authenticateAndConnect( - sessionId: String, - did: String, - timestamp: String, - signature: String, - nonce: Option[String], - requestHeader: RequestHeader - ): Future[Either[Result, Flow[String, String, ?]]] = { - - // Validate timestamp - if (!signatureVerifier.isTimestampValid(timestamp)) { - return Future.successful(Left(Unauthorized(Json.obj("error" -> "Timestamp expired")))) - } - - // Validate nonce - if (nonce.exists(n => !signatureVerifier.checkAndRecordNonce(n))) { - return 
Future.successful(Left(Unauthorized(Json.obj("error" -> "Nonce already used")))) - } - - // Build signing input for WebSocket handshake (GET, no body) - val emptyBodyHash = java.util.Base64.getEncoder.encodeToString( - java.security.MessageDigest.getInstance("SHA-256").digest(Array.empty[Byte]) - ) - val signingInput = signatureVerifier.buildSigningInput("GET", requestHeader.path, timestamp, emptyBodyHash, nonce) - - signatureVerifier.verifySignature(did, signingInput, signature).map { - case false => - Left(Unauthorized(Json.obj("error" -> "Invalid signature"))) - case true => - sessionManager.getSession(sessionId) match { - case None => - Left(NotFound(Json.obj("error" -> "Session not found or expired"))) - case Some(session) if !sessionManager.isAuthorizedParticipant(sessionId, did) => - Left(Forbidden(Json.obj("error" -> "Not authorized for this session"))) - case Some(session) => - logger.info(s"WebSocket connected: DID=$did session=$sessionId") - Right(createRelayFlow(session, did)) - } - } - } - - private def createRelayFlow(session: RelaySession, did: String): Flow[String, String, ?] = { - // Incoming messages from this participant → publish to the bus - val incomingSink = Flow[String] - .map(msg => RelayMessage(fromDid = did, payload = msg)) - .to(session.bus.publishSink) - - // Outgoing messages from the bus → filtered to exclude own messages - val outgoingSource = session.bus.subscribeTo(did) - .map(_.payload) - - Flow.fromSinkAndSource(incomingSink, outgoingSource) - } - - /** - * REST endpoint to check session status (for Edge clients that need to poll). 
- */ - def getSessionStatus(sessionId: String): Action[AnyContent] = Action { _ => - sessionManager.getSession(sessionId) match { - case Some(session) => - Ok(Json.obj( - "sessionId" -> session.sessionId, - "matchRequestUri" -> session.matchRequestUri, - "expiresAt" -> session.expiresAt.toString, - "active" -> true - )) - case None => - NotFound(Json.obj("error" -> "Session not found or expired")) - } - } -} diff --git a/app/controllers/InstrumentProposalController.scala b/app/controllers/InstrumentProposalController.scala deleted file mode 100644 index e7388f6f..00000000 --- a/app/controllers/InstrumentProposalController.scala +++ /dev/null @@ -1,129 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import repositories.{InstrumentObservationRepository, InstrumentProposalRepository} -import services.InstrumentProposalService -import models.domain.genomics.ProposalStatus - -import scala.concurrent.ExecutionContext - -@Singleton -class InstrumentProposalController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - proposalService: InstrumentProposalService, - proposalRepo: InstrumentProposalRepository, - observationRepo: InstrumentObservationRepository - )(implicit ec: ExecutionContext) - extends BaseController with Logging { - - // Audit identity for API-key-authenticated actions - private val ApiCuratorId = "api-system" - - case class AcceptProposalRequest( - labName: String, - manufacturer: Option[String] = None, - model: Option[String] = None, - notes: Option[String] = None - ) - object AcceptProposalRequest { implicit val format: OFormat[AcceptProposalRequest] = Json.format } - - case class RejectProposalRequest(reason: String) - object RejectProposalRequest { implicit val format: OFormat[RejectProposalRequest] = 
Json.format } - - def listProposals(status: Option[String]): Action[AnyContent] = secureApi.async { _ => - val query = status.flatMap(s => scala.util.Try(ProposalStatus.fromString(s)).toOption) match { - case Some(s) => proposalRepo.findByStatus(s) - case None => proposalRepo.findPending() - } - - query.map { proposals => - Ok(Json.obj( - "proposals" -> proposals, - "total" -> proposals.size - )) - }.recover { - case e: Exception => - logger.error(s"Error listing instrument proposals: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - def getProposalDetail(id: Int): Action[AnyContent] = secureApi.async { _ => - proposalRepo.findById(id).flatMap { - case None => - scala.concurrent.Future.successful( - NotFound(Json.obj("error" -> s"Proposal $id not found")) - ) - case Some(proposal) => - observationRepo.findByInstrumentId(proposal.instrumentId).map { observations => - Ok(Json.obj( - "proposal" -> proposal, - "observations" -> observations, - "observationCount" -> observations.size, - "distinctCitizens" -> observations.map(_.biosampleRef).distinct.size - )) - } - }.recover { - case e: Exception => - logger.error(s"Error getting proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - def acceptProposal(id: Int): Action[AcceptProposalRequest] = - secureApi.jsonAction[AcceptProposalRequest].async { request => - val body = request.body - proposalService.acceptProposal(id, ApiCuratorId, body.labName, body.manufacturer, body.model, body.notes).map { - case Right(proposal) => Ok(Json.toJson(proposal)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - }.recover { - case e: Exception => - logger.error(s"Error accepting proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - def rejectProposal(id: Int): Action[RejectProposalRequest] = - 
secureApi.jsonAction[RejectProposalRequest].async { request => - val body = request.body - proposalService.rejectProposal(id, ApiCuratorId, body.reason).map { - case Right(proposal) => Ok(Json.toJson(proposal)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - }.recover { - case e: Exception => - logger.error(s"Error rejecting proposal $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - def detectConflicts(): Action[AnyContent] = secureApi.async { _ => - proposalService.detectConflicts().map { conflicts => - Ok(Json.obj( - "conflicts" -> conflicts.map { c => - Json.obj( - "instrumentId" -> c.instrumentId, - "dominantLabName" -> c.dominantLabName, - "dominantRatio" -> c.dominantRatio, - "labs" -> c.proposals.map { l => - Json.obj( - "labName" -> l.labName, - "observationCount" -> l.observationCount, - "ratio" -> l.ratio - ) - } - ) - }, - "total" -> conflicts.size - )) - }.recover { - case e: Exception => - logger.error(s"Error detecting conflicts: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } -} diff --git a/app/controllers/LanguageController.scala b/app/controllers/LanguageController.scala deleted file mode 100644 index 9f90fe38..00000000 --- a/app/controllers/LanguageController.scala +++ /dev/null @@ -1,24 +0,0 @@ -package controllers - -import play.api.i18n.{I18nSupport, Lang} -import play.api.mvc.* - -import javax.inject.* - -@Singleton -class LanguageController @Inject()(val controllerComponents: ControllerComponents) - extends BaseController with I18nSupport { - - def switchLanguage(lang: String): Action[AnyContent] = Action { implicit request => - val referer = request.headers.get(REFERER).getOrElse("/") - // Prevent open redirect: only allow relative paths - val safeTarget = if (referer.startsWith("/") && !referer.startsWith("//")) referer else "/" - val supportedLangs = messagesApi.messages.keys.filter(_ != "default").toSet - - 
if (supportedLangs.contains(lang)) { - Redirect(safeTarget).withLang(Lang(lang)) - } else { - Redirect(safeTarget) - } - } -} diff --git a/app/controllers/MatchDiscoveryController.scala b/app/controllers/MatchDiscoveryController.scala deleted file mode 100644 index c17a9672..00000000 --- a/app/controllers/MatchDiscoveryController.scala +++ /dev/null @@ -1,71 +0,0 @@ -package controllers - -import actions.PdsAuthAction -import jakarta.inject.{Inject, Singleton} -import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import play.api.mvc.* -import services.ibd.{MatchDiscoveryService, PopulationAnalysisService} - -import java.util.UUID -import scala.concurrent.ExecutionContext - -@Singleton -class MatchDiscoveryController @Inject()( - val controllerComponents: ControllerComponents, - pdsAuth: PdsAuthAction, - discoveryService: MatchDiscoveryService, - populationService: PopulationAnalysisService -)(implicit ec: ExecutionContext) extends BaseController with Logging { - - def getSuggestions(suggestionType: Option[String], limit: Int): Action[AnyContent] = pdsAuth.async { request => - val did = request.pdsNode.did - // The PDS node's DID is used to look up the user's sample; for now we extract from query context - // In practice, the Edge client passes the sampleGuid it wants suggestions for - request.getQueryString("sampleGuid") match { - case Some(guidStr) => - val sampleGuid = UUID.fromString(guidStr) - discoveryService.getSuggestions(sampleGuid, suggestionType, limit).map { suggestions => - Ok(Json.toJson(suggestions.map { s => - Json.obj( - "id" -> s.id, - "targetSampleGuid" -> s.targetSampleGuid, - "suggestedSampleGuid" -> s.suggestedSampleGuid, - "suggestionType" -> s.suggestionType, - "score" -> s.score, - "metadata" -> s.metadata, - "status" -> s.status, - "createdAt" -> s.createdAt, - "expiresAt" -> s.expiresAt - ) - })) - } - case None => - scala.concurrent.Future.successful(BadRequest(Json.obj("error" -> "sampleGuid query parameter required"))) - 
} - } - - def dismissSuggestion(id: Long): Action[AnyContent] = pdsAuth.async { _ => - discoveryService.dismissSuggestion(id).map { success => - Ok(Json.obj("success" -> success)) - } - } - - def getPopulationBreakdown(sampleGuid: UUID): Action[AnyContent] = pdsAuth.async { _ => - populationService.getBreakdown(sampleGuid).map { - case Some(cache) => Ok(Json.obj( - "sampleGuid" -> cache.sampleGuid, - "breakdown" -> cache.breakdown, - "cachedAt" -> cache.cachedAt - )) - case None => NotFound(Json.obj("error" -> s"No population breakdown for sample $sampleGuid")) - } - } - - def getPopulationOverlap(guid1: UUID, guid2: UUID): Action[AnyContent] = pdsAuth.async { _ => - populationService.computeOverlap(guid1, guid2).map { - case Some(score) => Ok(Json.obj("overlapScore" -> score)) - case None => NotFound(Json.obj("error" -> "Population data not available for one or both samples")) - } - } -} diff --git a/app/controllers/MatchRequestController.scala b/app/controllers/MatchRequestController.scala deleted file mode 100644 index 3e01af57..00000000 --- a/app/controllers/MatchRequestController.scala +++ /dev/null @@ -1,155 +0,0 @@ -package controllers - -import actions.PdsAuthAction -import jakarta.inject.{Inject, Singleton} -import models.domain.ibd.{MatchConsentTracking, MatchRequestTracking} -import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import play.api.mvc.* -import services.ibd.MatchDiscoveryService - -import java.time.ZonedDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class MatchRequestController @Inject()( - val controllerComponents: ControllerComponents, - pdsAuth: PdsAuthAction, - discoveryService: MatchDiscoveryService -)(implicit ec: ExecutionContext) extends BaseController with Logging { - - case class MatchRequestPayload( - fromSampleGuid: UUID, - toSampleGuid: UUID, - requestType: Option[String] = None, - discoveryReason: Option[play.api.libs.json.JsValue] = None, - message: 
Option[String] = None, - expiresInDays: Option[Int] = None - ) - object MatchRequestPayload { implicit val format: OFormat[MatchRequestPayload] = Json.format } - - def createRequest(): Action[MatchRequestPayload] = pdsAuth.jsonAction[MatchRequestPayload].async { request => - val node = request.pdsNode - val payload = request.body - val now = ZonedDateTime.now() - val atUri = s"at://${node.did}/us.decoding.matching.matchRequest/${UUID.randomUUID()}" - - val tracking = MatchRequestTracking( - id = None, - atUri = atUri, - requesterDid = node.did, - targetDid = None, - fromSampleGuid = payload.fromSampleGuid, - toSampleGuid = payload.toSampleGuid, - requestType = payload.requestType.getOrElse("FULL"), - status = "PENDING", - discoveryReason = payload.discoveryReason, - message = payload.message, - createdAt = now, - updatedAt = now, - expiresAt = payload.expiresInDays.map(d => now.plusDays(d.toLong)), - completedAt = None - ) - - discoveryService.createMatchRequest(tracking).map { created => - Created(Json.obj( - "atUri" -> created.atUri, - "fromSampleGuid" -> created.fromSampleGuid, - "toSampleGuid" -> created.toSampleGuid, - "requestType" -> created.requestType, - "status" -> created.status, - "createdAt" -> created.createdAt, - "expiresAt" -> created.expiresAt - )) - } - } - - def getPendingRequests(): Action[AnyContent] = pdsAuth.async { request => - request.getQueryString("sampleGuid") match { - case Some(guidStr) => - val sampleGuid = UUID.fromString(guidStr) - discoveryService.getPendingRequests(sampleGuid).map { requests => - Ok(Json.toJson(requests.map(requestToJson))) - } - case None => - Future.successful(BadRequest(Json.obj("error" -> "sampleGuid query parameter required"))) - } - } - - def getSentRequests(): Action[AnyContent] = pdsAuth.async { request => - val did = request.pdsNode.did - discoveryService.getSentRequests(did).map { requests => - Ok(Json.toJson(requests.map(requestToJson))) - } - } - - def cancelRequest(uri: String): Action[AnyContent] = 
pdsAuth.async { _ => - discoveryService.cancelRequest(uri).map { success => - Ok(Json.obj("success" -> success)) - } - } - - // --- Consent endpoints --- - - case class ConsentPayload( - sampleGuid: UUID, - consentLevel: String, - allowedMatchTypes: Option[Seq[String]] = None, - shareContactInfo: Option[Boolean] = None, - expiresInDays: Option[Int] = None - ) - object ConsentPayload { implicit val format: OFormat[ConsentPayload] = Json.format } - - def submitConsent(): Action[ConsentPayload] = pdsAuth.jsonAction[ConsentPayload].async { request => - val node = request.pdsNode - val payload = request.body - val now = ZonedDateTime.now() - val atUri = s"at://${node.did}/us.decoding.matching.matchConsent/${UUID.randomUUID()}" - - val consent = MatchConsentTracking( - id = None, - atUri = atUri, - consentingDid = node.did, - sampleGuid = payload.sampleGuid, - consentLevel = payload.consentLevel, - allowedMatchTypes = payload.allowedMatchTypes.map(t => Json.toJson(t)), - shareContactInfo = payload.shareContactInfo.getOrElse(false), - consentedAt = now, - expiresAt = payload.expiresInDays.map(d => now.plusDays(d.toLong)), - revokedAt = None - ) - - discoveryService.trackConsent(consent).map { created => - Created(Json.obj( - "atUri" -> created.atUri, - "sampleGuid" -> created.sampleGuid, - "consentLevel" -> created.consentLevel, - "consentedAt" -> created.consentedAt - )) - } - } - - def getConsentStatus(requestUri: String): Action[AnyContent] = pdsAuth.async { _ => - discoveryService.getConsentStatus(requestUri).map { - case Some(status) => Ok(Json.toJson(status)) - case None => NotFound(Json.obj("error" -> s"Match request not found: $requestUri")) - } - } - - private def requestToJson(r: MatchRequestTracking) = Json.obj( - "atUri" -> r.atUri, - "requesterDid" -> r.requesterDid, - "targetDid" -> r.targetDid, - "fromSampleGuid" -> r.fromSampleGuid, - "toSampleGuid" -> r.toSampleGuid, - "requestType" -> r.requestType, - "status" -> r.status, - "discoveryReason" -> 
r.discoveryReason, - "message" -> r.message, - "createdAt" -> r.createdAt, - "updatedAt" -> r.updatedAt, - "expiresAt" -> r.expiresAt, - "completedAt" -> r.completedAt - ) -} diff --git a/app/controllers/PDSRegistrationController.scala b/app/controllers/PDSRegistrationController.scala deleted file mode 100644 index 91b6bb08..00000000 --- a/app/controllers/PDSRegistrationController.scala +++ /dev/null @@ -1,69 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import api.PdsRegistrationRequest -import play.api.libs.json.{JsError, JsSuccess, Json} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import repositories.UserRepository -import services.PDSRegistrationService -import services.social.ReputationService -import play.api.Logging - -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class PDSRegistrationController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - pdsRegistrationService: PDSRegistrationService, - reputationService: ReputationService, - userRepository: UserRepository - )(implicit ec: ExecutionContext) extends BaseController with Logging { - - /** - * Handles the registration of a new Personal Data Server (PDS). - * Expects a JSON body containing PdsRegistrationRequest. - * Secured with X-API-Key authentication. - * - * @return An `Action` that processes the registration request. 
- */ - def registerPDS(): Action[play.api.libs.json.JsValue] = secureApi.async(parse.json) { implicit request => - request.body.validate[PdsRegistrationRequest] match { - case JsSuccess(pdsRegistrationRequest, _) => - pdsRegistrationService.registerPDS( - pdsRegistrationRequest.did, - pdsRegistrationRequest.handle, - pdsRegistrationRequest.pdsUrl, - pdsRegistrationRequest.rToken - ).flatMap { - case Right(pdsRegistration) => - userRepository.findByDid(pdsRegistration.did).flatMap { - case Some(user) => - reputationService.recordEvent( - userId = user.id.get, // Assuming user.id is always defined for a registered user - eventTypeName = "ACCOUNT_VERIFIED", - notes = Some("Initial PDS Registration") - ).flatMap { _ => - reputationService.recordEvent( - userId = user.id.get, - eventTypeName = "NEW_USER_BONUS", - notes = Some("Welcome Bonus") - ) - }.map { _ => - Ok(Json.toJson(pdsRegistration)) - } - case None => - // This case should ideally not happen if PDS registration implies an existing user. - // Log an error and proceed without awarding reputation. 
- logger.error(s"User not found for DID ${pdsRegistration.did} after successful PDS registration.") - Future.successful(Ok(Json.toJson(pdsRegistration))) - } - case Left(errorMessage) => - Future.successful(BadRequest(Json.obj("error" -> errorMessage))) - } - case JsError(errors) => - Future.successful(BadRequest(Json.obj("error" -> "Invalid JSON body", "details" -> JsError.toJson(errors)))) - } - } -} diff --git a/app/controllers/PatronageApiController.scala b/app/controllers/PatronageApiController.scala deleted file mode 100644 index b234a1e9..00000000 --- a/app/controllers/PatronageApiController.scala +++ /dev/null @@ -1,85 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import play.api.mvc.* -import services.{PatronageService, WebhookEvent} - -import java.util.UUID -import scala.concurrent.ExecutionContext - -@Singleton -class PatronageApiController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - patronageService: PatronageService - )(implicit ec: ExecutionContext) extends BaseController with Logging { - - case class CreateSubscriptionRequest( - userId: UUID, - tier: String, - billingInterval: String, - paymentProvider: String, - providerSubscriptionId: Option[String] = None, - providerCustomerId: Option[String] = None - ) - object CreateSubscriptionRequest { implicit val format: OFormat[CreateSubscriptionRequest] = Json.format } - - case class CancelSubscriptionRequest(userId: UUID) - object CancelSubscriptionRequest { implicit val format: OFormat[CancelSubscriptionRequest] = Json.format } - - def createSubscription(): Action[CreateSubscriptionRequest] = - secureApi.jsonAction[CreateSubscriptionRequest].async { request => - val r = request.body - patronageService.createSubscription( - userId = r.userId, - tier = r.tier, - billingInterval = r.billingInterval, - paymentProvider = 
r.paymentProvider, - providerSubscriptionId = r.providerSubscriptionId, - providerCustomerId = r.providerCustomerId - ).map { - case Right(sub) => Created(Json.toJson(sub)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def cancelSubscription(id: Int): Action[CancelSubscriptionRequest] = - secureApi.jsonAction[CancelSubscriptionRequest].async { request => - patronageService.cancelSubscription(id, request.body.userId).map { - case Right(_) => Ok(Json.obj("cancelled" -> true)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def getSubscription(userId: UUID): Action[AnyContent] = secureApi.async { _ => - patronageService.getActiveSubscription(userId).map { - case Some(sub) => Ok(Json.toJson(sub)) - case None => NotFound(Json.obj("error" -> "No active subscription")) - } - } - - def getUserSubscriptions(userId: UUID): Action[AnyContent] = secureApi.async { _ => - patronageService.getUserSubscriptions(userId).map { subs => - Ok(Json.obj("subscriptions" -> subs, "total" -> subs.size)) - } - } - - def isPatron(userId: UUID): Action[AnyContent] = secureApi.async { _ => - patronageService.isPatron(userId).map { isPatron => - Ok(Json.obj("isPatron" -> isPatron)) - } - } - - def getPatronSummary: Action[AnyContent] = secureApi.async { _ => - patronageService.getPatronSummary.map(summary => Ok(Json.toJson(summary))) - } - - def expireOverdue(): Action[AnyContent] = secureApi.async { _ => - patronageService.expireOverdueSubscriptions().map { count => - Ok(Json.obj("expired" -> count)) - } - } -} diff --git a/app/controllers/PdsFleetApiController.scala b/app/controllers/PdsFleetApiController.scala deleted file mode 100644 index bd7aa90f..00000000 --- a/app/controllers/PdsFleetApiController.scala +++ /dev/null @@ -1,155 +0,0 @@ -package controllers - -import actions.{ApiSecurityAction, PdsAuthAction} -import jakarta.inject.{Inject, Singleton} -import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import 
play.api.mvc.* -import services.{HeartbeatRequest, PdsFleetService, SubmissionProvenanceService} - -import java.util.UUID -import scala.concurrent.ExecutionContext - -@Singleton -class PdsFleetApiController @Inject()( - val controllerComponents: ControllerComponents, - pdsAuth: PdsAuthAction, - secureApi: ApiSecurityAction, - fleetService: PdsFleetService, - submissionService: SubmissionProvenanceService - )(implicit ec: ExecutionContext) extends BaseController with Logging { - - // --- PDS-authenticated endpoints (called by edge nodes) --- - - case class HeartbeatPayload( - status: String, - softwareVersion: Option[String] = None, - loadMetrics: Option[play.api.libs.json.JsValue] = None, - processingQueueSize: Option[Int] = None, - errorMessage: Option[String] = None, - lastCommitCid: Option[String] = None, - lastCommitRev: Option[String] = None - ) - object HeartbeatPayload { implicit val format: OFormat[HeartbeatPayload] = Json.format } - - def heartbeat(): Action[HeartbeatPayload] = pdsAuth.jsonAction[HeartbeatPayload].async { request => - val node = request.pdsNode - val payload = request.body - val hbRequest = HeartbeatRequest( - did = node.did, - pdsUrl = node.pdsUrl, - handle = node.handle, - nodeName = node.nodeName, - softwareVersion = payload.softwareVersion, - status = payload.status, - loadMetrics = payload.loadMetrics, - processingQueueSize = payload.processingQueueSize, - lastCommitCid = payload.lastCommitCid, - lastCommitRev = payload.lastCommitRev, - errorMessage = payload.errorMessage - ) - - fleetService.processHeartbeat(hbRequest).map { - case Right(updatedNode) => Ok(Json.toJson(updatedNode)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - case class SubmissionPayload( - submissionType: String, - proposedValue: String, - biosampleId: Option[Int] = None, - biosampleGuid: Option[UUID] = None, - confidenceScore: Option[Double] = None, - algorithmVersion: Option[String] = None, - softwareVersion: Option[String] = None, - 
payload: Option[play.api.libs.json.JsValue] = None, - atUri: Option[String] = None, - atCid: Option[String] = None - ) - object SubmissionPayload { implicit val format: OFormat[SubmissionPayload] = Json.format } - - def submitData(): Action[SubmissionPayload] = pdsAuth.jsonAction[SubmissionPayload].async { request => - val node = request.pdsNode - val p = request.body - - submissionService.recordSubmission( - did = node.did, - submissionType = p.submissionType, - proposedValue = p.proposedValue, - biosampleId = p.biosampleId, - biosampleGuid = p.biosampleGuid, - confidenceScore = p.confidenceScore, - algorithmVersion = p.algorithmVersion, - softwareVersion = p.softwareVersion, - payload = p.payload, - atUri = p.atUri, - atCid = p.atCid - ).map { - case Right(submission) => Created(Json.toJson(submission)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - // --- Admin-authenticated endpoints (X-API-Key secured) --- - - def getFleetSummary: Action[AnyContent] = secureApi.async { _ => - fleetService.getFleetSummary.map(summary => Ok(Json.toJson(summary))) - } - - def listNodes(status: Option[String]): Action[AnyContent] = secureApi.async { _ => - fleetService.listNodes(status).map { nodes => - Ok(Json.obj("nodes" -> nodes, "total" -> nodes.size)) - } - } - - def getNode(did: String): Action[AnyContent] = secureApi.async { _ => - fleetService.getNode(did).map { - case Some(node) => Ok(Json.toJson(node)) - case None => NotFound(Json.obj("error" -> s"Node not found: $did")) - } - } - - def removeNode(did: String): Action[AnyContent] = secureApi.async { _ => - fleetService.removeNode(did).map { - case Right(_) => Ok(Json.obj("removed" -> true)) - case Left(error) => NotFound(Json.obj("error" -> error)) - } - } - - def markStaleOffline(): Action[AnyContent] = secureApi.async { _ => - fleetService.markStaleNodesOffline().map { count => - Ok(Json.obj("markedOffline" -> count)) - } - } - - def getPendingSubmissions(submissionType: Option[String], limit: 
Int): Action[AnyContent] = secureApi.async { _ => - submissionService.getPendingSubmissions(submissionType, limit).map { submissions => - Ok(Json.obj("submissions" -> submissions, "total" -> submissions.size)) - } - } - - case class ReviewRequest(reviewedBy: String, notes: Option[String] = None) - object ReviewRequest { implicit val format: OFormat[ReviewRequest] = Json.format } - - def acceptSubmission(id: Int): Action[ReviewRequest] = secureApi.jsonAction[ReviewRequest].async { request => - submissionService.acceptSubmission(id, request.body.reviewedBy, request.body.notes).map { - case Right(_) => Ok(Json.obj("accepted" -> true)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def rejectSubmission(id: Int): Action[ReviewRequest] = secureApi.jsonAction[ReviewRequest].async { request => - submissionService.rejectSubmission(id, request.body.reviewedBy, request.body.notes).map { - case Right(_) => Ok(Json.obj("rejected" -> true)) - case Left(error) => BadRequest(Json.obj("error" -> error)) - } - } - - def getNodeSubmissionSummary(did: String): Action[AnyContent] = secureApi.async { _ => - submissionService.getNodeSubmissionSummary(did).map { - case Right(summary) => Ok(Json.toJson(summary)) - case Left(error) => NotFound(Json.obj("error" -> error)) - } - } -} diff --git a/app/controllers/PgpBiosampleController.scala b/app/controllers/PgpBiosampleController.scala deleted file mode 100644 index e988ba4d..00000000 --- a/app/controllers/PgpBiosampleController.scala +++ /dev/null @@ -1,79 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.PgpBiosampleRequest -import play.api.libs.json.Json -import play.api.mvc.{Action, BaseController, ControllerComponents} -import services.{BiosampleDomainService, DuplicateParticipantException, InvalidCoordinatesException} - -import scala.concurrent.ExecutionContext - -/** - * Controller responsible for handling PGP (Personal Genome 
Project) biosample-related actions. - * - * This controller provides endpoints for managing and creating biosample entities that - * represent biological samples associated with participants in the PGP. The main responsibility - * is to facilitate the creation of biosamples by processing incoming HTTP requests, validating - * the data, and delegating the actual sample creation logic to the service layer. - * - * @constructor Initializes the controller with its required dependencies. - * @param controllerComponents A base set of helper methods provided by Play Framework for handling HTTP responses. - * @param secureApi An action builder that secures API endpoints by enforcing strict authentication and JSON validation. - * @param biosampleDomainService The facade service for all biosample operations. - * @param ec An implicit execution context for asynchronous operations. - */ -@Singleton -class PgpBiosampleController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - biosampleDomainService: BiosampleDomainService - )(implicit ec: ExecutionContext) extends BaseController { - - /** - * Handles the creation of a new PGP biosample. Validates the input request, creates the biosample in the system, - * and returns a response with the unique identifier of the created biosample. If an error occurs during processing, - * appropriate HTTP responses are returned based on the error type. - * - * @return A Play `Action` for processing a request with a `PgpBiosampleRequest` body. Returns the following: - * - `201 Created` with the unique identifier of the created biosample on success. - * - `409 Conflict` if a biosample for the specified participant already exists. - * - `400 Bad Request` if the input request contains invalid data (e.g., invalid coordinates). - * - `500 Internal Server Error` if an unexpected issue occurs during processing. 
- */ - def create: Action[PgpBiosampleRequest] = secureApi.jsonAction[PgpBiosampleRequest].async { request => - biosampleDomainService.createPgpBiosample( - participantId = request.body.participantId, - description = request.body.description, - centerName = request.body.centerName, - sex = request.body.sex, - latitude = request.body.latitude, - longitude = request.body.longitude - ).map { guid => - Created(Json.toJson(guid)) - }.recover { - case e: DuplicateParticipantException => - Conflict(Json.obj( - "error" -> "Duplicate submission", - "message" -> s"A biosample for participant ${request.body.participantId} already exists", - "details" -> e.getMessage - )) - case e: InvalidCoordinatesException => - BadRequest(Json.obj( - "error" -> "Invalid coordinates", - "message" -> e.getMessage - )) - case e: IllegalArgumentException => - BadRequest(Json.obj( - "error" -> "Invalid request", - "message" -> e.getMessage - )) - case e: Exception => - InternalServerError(Json.obj( - "error" -> "Internal server error", - "message" -> "An unexpected error occurred while processing the request" - )) - } - } - -} \ No newline at end of file diff --git a/app/controllers/ProfileController.scala b/app/controllers/ProfileController.scala deleted file mode 100644 index f2a80d3c..00000000 --- a/app/controllers/ProfileController.scala +++ /dev/null @@ -1,48 +0,0 @@ -package controllers - -import actions.AuthenticatedAction -import jakarta.inject.{Inject, Singleton} -import models.domain.user.User -import play.api.Logging -import play.api.data.Form -import play.api.data.Forms._ -import play.api.i18n.I18nSupport -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import repositories.UserRepository -import org.webjars.play.WebJarsUtil - -import scala.concurrent.{ExecutionContext, Future} - -case class ProfileFormData(displayName: Option[String]) - -@Singleton -class ProfileController @Inject()( - val controllerComponents: ControllerComponents, - 
authenticatedAction: AuthenticatedAction, - userRepository: UserRepository - )(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) extends BaseController with I18nSupport with Logging { - - val profileForm = Form( - mapping( - "displayName" -> optional(text(maxLength = 50)) - )(ProfileFormData.apply)(data => Some(data.displayName)) - ) - - def view: Action[AnyContent] = authenticatedAction { implicit request => - val filledForm = profileForm.fill(ProfileFormData(request.user.displayName)) - Ok(views.html.user.profile(filledForm, request.user)) - } - - def update: Action[AnyContent] = authenticatedAction.async { implicit request => - profileForm.bindFromRequest().fold( - formWithErrors => Future.successful(BadRequest(views.html.user.profile(formWithErrors, request.user))), - data => { - val updatedUser = request.user.copy(displayName = data.displayName) - userRepository.update(updatedUser).map { _ => - Redirect(routes.ProfileController.view) - .flashing("success" -> "Profile updated successfully.") - } - } - ) - } -} diff --git a/app/controllers/ProjectController.scala b/app/controllers/ProjectController.scala deleted file mode 100644 index af7dc4b4..00000000 --- a/app/controllers/ProjectController.scala +++ /dev/null @@ -1,20 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.ProjectRequest -import play.api.libs.json.Json -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import services.ProjectService - -import java.util.UUID -import scala.concurrent.ExecutionContext - -@Singleton -class ProjectController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - projectService: ProjectService - )(implicit ec: ExecutionContext) extends BaseController { - -} diff --git a/app/controllers/PublicationCandidateController.scala b/app/controllers/PublicationCandidateController.scala deleted file mode 100644 index 
74ec5bc3..00000000 --- a/app/controllers/PublicationCandidateController.scala +++ /dev/null @@ -1,127 +0,0 @@ -package controllers - -import actions.{AuthenticatedAction, RoleAction} -import jakarta.inject.{Inject, Singleton} -import play.api.Logging -import play.api.i18n.{I18nSupport, MessagesApi} -import play.api.libs.json.Json -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import repositories.PublicationCandidateRepository -import services.PublicationDiscoveryService -import org.webjars.play.WebJarsUtil - -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class PublicationCandidateController @Inject()( - val controllerComponents: ControllerComponents, - publicationCandidateRepository: PublicationCandidateRepository, - publicationDiscoveryService: PublicationDiscoveryService, - override val messagesApi: MessagesApi, - authenticatedAction: AuthenticatedAction, - roleAction: RoleAction - )(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) extends BaseController with I18nSupport with Logging { - - private def CuratorAction = authenticatedAction andThen roleAction("Curator", "Admin") - - private val validStatuses = Set("pending", "accepted", "rejected", "deferred") - - def listCandidates(page: Int = 1, pageSize: Int = 20, status: String = "pending"): Action[AnyContent] = CuratorAction.async { implicit request => - val effectiveStatus = if (validStatuses.contains(status)) status else "pending" - - for { - (candidates, total) <- publicationCandidateRepository.listByStatus(effectiveStatus, page, pageSize) - statusCounts <- publicationCandidateRepository.countByStatus() - } yield { - Ok(views.html.publicationCandidates.list(candidates, page, pageSize, total.toInt, effectiveStatus, statusCounts)) - } - } - - def accept(id: Int): Action[AnyContent] = CuratorAction.async { implicit request => - val reviewerId = request.user.id.get - - publicationDiscoveryService.acceptCandidate(id, reviewerId).map { - case Some(pub) 
=> Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("success" -> messagesApi.preferred(request)("publicationCandidates.acceptSuccess", pub.title)) - case None => Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("error" -> messagesApi.preferred(request)("publicationCandidates.acceptFailed")) - }.recover { - case e: Exception => - logger.error(s"Error accepting candidate $id: ${e.getMessage}", e) - Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("error" -> messagesApi.preferred(request)("publicationCandidates.acceptError", e.getMessage)) - } - } - - def reject(id: Int): Action[AnyContent] = CuratorAction.async { implicit request => - val reviewerId = request.user.id.get - val reason = request.body.asFormUrlEncoded.flatMap(_.get("reason").flatMap(_.headOption)) - - publicationDiscoveryService.rejectCandidate(id, reviewerId, reason).map { success => - if (success) Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("success" -> messagesApi.preferred(request)("publicationCandidates.rejectSuccess")) - else Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("error" -> messagesApi.preferred(request)("publicationCandidates.rejectFailed")) - }.recover { - case e: Exception => - logger.error(s"Error rejecting candidate $id: ${e.getMessage}", e) - Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("error" -> messagesApi.preferred(request)("publicationCandidates.rejectError", e.getMessage)) - } - } - - def defer(id: Int): Action[AnyContent] = CuratorAction.async { implicit request => - val reviewerId = request.user.id.get - - publicationDiscoveryService.deferCandidate(id, reviewerId).map { success => - if (success) Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("success" -> messagesApi.preferred(request)("publicationCandidates.deferSuccess")) - else 
Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("error" -> messagesApi.preferred(request)("publicationCandidates.deferFailed")) - } - } - - def bulkAction(): Action[AnyContent] = CuratorAction.async { implicit request => - val reviewerId = request.user.id.get - val formData = request.body.asFormUrlEncoded.getOrElse(Map.empty) - val MaxBulkSize = 500 - val ids = formData.getOrElse("candidateIds", Seq.empty).flatMap(_.split(",")).flatMap(_.toIntOption).take(MaxBulkSize).toSeq - val action = formData.get("bulkAction").flatMap(_.headOption).getOrElse("") - val reason = formData.get("reason").flatMap(_.headOption) - - if (ids.isEmpty) { - Future.successful( - Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("error" -> messagesApi.preferred(request)("publicationCandidates.bulk.noSelection")) - ) - } else { - val resultFuture = action match { - case "accept" => - publicationDiscoveryService.bulkAcceptCandidates(ids, reviewerId).map { results => - val accepted = results.count(_.isDefined) - messagesApi.preferred(request)("publicationCandidates.bulk.acceptSuccess", accepted.toString) - } - case "reject" => - publicationDiscoveryService.bulkRejectCandidates(ids, reviewerId, reason).map { count => - messagesApi.preferred(request)("publicationCandidates.bulk.rejectSuccess", count.toString) - } - case "defer" => - publicationDiscoveryService.bulkDeferCandidates(ids, reviewerId).map { count => - messagesApi.preferred(request)("publicationCandidates.bulk.deferSuccess", count.toString) - } - case _ => - Future.successful(messagesApi.preferred(request)("publicationCandidates.bulk.unknownAction")) - } - - resultFuture.map { message => - Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("success" -> message) - }.recover { - case e: Exception => - logger.error(s"Error in bulk action '$action': ${e.getMessage}", e) - Redirect(routes.PublicationCandidateController.listCandidates()) - .flashing("error" -> 
messagesApi.preferred(request)("publicationCandidates.bulk.error", e.getMessage)) - } - } - } -} diff --git a/app/controllers/PublicationController.scala b/app/controllers/PublicationController.scala deleted file mode 100644 index 8a499c41..00000000 --- a/app/controllers/PublicationController.scala +++ /dev/null @@ -1,133 +0,0 @@ -package controllers - -import models.forms.PaperSubmissionForm -import org.apache.pekko.actor.ActorRef -import org.webjars.play.WebJarsUtil -import play.api.Logging -import play.api.i18n.I18nSupport -import play.api.libs.json.Json -import play.api.mvc.* -import services.PublicationService - -import javax.inject.* -import scala.concurrent.{ExecutionContext, Future} - -/** - * Controller for managing and serving publications. - * - * This controller handles actions related to displaying and retrieving publications. - * It supports both JSON and HTML responses, allowing integration with front-end views - * and API responses. - * - * @constructor Creates a new instance of `PublicationController`. - * @param controllerComponents The controller components for managing HTTP actions. - * @param publicationService The service layer responsible for publication-related operations. - * @param webJarsUtil A utility for WebJars integration, provided implicitly. - * @param ec An execution context for handling asynchronous operations, provided implicitly. - */ -@Singleton -class PublicationController @Inject()( - val controllerComponents: ControllerComponents, - publicationService: PublicationService, - @Named("genomic-study-update-actor") studyUpdateActor: ActorRef - ) - (using webJarsUtil: WebJarsUtil, ec: ExecutionContext) - extends BaseController with I18nSupport with Logging { - /** - * Renders the references page. - * - * This action handles GET requests to display the references page of the application. - * It serves an HTML view with static content and links related to references. 
- * - * @return an action that renders the References view as an HTML response - */ - def index(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.references()) - } - - - /** - * Returns all publications along with their associated details in JSON format. - * - * This method handles an asynchronous request to fetch all publications and their - * detailed information, including associated studies and sample counts. The response - * is serialized as JSON and returned with an HTTP OK status. - * - * @return an asynchronous action that produces an HTTP response containing JSON-encoded - * details of all publications. - */ - def getAllPublicationsWithDetailsJson: Action[play.api.mvc.AnyContent] = Action.async { - publicationService.getAllPublicationsWithDetails.map { publicationsWithDetails => - Ok(Json.toJson(publicationsWithDetails)) - } - } - - /** - * Fetches and displays a paginated list of publications along with their details in an HTML format. - * If a search query is provided, filters publications by title, authors, or abstract. - * - * @param page An optional parameter specifying the current page number for paginated results. - * Defaults to the first page if not provided. - * @param pageSize An optional parameter specifying the number of items per page for paginated results. - * Defaults to 10 if not provided. - * @param query An optional search query to filter publications by title, authors, or abstract. - * @return An asynchronous action that renders an HTML view containing the paginated publication details. 
- */ - def getAllPublicationsWithDetailsHtml(page: Option[Int], pageSize: Option[Int], query: Option[String]): Action[AnyContent] = Action.async { implicit request => - val currentPage = page.getOrElse(1) - val currentPageSize = pageSize.getOrElse(10) - val searchQuery = query.filter(_.trim.nonEmpty) - - val resultFuture = searchQuery match { - case Some(q) => publicationService.searchPublications(q.trim, currentPage, currentPageSize) - case None => publicationService.getPaginatedPublicationsWithDetails(currentPage, currentPageSize) - } - - resultFuture.map { paginatedResult => - Ok(views.html.publicationList(paginatedResult, searchQuery)) - } - } - - def showSubmissionForm(): Action[AnyContent] = Action { implicit request: Request[AnyContent] => - Ok(views.html.publications.submitPaper(PaperSubmissionForm.form)) - } - - def submitPaper() = Action.async { implicit request => - import actors.GenomicStudyUpdateActor.{UpdateResult, UpdateStudy} - import models.domain.publications.StudySource - import org.apache.pekko.pattern.ask - import org.apache.pekko.util.Timeout - - import scala.concurrent.duration.* - - implicit val timeout: Timeout = Timeout(30.seconds) - - PaperSubmissionForm.form.bindFromRequest().fold( - formWithErrors => - Future.successful(BadRequest(views.html.publications.submitPaper(formWithErrors))), - submission => { - for { - publicationOpt <- publicationService.processPublication(submission.doi, submission.forceRefresh) - result <- publicationOpt match { - case Some(publication) if submission.enaAccession.exists(_.nonEmpty) => - (studyUpdateActor ? 
UpdateStudy( - submission.enaAccession.get, - StudySource.ENA, - Some(publication.id.get) - )).mapTo[UpdateResult] - case _ => Future.successful(UpdateResult("", true, "No ENA accession provided")) - } - } yield { - if (result.success) { - Redirect(routes.PublicationController.showSubmissionForm()) - .flashing("success" -> "Publication and associated data have been processed") - } else { - Redirect(routes.PublicationController.showSubmissionForm()) - .flashing("error" -> s"Error processing study: ${result.message}") - } - } - } - ) - } - -} \ No newline at end of file diff --git a/app/controllers/PublicationDiscoveryController.scala b/app/controllers/PublicationDiscoveryController.scala deleted file mode 100644 index d45d5b9a..00000000 --- a/app/controllers/PublicationDiscoveryController.scala +++ /dev/null @@ -1,22 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import actors.PublicationDiscoveryActor -import jakarta.inject.{Inject, Named, Singleton} -import org.apache.pekko.actor.ActorRef -import play.api.Logging -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} - -@Singleton -class PublicationDiscoveryController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - @Named("publication-discovery-actor") publicationDiscoveryActor: ActorRef - ) extends BaseController with Logging { - - def triggerDiscovery(): Action[AnyContent] = secureApi { - logger.info("Manually triggering publication discovery via API.") - publicationDiscoveryActor ! 
PublicationDiscoveryActor.RunDiscovery - Ok("Publication discovery run triggered.") - } -} diff --git a/app/controllers/SequencerController.scala b/app/controllers/SequencerController.scala deleted file mode 100644 index 32464f2a..00000000 --- a/app/controllers/SequencerController.scala +++ /dev/null @@ -1,60 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.AssociateLabWithInstrumentRequest -import models.api.SequencerLabInstrumentsResponse -import play.api.libs.json.Json -import play.api.mvc.{AbstractController, Action, AnyContent, ControllerComponents} -import services.genomics.SequencerInstrumentService - -import scala.concurrent.ExecutionContext - -@Singleton -class SequencerController @Inject()( - cc: ControllerComponents, - apiSecurityAction: ApiSecurityAction, - sequencerService: SequencerInstrumentService - )(implicit ec: ExecutionContext) - extends AbstractController(cc) { - - def getLabByInstrumentId(instrumentId: String): Action[AnyContent] = Action.async { implicit request => - sequencerService.lookupLab(instrumentId).map { - case Some(lookupResult) => - Ok(Json.toJson(lookupResult)) - case None => - NotFound(Json.obj("error" -> s"No lab association found for instrument '$instrumentId'")) - } - } - - def getAllLabInstruments: Action[AnyContent] = Action.async { implicit request => - sequencerService.getAllLabInstrumentAssociations.map { associations => - Ok(Json.toJson(SequencerLabInstrumentsResponse( - data = associations, - count = associations.length - ))) - }.recover { - case _: Exception => - InternalServerError(Json.obj("error" -> "Failed to retrieve lab-instrument associations")) - } - } - - def associateLabWithInstrument(): Action[AssociateLabWithInstrumentRequest] = - Action.async(parse.json[AssociateLabWithInstrumentRequest]) { request => - apiSecurityAction.invokeBlock(request, { _ => - sequencerService.associateLabWithInstrument( - 
request.body.instrumentId, - request.body.labName, - request.body.manufacturer, - request.body.model - ).map { result => - Ok(Json.toJson(result)) - }.recover { - case e: IllegalArgumentException => - BadRequest(Json.obj("error" -> e.getMessage)) - case _: Exception => - InternalServerError(Json.obj("error" -> "Failed to associate lab with instrument")) - } - }) - } -} diff --git a/app/controllers/SequencingLabAdminController.scala b/app/controllers/SequencingLabAdminController.scala deleted file mode 100644 index ef700fc1..00000000 --- a/app/controllers/SequencingLabAdminController.scala +++ /dev/null @@ -1,81 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.{SequencingLabCreateRequest, SequencingLabUpdateRequest} -import models.domain.genomics.SequencingLab -import play.api.libs.json.Json -import play.api.mvc.{AbstractController, Action, AnyContent, ControllerComponents} -import repositories.SequencingLabRepository - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Private CRUD APIs for managing sequencing_lab entries. - * All endpoints are protected by ApiSecurityAction and are not exposed via Swagger (not Tapir-based). 
- */ -@Singleton -class SequencingLabAdminController @Inject()( - cc: ControllerComponents, - secureApi: ApiSecurityAction, - labs: SequencingLabRepository - )(implicit ec: ExecutionContext) extends AbstractController(cc) { - - // List all labs - def list: Action[AnyContent] = secureApi.async { _ => - labs.list().map(ls => Ok(Json.toJson(ls))) - } - - // Get one lab by id - def get(id: Int): Action[AnyContent] = secureApi.async { _ => - labs.findById(id).map { - case Some(l) => Ok(Json.toJson(l)) - case None => NotFound(Json.obj("error" -> s"Sequencing lab $id not found")) - } - } - - // Create a lab - def create: Action[SequencingLabCreateRequest] = secureApi.jsonAction[SequencingLabCreateRequest].async { req => - val body = req.body - val lab = SequencingLab( - id = None, - name = body.name, - isD2c = body.isD2c.getOrElse(false), - websiteUrl = body.websiteUrl, - descriptionMarkdown = body.descriptionMarkdown, - createdAt = LocalDateTime.now(), - updatedAt = None - ) - labs.create(lab).map(created => Created(Json.toJson(created))) - .recover { case e => BadRequest(Json.obj("error" -> e.getMessage)) } - } - - // Update a lab (partial) - def update(id: Int): Action[SequencingLabUpdateRequest] = secureApi.jsonAction[SequencingLabUpdateRequest].async { req => - val patch = req.body - labs.findById(id).flatMap { - case None => Future.successful(NotFound(Json.obj("error" -> s"Sequencing lab $id not found"))) - case Some(existing) => - val updated = existing.copy( - name = patch.name.getOrElse(existing.name), - isD2c = patch.isD2c.getOrElse(existing.isD2c), - websiteUrl = patch.websiteUrl.orElse(existing.websiteUrl), - descriptionMarkdown = patch.descriptionMarkdown.orElse(existing.descriptionMarkdown), - updatedAt = Some(LocalDateTime.now()) - ) - labs.update(id, updated).map { - case Some(u) => Ok(Json.toJson(u)) - case None => InternalServerError(Json.obj("error" -> "Failed to update sequencing lab")) - } - } - } - - // Delete a lab - def delete(id: Int): 
Action[AnyContent] = secureApi.async { _ => - labs.delete(id).map { - case true => NoContent - case false => NotFound(Json.obj("error" -> s"Sequencing lab $id not found")) - } - } -} diff --git a/app/controllers/SpecimenDonorController.scala b/app/controllers/SpecimenDonorController.scala deleted file mode 100644 index ac8ad23b..00000000 --- a/app/controllers/SpecimenDonorController.scala +++ /dev/null @@ -1,52 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.SpecimenDonorMergeRequest -import play.api.libs.json.Json -import play.api.mvc.* -import services.genomics.SpecimenDonorService - -import scala.concurrent.ExecutionContext - -/** - * Controller responsible for handling operations related to specimen donors. - * - * This controller provides endpoints for managing donor records, including - * the ability to merge multiple donors into a single unified record. - * - * @constructor Creates an instance of SpecimenDonorController - * @param donorService the service used to manage and merge specimen donor data - * @param secureApi the security action used to secure API endpoints - * @param cc controller components needed to construct the controller - * @param ec the execution context for handling asynchronous operations - */ -@Singleton -class SpecimenDonorController @Inject()( - donorService: SpecimenDonorService, - secureApi: ApiSecurityAction, - cc: ControllerComponents - )(implicit ec: ExecutionContext) extends AbstractController(cc) { - - /** - * Merges multiple specimen donor records into a single unified record. - * - * This method handles the merging of donor data by using a secure API invocation. - * The merging process is performed based on the details provided in the request body, - * which includes the target donor ID, a list of source donor IDs, and the merge strategy. - * It returns the result of the operation or an appropriate error response in case of failure. 
- * - * @return An asynchronous `Action` that processes a `SpecimenDonorMergeRequest` - * and yields a response containing the result of the donor merge operation. - */ - def mergeDonors(): Action[SpecimenDonorMergeRequest] = Action.async(parse.json[SpecimenDonorMergeRequest]) { request => - secureApi.invokeBlock(request, { secureRequest => - donorService.mergeDonors(request.body).map { result => - Ok(Json.toJson(result)) - }.recover { - case e: IllegalArgumentException => BadRequest(Json.obj("error" -> e.getMessage)) - case e: Exception => InternalServerError(Json.obj("error" -> "Failed to merge donors")) - } - }) - } -} diff --git a/app/controllers/SupportAdminController.scala b/app/controllers/SupportAdminController.scala deleted file mode 100644 index 149303d0..00000000 --- a/app/controllers/SupportAdminController.scala +++ /dev/null @@ -1,201 +0,0 @@ -package controllers - -import actions.{AuthenticatedAction, RoleAction} -import jakarta.inject.{Inject, Singleton} -import models.domain.support.{MessageReply, MessageStatus} -import org.webjars.play.WebJarsUtil -import play.api.Logging -import play.api.data.Form -import play.api.data.Forms.* -import play.api.i18n.I18nSupport -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import repositories.{ContactMessageRepository, UserRepository} -import services.{AuthService, EmailService} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -case class ReplyFormData(replyText: String, sendEmail: Boolean) - -@Singleton -class SupportAdminController @Inject()( - val controllerComponents: ControllerComponents, - authenticatedAction: AuthenticatedAction, - roleAction: RoleAction, - contactMessageRepository: ContactMessageRepository, - userRepository: UserRepository, - authService: AuthService, - emailService: EmailService -)(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) - extends BaseController with I18nSupport with Logging { 
- - private val replyForm = Form( - mapping( - "replyText" -> nonEmptyText(1, 4096), - "sendEmail" -> boolean - )(ReplyFormData.apply)(r => Some((r.replyText, r.sendEmail))) - ) - - private def AdminAction = authenticatedAction andThen roleAction("Admin") - - /** - * List all contact messages for admin review. - */ - def listMessages(status: Option[String], page: Int, pageSize: Int): Action[AnyContent] = AdminAction.async { implicit request => - val statusFilter = status.flatMap(MessageStatus.fromString) - val offset = (page - 1) * pageSize - - for { - messages <- contactMessageRepository.findAll(statusFilter, pageSize, offset) - totalCount <- contactMessageRepository.countByStatus(statusFilter) - } yield { - val totalPages = (totalCount + pageSize - 1) / pageSize - Ok(views.html.support.admin.messageList(messages, statusFilter, page, totalPages, pageSize)) - } - } - - /** - * View a single message with its replies. - */ - def viewMessage(id: UUID): Action[AnyContent] = AdminAction.async { implicit request => - val adminUserId = request.user.id.get - contactMessageRepository.findWithReplies(id).flatMap { - case Some((message, replies)) => - // Mark as read if new - val updateFuture = if (message.status == MessageStatus.New) { - contactMessageRepository.updateStatus(id, MessageStatus.Read) - } else { - Future.successful(0) - } - - // Get sender info if authenticated user - val senderFuture = message.userId match { - case Some(userId) => userRepository.findById(userId) - case None => Future.successful(None) - } - - for { - _ <- updateFuture - senderOpt <- senderFuture - } yield { - Ok(views.html.support.admin.messageDetail(message, replies, senderOpt, replyForm)) - } - - case None => - Future.successful(NotFound(views.html.errors.notFound("Message not found."))) - } - } - - /** - * Submit a reply to a message. 
- */ - def submitReply(messageId: UUID): Action[AnyContent] = AdminAction.async { implicit request => - val adminUserId = request.user.id.get - contactMessageRepository.findById(messageId).flatMap { - case Some(message) => - replyForm.bindFromRequest().fold( - formWithErrors => { - for { - replies <- contactMessageRepository.findRepliesByMessageId(messageId) - senderOpt <- message.userId.map(userRepository.findById).getOrElse(Future.successful(None)) - } yield { - BadRequest(views.html.support.admin.messageDetail(message, replies, senderOpt, formWithErrors)) - } - }, - data => { - val now = LocalDateTime.now() - val reply = MessageReply( - id = None, - messageId = messageId, - adminUserId = adminUserId, - replyText = data.replyText, - emailSent = false, - emailSentAt = None, - createdAt = now - ) - - for { - createdReply <- contactMessageRepository.createReply(reply) - _ <- contactMessageRepository.updateStatus(messageId, MessageStatus.Replied) - _ <- if (data.sendEmail && message.senderEmail.isDefined) { - sendReplyEmail(message, data.replyText, createdReply.id.get) - } else { - Future.successful(()) - } - } yield { - Redirect(routes.SupportAdminController.viewMessage(messageId)) - .flashing("success" -> "Reply sent successfully.") - } - } - ) - - case None => - Future.successful(NotFound(views.html.errors.notFound("Message not found."))) - } - } - - /** - * Update message status. - */ - def updateStatus(messageId: UUID, status: String): Action[AnyContent] = AdminAction.async { implicit request => - MessageStatus.fromString(status) match { - case Some(newStatus) => - contactMessageRepository.updateStatus(messageId, newStatus).map { _ => - Redirect(routes.SupportAdminController.viewMessage(messageId)) - .flashing("success" -> s"Status updated to ${newStatus.value}.") - } - case None => - Future.successful(BadRequest("Invalid status")) - } - } - - /** - * HTMX endpoint: Get unread message count badge for admins. 
- */ - def adminMessageBadge: Action[AnyContent] = AdminAction.async { implicit request => - contactMessageRepository.countUnreadForAdmin.map { count => - Ok(views.html.partials.messageBadge(count)) - } - } - - /** - * Send email reply to anonymous user. - */ - private def sendReplyEmail(message: models.domain.support.ContactMessage, replyText: String, replyId: UUID): Future[Unit] = { - message.senderEmail match { - case Some(email) => - val result = emailService.sendEmail( - to = Seq(email), - from = "support@decoding-us.com", - subject = s"Re: ${message.subject}", - body = - s""" - |Hello ${message.senderName.getOrElse("there")}, - | - |Thank you for contacting Decoding Us. Here is our response: - | - |$replyText - | - |--- - |Original message: - |${message.message} - | - |Best regards, - |The Decoding Us Team - |""".stripMargin - ) - - result match { - case Right(_) => - contactMessageRepository.markEmailSent(replyId).map(_ => ()) - case Left(error) => - logger.error(s"Failed to send reply email: $error") - Future.successful(()) - } - - case None => - Future.successful(()) - } - } -} diff --git a/app/controllers/TreeController.scala b/app/controllers/TreeController.scala deleted file mode 100644 index ba18662d..00000000 --- a/app/controllers/TreeController.scala +++ /dev/null @@ -1,320 +0,0 @@ -package controllers - -import config.FeatureFlags -import models.HaplogroupType -import models.HaplogroupType.{MT, Y} -import models.api.{SubcladeDTO, TreeNodeDTO} -import models.domain.haplogroups.HaplogroupProvenance -import models.view.TreeViewModel -import org.webjars.play.WebJarsUtil -import play.api.cache.{AsyncCacheApi, Cached} -import play.api.Logging -import play.api.i18n.I18nSupport -import play.api.libs.json.Json -import play.api.mvc.* -import services.{ApiRoute, FragmentRoute, HaplogroupTreeService} - -import javax.inject.* -import scala.concurrent.duration.DurationInt -import scala.concurrent.{ExecutionContext, Future} - -/** - * Controller responsible for 
handling actions related to haplogroup trees. - * Provides routes to render tree views and API endpoints to retrieve tree data. - * - * @constructor Creates a new TreeController. - * @param controllerComponents Essential components required for Play controllers. - * @param treeService Service responsible for building haplogroup tree responses. - * @param webJarsUtil Utility for managing web resources. - * @param ec Execution context for handling asynchronous operations. - */ -@Singleton -class TreeController @Inject()(val controllerComponents: MessagesControllerComponents, - treeService: HaplogroupTreeService, - featureFlags: FeatureFlags, - cached: Cached, - cache: AsyncCacheApi) - (using webJarsUtil: WebJarsUtil, ec: ExecutionContext) - extends BaseController with I18nSupport with Logging { - - /** - * Configuration for initializing and handling tree-based data structures - * specific to genetic haplogroup analysis. - * - * @param haplogroupType Specifies the type of haplogroup classification, - * either paternal (Y) or maternal (MT). - * @param defaultRoot Represents the default root haplogroup to be used - * when rendering or querying the tree structure. - */ - private case class TreeConfig( - haplogroupType: HaplogroupType, - defaultRoot: String - ) - - private val YConfig = TreeConfig(Y, "Y") - private val MTConfig = TreeConfig(MT, "L") - private val VERTICAL_TREE_COOKIE = "showVerticalTree" - - private def shouldShowVerticalTree(request: RequestHeader): Boolean = { - request.cookies.get(VERTICAL_TREE_COOKIE).map(_.value.toBoolean).getOrElse(featureFlags.showVerticalTree) - } - - /** - * Renders the Y-DNA tree page. - * - * This action responds to HTTP GET requests and renders a view that displays - * the Y-DNA haplogroup tree. The view includes interactive elements such as - * a search form for navigating to specific haplogroups. - * - * @param rootHaplogroup Optional haplogroup to use as the initial root. 
- * If provided, the tree will load centered on this haplogroup. - * @return an action that renders the Y-DNA tree page as an HTML response - */ - def ytree(rootHaplogroup: Option[String]): Action[AnyContent] = Action { implicit request => - Ok(views.html.ytree(rootHaplogroup, shouldShowVerticalTree(request))) - } - - /** - * Renders the MT-DNA tree page. - * - * This action responds to HTTP GET requests and renders a view that displays - * the MT-DNA haplogroup tree. The view includes interactive elements such as - * a search form for navigating to specific haplogroups. - * - * @param rootHaplogroup Optional haplogroup to use as the initial root. - * If provided, the tree will load centered on this haplogroup. - * @return an action that renders the MT-DNA tree page as an HTML response - */ - def mtree(rootHaplogroup: Option[String]): Action[AnyContent] = Action { implicit request => - Ok(views.html.mtree(rootHaplogroup, shouldShowVerticalTree(request))) - } - - /** - * Handles API requests to retrieve the Y-DNA haplogroup tree structure. - * - * This method generates a JSON representation of the Y-DNA haplogroup tree - * starting from a specified root haplogroup. If no root haplogroup is provided, - * it defaults to the configuration's default root haplogroup. - * - * @param rootHaplogroup an optional string representing the root haplogroup - * for the Y-DNA tree. If None, the default root is used. - * @return an Action that produces a JSON response containing the Y-DNA haplogroup tree. - */ - def apiYTree(rootHaplogroup: Option[String]): EssentialAction = - cached.status( - (request: RequestHeader) => s"ytree-${rootHaplogroup.getOrElse("all")}", - 200, - 24.hours - ) { - treeAction(rootHaplogroup, YConfig, ApiRoute) - } - - /** - * Handles API requests to retrieve the MT-DNA haplogroup tree structure. - * - * This method generates a JSON representation of the MT-DNA haplogroup tree - * starting from a specified root haplogroup. 
If no root haplogroup is provided, - * it defaults to the configuration's default root haplogroup. - * - * @param rootHaplogroup an optional string representing the root haplogroup - * for the MT-DNA tree. If None, the default root is used. - * @return an Action that produces a JSON response containing the MT-DNA haplogroup tree. - */ - def apiMTree(rootHaplogroup: Option[String]): EssentialAction = - cached.status( - (request: RequestHeader) => s"mtree-${rootHaplogroup.getOrElse("all")}", - 200, - 24.hours - ) { - treeAction(rootHaplogroup, MTConfig, ApiRoute) - } - - /** - * Handles requests to render a fragment of the Y-DNA haplogroup tree. - * - * This method generates an HTML fragment that represents a specific portion of the Y-DNA haplogroup tree. - * The portion of the tree rendered can be controlled by specifying a root haplogroup. If no root haplogroup - * is provided, the configuration's default root is used. - * - * For HTMX requests (identified by the HX-Request header), returns the HTML fragment. - * For direct browser requests (e.g., shared URLs), redirects to the full page with the rootHaplogroup parameter. - * - * @param rootHaplogroup an optional string indicating the root haplogroup for the Y-DNA tree fragment. - * If None, the default root haplogroup is used. - * @return an Action that produces an HTML response containing the Y-DNA tree fragment, - * or a redirect to the full page for non-HTMX requests. - */ - def yTreeFragment(rootHaplogroup: Option[String]): Action[AnyContent] = Action.async { implicit request => - if (isHtmxRequest(request)) { - // HTMX request - return fragment (with caching handled internally) - cachedTreeFragment(rootHaplogroup, YConfig, s"ytree-fragment-${rootHaplogroup.getOrElse("all")}") - } else { - // Direct browser request - redirect to full page - Future.successful(Redirect(routes.TreeController.ytree(rootHaplogroup))) - } - } - - /** - * Handles requests to render a fragment of the MT-DNA haplogroup tree. 
- * - * This method generates an HTML fragment that represents a specific portion of the MT-DNA haplogroup tree. - * The portion of the tree rendered can be controlled by specifying a root haplogroup. If no root haplogroup - * is provided, the configuration's default root is used. - * - * For HTMX requests (identified by the HX-Request header), returns the HTML fragment. - * For direct browser requests (e.g., shared URLs), redirects to the full page with the rootHaplogroup parameter. - * - * @param rootHaplogroup an optional string indicating the root haplogroup for the MT-DNA tree fragment. - * If None, the default root haplogroup is used. - * @return an Action that produces an HTML response containing the MT-DNA tree fragment, - * or a redirect to the full page for non-HTMX requests. - */ - def mTreeFragment(rootHaplogroup: Option[String]): Action[AnyContent] = Action.async { implicit request => - if (isHtmxRequest(request)) { - // HTMX request - return fragment (with caching handled internally) - cachedTreeFragment(rootHaplogroup, MTConfig, s"mtree-fragment-${rootHaplogroup.getOrElse("all")}") - } else { - // Direct browser request - redirect to full page - Future.successful(Redirect(routes.TreeController.mtree(rootHaplogroup))) - } - } - - /** - * Checks if the request is from HTMX by looking for the HX-Request header. - */ - private def isHtmxRequest(request: Request[_]): Boolean = { - request.headers.get("HX-Request").contains("true") - } - - /** - * Returns a cached tree fragment response, using the async cache API. 
- */ - private def cachedTreeFragment( - rootHaplogroup: Option[String], - config: TreeConfig, - cacheKey: String - )(using request: Request[AnyContent]): Future[Result] = { - val useVerticalTree = shouldShowVerticalTree(request) - val effectiveCacheKey = s"$cacheKey-vertical:$useVerticalTree" - - cache.getOrElseUpdate(effectiveCacheKey, 24.hours) { - buildTreeFragment(rootHaplogroup, config, useVerticalTree) - } - } - - /** - * Builds the tree fragment response. - */ - private def buildTreeFragment( - rootHaplogroup: Option[String], - config: TreeConfig, - showVerticalTree: Boolean - )(using request: Request[AnyContent]): Future[Result] = { - val haplogroupName = rootHaplogroup.getOrElse(config.defaultRoot) - val isAbsoluteTopRootView = haplogroupName == config.defaultRoot - - val orientation = if (showVerticalTree) services.TreeOrientation.Vertical else services.TreeOrientation.Horizontal - - treeService.buildTreeResponse(haplogroupName, config.haplogroupType, FragmentRoute) - .map { treeDto => - val treeViewModel: Option[TreeViewModel] = treeDto.subclade.flatMap { _ => - services.TreeLayoutService.layoutTree(treeDto, isAbsoluteTopRootView, orientation) - } - - if (showVerticalTree) { - Ok(views.html.fragments.verticalTree(treeDto, config.haplogroupType, treeViewModel, request.uri)) - } else { - Ok(views.html.fragments.haplogroup(treeDto, config.haplogroupType, treeViewModel, request.uri, featureFlags.showBranchAgeEstimates)) - } - } - .recover { - case _: IllegalArgumentException => - Ok(views.html.fragments.error(s"Haplogroup $haplogroupName not found")) - case e => - logger.error(s"Error loading tree fragment: ${e.getMessage}", e) - Ok(views.html.fragments.error("An unexpected error occurred while loading this view.")) - } - } - - /** - * Generates a tree structure for a given root haplogroup and renders it as either - * a JSON response or an HTML fragment depending on the specified route type. 
- * - * This is where TreeLayoutService is now called for FragmentRoute responses. - * - * @param rootHaplogroup an optional string specifying the root haplogroup - * for the tree. If None, the default root defined in - * the configuration is used. - * @param config the tree configuration containing settings such - * as the default root and haplogroup type. - * @param routeType the type of response to generate, either JSON - * (for API responses) or HTML fragments. - * @return an Action that produces either a JSON response with the tree - * structure or an HTML fragment based on the route type. - */ - private def treeAction( - rootHaplogroup: Option[String], - config: TreeConfig, - routeType: services.RouteType - ): Action[AnyContent] = Action.async { implicit request => - - val haplogroupName = rootHaplogroup.getOrElse(config.defaultRoot) - val isAbsoluteTopRootView = haplogroupName == config.defaultRoot - val showVerticalTree = shouldShowVerticalTree(request) - val orientation = if (showVerticalTree) services.TreeOrientation.Vertical else services.TreeOrientation.Horizontal - - treeService.buildTreeResponse(haplogroupName, config.haplogroupType, routeType) - .map { treeDto => - routeType match { - case ApiRoute => - // TAPIR can't deal with the recursive tree, so we need to flatten it. 
- val apiBody: Seq[SubcladeDTO] = treeService.mapApiResponse(treeDto.subclade) - Ok(Json.toJson(apiBody)) - case FragmentRoute => - val treeViewModel: Option[TreeViewModel] = treeDto.subclade.flatMap { rootNodeDTO => - services.TreeLayoutService.layoutTree(treeDto, isAbsoluteTopRootView, orientation) - } - - if (showVerticalTree) { - Ok(views.html.fragments.verticalTree(treeDto, config.haplogroupType, treeViewModel, request.uri)) - } else { - Ok(views.html.fragments.haplogroup(treeDto, config.haplogroupType, treeViewModel, request.uri, featureFlags.showBranchAgeEstimates)) - } - } - } - .recover { - case _: IllegalArgumentException => - routeType match { - case ApiRoute => NotFound(Json.obj("error" -> s"Haplogroup $haplogroupName not found")) - case FragmentRoute => Ok(views.html.fragments.error(s"Haplogroup $haplogroupName not found")) - } - case e => - logger.error(s"Error loading haplogroup tree", e) - routeType match { - case ApiRoute => InternalServerError(Json.obj("error" -> "An internal error occurred.")) - case FragmentRoute => Ok(views.html.fragments.error("An unexpected error occurred.")) - } - } - } - - def getSnpDetailSidebar(haplogroupName: String, haplogroupType: HaplogroupType): Action[AnyContent] = Action.async { implicit request => - treeService.findHaplogroupWithVariants(haplogroupName, haplogroupType).map { case (haplogroup, snps) => - val provenance = haplogroup.flatMap(_.provenance) - Ok(views.html.fragments.snpDetailSidebar(haplogroupName, snps, provenance)) - } - } - - def emptySnpDetailSidebarPlaceholder: Action[AnyContent] = Action { implicit request => - Ok(
) - } - - // TODO: Should probably move this to the service. - private def mapApiResponse(root: Option[TreeNodeDTO]): Seq[SubcladeDTO] = { - def map(node: TreeNodeDTO, parent: Option[TreeNodeDTO]): Seq[SubcladeDTO] = { - SubcladeDTO(node.name, parent.map(_.name), node.variants, node.updated, node.isBackbone) +: node.children.flatMap(c => map(c, Option(node))) - } - - root.map(x => map(x, None)) - .getOrElse(Seq()) - } -} \ No newline at end of file diff --git a/app/controllers/TreeVersioningApiController.scala b/app/controllers/TreeVersioningApiController.scala deleted file mode 100644 index 260624b1..00000000 --- a/app/controllers/TreeVersioningApiController.scala +++ /dev/null @@ -1,331 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.domain.haplogroups.{ChangeSetStatus, ChangeStatus} -import play.api.Logging -import play.api.libs.json.{Format, JsError, JsSuccess, Json, OFormat, Reads} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import services.TreeVersioningService - -import scala.concurrent.{ExecutionContext, Future} - -/** - * API controller for Tree Versioning operations. - * Provides endpoints for managing change sets from tree merge operations. - * Secured with X-API-Key authentication. 
- */ -@Singleton -class TreeVersioningApiController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - treeVersioningService: TreeVersioningService -)(implicit ec: ExecutionContext) extends BaseController with Logging { - - // ============================================================================ - // Request/Response DTOs - // ============================================================================ - - // Audit identity for API-key-authenticated actions - private val ApiCuratorId = "api-system" - - case class DiscardChangeSetRequest(reason: String) - object DiscardChangeSetRequest { - implicit val format: OFormat[DiscardChangeSetRequest] = Json.format[DiscardChangeSetRequest] - } - - case class ReviewChangeRequest( - action: String, // "APPLIED", "SKIPPED", "REVERTED" - notes: Option[String] = None - ) - object ReviewChangeRequest { - implicit val format: OFormat[ReviewChangeRequest] = Json.format[ReviewChangeRequest] - } - - case class AddCommentRequest(content: String, treeChangeId: Option[Int] = None) - object AddCommentRequest { - implicit val format: OFormat[AddCommentRequest] = Json.format[AddCommentRequest] - } - - // ============================================================================ - // Change Set Endpoints - // ============================================================================ - - /** - * List change sets with optional filters. 
- * GET /api/v1/manage/change-sets?haplogroupType=Y&status=READY_FOR_REVIEW&page=1&pageSize=20 - */ - def listChangeSets( - haplogroupType: Option[String], - status: Option[String], - page: Int, - pageSize: Int - ): Action[AnyContent] = secureApi.async { _ => - val hgType = haplogroupType.flatMap(parseHaplogroupType) - val csStatus = status.flatMap(parseChangeSetStatus) - - treeVersioningService.listChangeSets(hgType, csStatus, page, pageSize).map { case (summaries, total) => - Ok(Json.obj( - "changeSets" -> summaries, - "total" -> total, - "page" -> page, - "pageSize" -> pageSize, - "totalPages" -> ((total + pageSize - 1) / pageSize) - )) - }.recover { - case e: Exception => - logger.error(s"Error listing change sets: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Get change set details. - * GET /api/v1/manage/change-sets/:id - */ - def getChangeSetDetails(id: Int): Action[AnyContent] = secureApi.async { _ => - treeVersioningService.getChangeSetDetails(id).map { - case Some(details) => Ok(Json.toJson(details)) - case None => NotFound(Json.obj("error" -> s"Change set $id not found")) - }.recover { - case e: Exception => - logger.error(s"Error getting change set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Start review of a change set. 
- * POST /api/v1/manage/change-sets/:id/start-review - */ - def startReview(id: Int): Action[AnyContent] = - secureApi.async { request => - treeVersioningService.startReview(id, ApiCuratorId).map { success => - if (success) { - Ok(Json.obj("success" -> true, "message" -> s"Review started for change set $id")) - } else { - BadRequest(Json.obj("error" -> "Failed to start review")) - } - }.recover { - case e: IllegalStateException => - BadRequest(Json.obj("error" -> e.getMessage)) - case e: NoSuchElementException => - NotFound(Json.obj("error" -> e.getMessage)) - case e: Exception => - logger.error(s"Error starting review for change set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Apply a change set to Production. - * POST /api/v1/manage/change-sets/:id/apply - */ - def applyChangeSet(id: Int): Action[AnyContent] = - secureApi.async { request => - treeVersioningService.applyChangeSet(id, ApiCuratorId).map { success => - if (success) { - Ok(Json.obj("success" -> true, "message" -> s"Change set $id applied to Production")) - } else { - BadRequest(Json.obj("error" -> "Failed to apply change set")) - } - }.recover { - case e: IllegalStateException => - BadRequest(Json.obj("error" -> e.getMessage)) - case e: NoSuchElementException => - NotFound(Json.obj("error" -> e.getMessage)) - case e: Exception => - logger.error(s"Error applying change set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Discard a change set. 
- * POST /api/v1/manage/change-sets/:id/discard - */ - def discardChangeSet(id: Int): Action[DiscardChangeSetRequest] = - secureApi.jsonAction[DiscardChangeSetRequest].async { request => - val req = request.body - treeVersioningService.discardChangeSet(id, ApiCuratorId, req.reason).map { success => - if (success) { - Ok(Json.obj("success" -> true, "message" -> s"Change set $id discarded")) - } else { - BadRequest(Json.obj("error" -> "Failed to discard change set")) - } - }.recover { - case e: IllegalStateException => - BadRequest(Json.obj("error" -> e.getMessage)) - case e: NoSuchElementException => - NotFound(Json.obj("error" -> e.getMessage)) - case e: Exception => - logger.error(s"Error discarding change set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - // ============================================================================ - // Change Review Endpoints - // ============================================================================ - - /** - * Get pending changes for review. - * GET /api/v1/manage/change-sets/:id/changes/pending?limit=50 - */ - def getPendingChanges(id: Int, limit: Int): Action[AnyContent] = secureApi.async { _ => - treeVersioningService.getPendingReviewChanges(id, limit).map { changes => - Ok(Json.obj( - "changeSetId" -> id, - "pendingChanges" -> changes, - "count" -> changes.size - )) - }.recover { - case e: Exception => - logger.error(s"Error getting pending changes for set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Review an individual change. 
- * POST /api/v1/manage/change-sets/:changeSetId/changes/:changeId/review - */ - def reviewChange(changeSetId: Int, changeId: Int): Action[ReviewChangeRequest] = - secureApi.jsonAction[ReviewChangeRequest].async { request => - val req = request.body - parseChangeStatus(req.action) match { - case None => - Future.successful(BadRequest(Json.obj("error" -> s"Invalid action: ${req.action}"))) - case Some(action) => - treeVersioningService.reviewChange(changeId, ApiCuratorId, action, req.notes).map { success => - if (success) { - Ok(Json.obj("success" -> true, "message" -> s"Change $changeId reviewed as ${req.action}")) - } else { - BadRequest(Json.obj("error" -> "Failed to review change")) - } - }.recover { - case e: IllegalArgumentException => - BadRequest(Json.obj("error" -> e.getMessage)) - case e: Exception => - logger.error(s"Error reviewing change $changeId: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - } - - /** - * Approve all pending changes in a change set. - * POST /api/v1/manage/change-sets/:id/approve-all - */ - def approveAllPending(id: Int): Action[AnyContent] = - secureApi.async { request => - treeVersioningService.approveAllPending(id, ApiCuratorId).map { count => - Ok(Json.obj("success" -> true, "approvedCount" -> count)) - }.recover { - case e: Exception => - logger.error(s"Error approving all changes in set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - // ============================================================================ - // Comment Endpoints - // ============================================================================ - - /** - * Add a comment to a change set. 
- * POST /api/v1/manage/change-sets/:id/comments - */ - def addComment(id: Int): Action[AddCommentRequest] = - secureApi.jsonAction[AddCommentRequest].async { request => - val req = request.body - treeVersioningService.addComment(id, ApiCuratorId, req.content, req.treeChangeId).map { commentId => - Created(Json.obj("success" -> true, "commentId" -> commentId)) - }.recover { - case e: Exception => - logger.error(s"Error adding comment to set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * List comments for a change set. - * GET /api/v1/manage/change-sets/:id/comments - */ - def listComments(id: Int): Action[AnyContent] = secureApi.async { _ => - treeVersioningService.listComments(id).map { comments => - Ok(Json.obj("changeSetId" -> id, "comments" -> comments)) - }.recover { - case e: Exception => - logger.error(s"Error listing comments for set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - // ============================================================================ - // Tree Diff Endpoints (Phase 3) - // ============================================================================ - - /** - * Get tree diff for a change set. - * GET /api/v1/manage/change-sets/:id/diff - */ - def getTreeDiff(id: Int): Action[AnyContent] = secureApi.async { _ => - treeVersioningService.getTreeDiff(id).map { diff => - Ok(Json.toJson(diff)) - }.recover { - case e: Exception => - logger.error(s"Error getting tree diff for set $id: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Get active tree diff for a haplogroup type. 
- * GET /api/v1/manage/tree-diff/:haplogroupType - */ - def getActiveTreeDiff(haplogroupType: String): Action[AnyContent] = secureApi.async { _ => - parseHaplogroupType(haplogroupType) match { - case None => - Future.successful(BadRequest(Json.obj("error" -> s"Invalid haplogroup type: $haplogroupType"))) - case Some(hgType) => - treeVersioningService.getActiveTreeDiff(hgType).map { - case Some(diff) => Ok(Json.toJson(diff)) - case None => Ok(Json.obj("message" -> s"No active change set for $haplogroupType")) - }.recover { - case e: Exception => - logger.error(s"Error getting active tree diff for $haplogroupType: ${e.getMessage}", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - } - - // ============================================================================ - // Helper Methods - // ============================================================================ - - private def parseHaplogroupType(s: String): Option[HaplogroupType] = { - try { - Some(HaplogroupType.valueOf(s.toUpperCase)) - } catch { - case _: IllegalArgumentException => None - } - } - - private def parseChangeSetStatus(s: String): Option[ChangeSetStatus] = { - try { - Some(ChangeSetStatus.fromString(s)) - } catch { - case _: IllegalArgumentException => None - } - } - - private def parseChangeStatus(s: String): Option[ChangeStatus] = { - try { - Some(ChangeStatus.fromString(s)) - } catch { - case _: IllegalArgumentException => None - } - } -} diff --git a/app/controllers/TreeVersioningCuratorController.scala b/app/controllers/TreeVersioningCuratorController.scala deleted file mode 100644 index 4dd352bd..00000000 --- a/app/controllers/TreeVersioningCuratorController.scala +++ /dev/null @@ -1,675 +0,0 @@ -package controllers - -import actions.{AuthenticatedAction, AuthenticatedRequest, PermissionAction} -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.dal.domain.haplogroups.{DeferPriority, ResolutionType, WipResolutionRow} 
-import models.domain.haplogroups.{ChangeSetStatus, ChangeStatus} -import org.webjars.play.WebJarsUtil -import play.api.Logging -import play.api.data.Form -import play.api.data.Forms.* -import play.api.i18n.I18nSupport -import play.api.libs.json.* -import play.api.mvc.* -import repositories.WipTreeRepository -import services.TreeVersioningService - -import java.io.File -import java.time.LocalDateTime -import scala.io.Source -import scala.concurrent.{ExecutionContext, Future} -import scala.util.Using - -/** - * Curator UI controller for Tree Versioning operations. - * Provides HTML views for managing change sets from tree merge operations. - */ -@Singleton -class TreeVersioningCuratorController @Inject()( - val controllerComponents: ControllerComponents, - protected val authenticatedAction: AuthenticatedAction, - protected val permissionAction: PermissionAction, - treeVersioningService: TreeVersioningService, - wipTreeRepository: WipTreeRepository -)(implicit ec: ExecutionContext, webJarsUtil: WebJarsUtil) - extends BaseController with I18nSupport with Logging with BaseCuratorController { - - // Forms - case class DiscardFormData(reason: String) - private val discardForm: Form[DiscardFormData] = Form( - mapping( - "reason" -> nonEmptyText(minLength = 10, maxLength = 500) - )(DiscardFormData.apply)(d => Some(d.reason)) - ) - - case class ReviewChangeFormData(action: String, notes: Option[String]) - private val reviewChangeForm: Form[ReviewChangeFormData] = Form( - mapping( - "action" -> nonEmptyText.verifying("Invalid action", a => Seq("APPLIED", "SKIPPED", "REVERTED").contains(a)), - "notes" -> optional(text(maxLength = 1000)) - )(ReviewChangeFormData.apply)(d => Some((d.action, d.notes))) - ) - - // Resolution form data classes - case class ReparentFormData( - wipHaplogroupId: Option[Int], - wipReparentId: Option[Int], - newParentId: Option[Int], - newParentPlaceholderId: Option[Int], - notes: Option[String] - ) - private val reparentForm: Form[ReparentFormData] = 
Form( - mapping( - "wipHaplogroupId" -> optional(number), - "wipReparentId" -> optional(number), - "newParentId" -> optional(number), - "newParentPlaceholderId" -> optional(number), - "notes" -> optional(text(maxLength = 1000)) - )(ReparentFormData.apply)(d => Some((d.wipHaplogroupId, d.wipReparentId, d.newParentId, d.newParentPlaceholderId, d.notes))) - ) - - case class EditVariantsFormData( - wipHaplogroupId: Option[Int], - wipReparentId: Option[Int], - variantsToAdd: String, - variantsToRemove: String, - notes: Option[String] - ) - private val editVariantsForm: Form[EditVariantsFormData] = Form( - mapping( - "wipHaplogroupId" -> optional(number), - "wipReparentId" -> optional(number), - "variantsToAdd" -> text, - "variantsToRemove" -> text, - "notes" -> optional(text(maxLength = 1000)) - )(EditVariantsFormData.apply)(d => Some((d.wipHaplogroupId, d.wipReparentId, d.variantsToAdd, d.variantsToRemove, d.notes))) - ) - - case class MergeExistingFormData( - wipHaplogroupId: Option[Int], - wipReparentId: Option[Int], - mergeTargetId: Int, - notes: Option[String] - ) - private val mergeExistingForm: Form[MergeExistingFormData] = Form( - mapping( - "wipHaplogroupId" -> optional(number), - "wipReparentId" -> optional(number), - "mergeTargetId" -> number, - "notes" -> optional(text(maxLength = 1000)) - )(MergeExistingFormData.apply)(d => Some((d.wipHaplogroupId, d.wipReparentId, d.mergeTargetId, d.notes))) - ) - - case class DeferFormData( - wipHaplogroupId: Option[Int], - wipReparentId: Option[Int], - priority: String, - reason: String, - notes: Option[String] - ) - private val deferForm: Form[DeferFormData] = Form( - mapping( - "wipHaplogroupId" -> optional(number), - "wipReparentId" -> optional(number), - "priority" -> nonEmptyText.verifying("Invalid priority", p => Seq("LOW", "NORMAL", "HIGH", "CRITICAL").contains(p.toUpperCase)), - "reason" -> nonEmptyText(minLength = 5, maxLength = 500), - "notes" -> optional(text(maxLength = 1000)) - )(DeferFormData.apply)(d => 
Some((d.wipHaplogroupId, d.wipReparentId, d.priority, d.reason, d.notes))) - ) - - // ============================================================================ - // Change Set List - // ============================================================================ - - /** - * Change set list page (wrapper with filters). - */ - def listChangeSets(hgType: Option[String], status: Option[String], pageSize: Int): Action[AnyContent] = - withPermission("tree.version.view") { implicit request => - Ok(views.html.curator.changesets.list(hgType, status, pageSize)) - } - - /** - * Change set list fragment (loaded via HTMX). - */ - def changeSetsFragment(hgType: Option[String], status: Option[String], page: Int, pageSize: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - val haplogroupType = hgType.flatMap(parseHaplogroupType) - val changeSetStatus = status.flatMap(parseChangeSetStatus) - - treeVersioningService.listChangeSets(haplogroupType, changeSetStatus, page, pageSize).map { case (summaries, total) => - val totalPages = Math.max(1, (total + pageSize - 1) / pageSize) - Ok(views.html.curator.changesets.listFragment(summaries, hgType, status, page, totalPages, pageSize)) - } - } - - // ============================================================================ - // Change Set Details - // ============================================================================ - - /** - * Change set detail panel (loaded via HTMX). - */ - def changeSetDetailPanel(id: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - treeVersioningService.getChangeSetDetails(id).map { - case Some(details) => - Ok(views.html.curator.changesets.detailPanel(details, discardForm)) - case None => - NotFound(views.html.fragments.errorPanel("Change set not found")) - } - } - - /** - * Get pending changes for a change set (loaded via HTMX). 
- */ - def pendingChangesFragment(id: Int, limit: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - treeVersioningService.getPendingReviewChangesWithNames(id, limit).map { changes => - Ok(views.html.curator.changesets.changesFragment(id, changes, reviewChangeForm)) - } - } - - // ============================================================================ - // Change Set Actions - // ============================================================================ - - /** - * Start review of a change set. - */ - def startReview(id: Int): Action[AnyContent] = - withPermission("tree.version.review").async { implicit request => - treeVersioningService.startReview(id, curatorId(request)).map { success => - if (success) { - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("success" -> s"Review started for change set #$id") - } else { - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("error" -> "Failed to start review") - } - }.recover { - case e: IllegalStateException => - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("error" -> e.getMessage) - } - } - - /** - * Apply a change set to Production. 
- */ - def applyChangeSet(id: Int): Action[AnyContent] = - withPermission("tree.version.promote").async { implicit request => - treeVersioningService.applyChangeSet(id, curatorId(request)).map { success => - if (success) { - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("success" -> s"Change set #$id applied to Production!") - } else { - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("error" -> "Failed to apply change set") - } - }.recover { - case e: IllegalStateException => - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("error" -> e.getMessage) - } - } - - /** - * Discard a change set. - */ - def discardChangeSet(id: Int): Action[AnyContent] = - withPermission("tree.version.discard").async { implicit request => - discardForm.bindFromRequest().fold( - formWithErrors => { - Future.successful( - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("error" -> "Please provide a reason for discarding (min 10 characters)") - ) - }, - data => { - treeVersioningService.discardChangeSet(id, curatorId(request), data.reason).map { success => - if (success) { - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("success" -> s"Change set #$id discarded") - } else { - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("error" -> "Failed to discard change set") - } - }.recover { - case e: IllegalStateException => - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("error" -> e.getMessage) - } - } - ) - } - - /** - * Approve all pending changes in a change set. 
- */ - def approveAllPending(id: Int): Action[AnyContent] = - withPermission("tree.version.review").async { implicit request => - treeVersioningService.approveAllPending(id, curatorId(request)).map { count => - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("success" -> s"Approved $count pending changes in change set #$id") - }.recover { - case e: Exception => - Redirect(routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - .flashing("error" -> e.getMessage) - } - } - - // ============================================================================ - // Individual Change Review - // ============================================================================ - - /** - * Review an individual change (HTMX POST). - */ - def reviewChange(changeSetId: Int, changeId: Int): Action[AnyContent] = - withPermission("tree.version.review").async { implicit request => - reviewChangeForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(views.html.fragments.errorPanel("Invalid review action"))) - }, - data => { - parseChangeStatus(data.action) match { - case None => - Future.successful(BadRequest(views.html.fragments.errorPanel(s"Invalid action: ${data.action}"))) - case Some(status) => - treeVersioningService.reviewChange(changeId, curatorId(request), status, data.notes).map { success => - if (success) { - Ok(views.html.fragments.successPanel(s"Change #$changeId marked as ${data.action}")) - } else { - BadRequest(views.html.fragments.errorPanel("Failed to review change")) - } - }.recover { - case e: Exception => - BadRequest(views.html.fragments.errorPanel(e.getMessage)) - } - } - } - ) - } - - // ============================================================================ - // Ambiguity Report Views - // ============================================================================ - - /** - * View ambiguity report for a change set. 
- */ - def ambiguityReport(id: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - treeVersioningService.getChangeSetDetails(id).map { - case Some(details) => - details.changeSet.ambiguityReportPath match { - case Some(path) => - val file = new File(path) - if (file.exists()) { - Using(Source.fromFile(file)) { source => - val content = source.mkString - Ok(views.html.curator.changesets.ambiguityReport(details.changeSet, content)) - }.getOrElse { - InternalServerError(views.html.fragments.errorPanel("Failed to read ambiguity report")) - } - } else { - NotFound(views.html.fragments.errorPanel(s"Ambiguity report not found at: ${file.getName}")) - } - case None => - Ok(views.html.curator.changesets.ambiguityReport(details.changeSet, "No ambiguities were detected during this merge.")) - } - case None => - NotFound(views.html.fragments.errorPanel("Change set not found")) - } - } - - /** - * Download ambiguity report as markdown file. - */ - def downloadAmbiguityReport(id: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - treeVersioningService.getChangeSetDetails(id).map { - case Some(details) => - details.changeSet.ambiguityReportPath match { - case Some(path) => - val file = new File(path) - if (file.exists()) { - Ok.sendFile(file, fileName = _ => Some(file.getName)) - .as("text/markdown") - } else { - NotFound("Ambiguity report file not found") - } - case None => - NotFound("No ambiguity report available for this change set") - } - case None => - NotFound("Change set not found") - } - } - - // ============================================================================ - // Tree Diff Views - // ============================================================================ - - /** - * Diff view page for a change set. 
- */ - def diffView(id: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - treeVersioningService.getChangeSetDetails(id).map { - case Some(details) => - Ok(views.html.curator.changesets.diffView(details)) - case None => - NotFound(views.html.fragments.errorPanel("Change set not found")) - } - } - - /** - * Diff fragment (loaded via HTMX). - */ - def diffFragment(id: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - treeVersioningService.getTreeDiff(id).map { diff => - Ok(views.html.curator.changesets.diffFragment(diff)) - } - } - - /** - * Get ASCII tree preview of proposed changes for a change set. - * Returns plain text showing the tree structure with markers for new/reparented nodes. - */ - def treePreview(id: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - treeVersioningService.getTreePreview(id).map { preview => - Ok(preview).as("text/plain; charset=utf-8") - } - } - - // ============================================================================ - // Conflict Resolution API - // ============================================================================ - - /** - * Get all resolutions for a change set. - */ - def listResolutions(id: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - wipTreeRepository.getResolutionsForChangeSet(id).map { resolutions => - Ok(Json.toJson(resolutions.map(resolutionToJson))) - } - } - - /** - * Get deferred items for a change set. - */ - def listDeferredItems(id: Int): Action[AnyContent] = - withPermission("tree.version.view").async { implicit request => - wipTreeRepository.getDeferredItems(id).map { deferred => - Ok(Json.toJson(deferred.map(resolutionToJson))) - } - } - - /** - * Create a REPARENT resolution - change the parent of a node. 
- */ - def resolveReparent(id: Int): Action[AnyContent] = - withPermission("tree.version.review").async { implicit request => - reparentForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(Json.obj( - "error" -> "Invalid form data", - "details" -> formWithErrors.errors.map(e => s"${e.key}: ${e.message}").mkString(", ") - ))) - }, - data => { - if (data.wipHaplogroupId.isEmpty && data.wipReparentId.isEmpty) { - Future.successful(BadRequest(Json.obj( - "error" -> "Either wipHaplogroupId or wipReparentId must be provided" - ))) - } else if (data.newParentId.isEmpty && data.newParentPlaceholderId.isEmpty) { - Future.successful(BadRequest(Json.obj( - "error" -> "Either newParentId or newParentPlaceholderId must be provided" - ))) - } else { - val resolution = WipResolutionRow( - id = None, - changeSetId = id, - wipHaplogroupId = data.wipHaplogroupId, - wipReparentId = data.wipReparentId, - resolutionType = "REPARENT", - newParentId = data.newParentId, - newParentPlaceholderId = data.newParentPlaceholderId, - mergeTargetId = None, - variantsToAdd = None, - variantsToRemove = None, - deferReason = None, - deferPriority = "NORMAL", - curatorId = curatorId(request), - curatorNotes = data.notes, - status = "PENDING", - createdAt = LocalDateTime.now(), - appliedAt = None - ) - wipTreeRepository.createResolution(resolution).map { resolutionId => - Created(Json.obj( - "message" -> "Reparent resolution created", - "resolutionId" -> resolutionId - )) - } - } - } - ) - } - - /** - * Create an EDIT_VARIANTS resolution - add or remove variant associations. 
- */ - def resolveEditVariants(id: Int): Action[AnyContent] = - withPermission("tree.version.review").async { implicit request => - editVariantsForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(Json.obj( - "error" -> "Invalid form data", - "details" -> formWithErrors.errors.map(e => s"${e.key}: ${e.message}").mkString(", ") - ))) - }, - data => { - if (data.wipHaplogroupId.isEmpty && data.wipReparentId.isEmpty) { - Future.successful(BadRequest(Json.obj( - "error" -> "Either wipHaplogroupId or wipReparentId must be provided" - ))) - } else { - val resolution = WipResolutionRow( - id = None, - changeSetId = id, - wipHaplogroupId = data.wipHaplogroupId, - wipReparentId = data.wipReparentId, - resolutionType = "EDIT_VARIANTS", - newParentId = None, - newParentPlaceholderId = None, - mergeTargetId = None, - variantsToAdd = Some(data.variantsToAdd), - variantsToRemove = Some(data.variantsToRemove), - deferReason = None, - deferPriority = "NORMAL", - curatorId = curatorId(request), - curatorNotes = data.notes, - status = "PENDING", - createdAt = LocalDateTime.now(), - appliedAt = None - ) - wipTreeRepository.createResolution(resolution).map { resolutionId => - Created(Json.obj( - "message" -> "Edit variants resolution created", - "resolutionId" -> resolutionId - )) - } - } - } - ) - } - - /** - * Create a MERGE_EXISTING resolution - map WIP node to existing production node. 
- */ - def resolveMergeExisting(id: Int): Action[AnyContent] = - withPermission("tree.version.review").async { implicit request => - mergeExistingForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(Json.obj( - "error" -> "Invalid form data", - "details" -> formWithErrors.errors.map(e => s"${e.key}: ${e.message}").mkString(", ") - ))) - }, - data => { - if (data.wipHaplogroupId.isEmpty && data.wipReparentId.isEmpty) { - Future.successful(BadRequest(Json.obj( - "error" -> "Either wipHaplogroupId or wipReparentId must be provided" - ))) - } else { - val resolution = WipResolutionRow( - id = None, - changeSetId = id, - wipHaplogroupId = data.wipHaplogroupId, - wipReparentId = data.wipReparentId, - resolutionType = "MERGE_EXISTING", - newParentId = None, - newParentPlaceholderId = None, - mergeTargetId = Some(data.mergeTargetId), - variantsToAdd = None, - variantsToRemove = None, - deferReason = None, - deferPriority = "NORMAL", - curatorId = curatorId(request), - curatorNotes = data.notes, - status = "PENDING", - createdAt = LocalDateTime.now(), - appliedAt = None - ) - wipTreeRepository.createResolution(resolution).map { resolutionId => - Created(Json.obj( - "message" -> "Merge existing resolution created", - "resolutionId" -> resolutionId - )) - } - } - } - ) - } - - /** - * Create a DEFER resolution - move to manual review queue. 
- */ - def resolveDefer(id: Int): Action[AnyContent] = - withPermission("tree.version.review").async { implicit request => - deferForm.bindFromRequest().fold( - formWithErrors => { - Future.successful(BadRequest(Json.obj( - "error" -> "Invalid form data", - "details" -> formWithErrors.errors.map(e => s"${e.key}: ${e.message}").mkString(", ") - ))) - }, - data => { - if (data.wipHaplogroupId.isEmpty && data.wipReparentId.isEmpty) { - Future.successful(BadRequest(Json.obj( - "error" -> "Either wipHaplogroupId or wipReparentId must be provided" - ))) - } else { - val resolution = WipResolutionRow( - id = None, - changeSetId = id, - wipHaplogroupId = data.wipHaplogroupId, - wipReparentId = data.wipReparentId, - resolutionType = "DEFER", - newParentId = None, - newParentPlaceholderId = None, - mergeTargetId = None, - variantsToAdd = None, - variantsToRemove = None, - deferReason = Some(data.reason), - deferPriority = data.priority.toUpperCase, - curatorId = curatorId(request), - curatorNotes = data.notes, - status = "PENDING", - createdAt = LocalDateTime.now(), - appliedAt = None - ) - wipTreeRepository.createResolution(resolution).map { resolutionId => - Created(Json.obj( - "message" -> "Defer resolution created", - "resolutionId" -> resolutionId - )) - } - } - } - ) - } - - /** - * Cancel a resolution. 
- */ - def cancelResolution(changeSetId: Int, resolutionId: Int): Action[AnyContent] = - withPermission("tree.version.review").async { implicit request => - wipTreeRepository.cancelResolution(resolutionId).map { updated => - if (updated > 0) { - Ok(Json.obj( - "message" -> "Resolution cancelled", - "resolutionId" -> resolutionId - )) - } else { - NotFound(Json.obj("error" -> "Resolution not found")) - } - } - } - - // JSON serialization helper for WipResolutionRow - private def resolutionToJson(r: WipResolutionRow): JsObject = Json.obj( - "id" -> r.id, - "changeSetId" -> r.changeSetId, - "wipHaplogroupId" -> r.wipHaplogroupId, - "wipReparentId" -> r.wipReparentId, - "resolutionType" -> r.resolutionType, - "newParentId" -> r.newParentId, - "newParentPlaceholderId" -> r.newParentPlaceholderId, - "mergeTargetId" -> r.mergeTargetId, - "variantsToAdd" -> r.variantsToAdd, - "variantsToRemove" -> r.variantsToRemove, - "deferReason" -> r.deferReason, - "deferPriority" -> r.deferPriority, - "curatorId" -> r.curatorId, - "curatorNotes" -> r.curatorNotes, - "status" -> r.status, - "createdAt" -> r.createdAt.toString, - "appliedAt" -> r.appliedAt.map(_.toString) - ) - - // ============================================================================ - // Helpers - // ============================================================================ - - private def parseHaplogroupType(s: String): Option[HaplogroupType] = { - try { - Some(HaplogroupType.valueOf(s.toUpperCase)) - } catch { - case _: IllegalArgumentException => None - } - } - - private def parseChangeSetStatus(s: String): Option[ChangeSetStatus] = { - try { - Some(ChangeSetStatus.fromString(s)) - } catch { - case _: IllegalArgumentException => None - } - } - - private def parseChangeStatus(s: String): Option[ChangeStatus] = { - try { - Some(ChangeStatus.fromString(s)) - } catch { - case _: IllegalArgumentException => None - } - } -} diff --git a/app/controllers/VariantApiController.scala 
b/app/controllers/VariantApiController.scala deleted file mode 100644 index 5684bf05..00000000 --- a/app/controllers/VariantApiController.scala +++ /dev/null @@ -1,416 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Singleton} -import models.api.* -import models.domain.genomics.VariantV2 -import play.api.Logging -import play.api.libs.json.{JsObject, Json} -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import repositories.VariantV2Repository - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Private API controller for bulk variant operations. - * Secured with X-API-Key authentication. - * - * Updated for VariantV2 schema with JSONB coordinates and aliases. - */ -@Singleton -class VariantApiController @Inject()( - val controllerComponents: ControllerComponents, - secureApi: ApiSecurityAction, - variantRepository: VariantV2Repository -)(implicit ec: ExecutionContext) extends BaseController with Logging { - - /** - * Bulk add reference builds (coordinates) to existing variants. - * Matches variants by name or rsId, then adds coordinates for the specified reference genome. 
- */ - private val MaxBulkSize = 1000 - - def bulkAddBuilds(): Action[BulkAddVariantBuildsRequest] = - secureApi.jsonAction[BulkAddVariantBuildsRequest].async { request => - if (request.body.variants.size > MaxBulkSize) { - Future.successful(BadRequest(Json.obj("error" -> s"Bulk operations limited to $MaxBulkSize items per request"))) - } else { - val requests = request.body.variants - logger.info(s"Bulk add builds request for ${requests.size} variants") - - val resultFutures = requests.map(processAddBuildRequest) - - Future.sequence(resultFutures).map { results => - val succeeded = results.count(_.status == "success") - val failed = results.count(_.status != "success") - - logger.info(s"Bulk add builds completed: $succeeded succeeded, $failed failed") - - Ok(Json.toJson(BulkVariantOperationResponse( - total = results.size, - succeeded = succeeded, - failed = failed, - results = results - ))) - } - } - } - - /** - * Bulk update rsIds for variants matched by name. - * Adds rsId as an alias to the variant's aliases JSONB. 
- */ - def bulkUpdateRsIds(): Action[BulkUpdateRsIdsRequest] = - secureApi.jsonAction[BulkUpdateRsIdsRequest].async { request => - if (request.body.variants.size > MaxBulkSize) { - Future.successful(BadRequest(Json.obj("error" -> s"Bulk operations limited to $MaxBulkSize items per request"))) - } else { - val requests = request.body.variants - logger.info(s"Bulk update rsIds request for ${requests.size} variants") - - val resultFutures = requests.map(processUpdateRsIdRequest) - - Future.sequence(resultFutures).map { results => - val succeeded = results.count(_.status == "success") - val failed = results.count(_.status != "success") - - logger.info(s"Bulk update rsIds completed: $succeeded succeeded, $failed failed") - - Ok(Json.toJson(BulkVariantOperationResponse( - total = results.size, - succeeded = succeeded, - failed = failed, - results = results - ))) - } - } - } - - private def processAddBuildRequest(req: AddVariantBuildRequest): Future[VariantOperationResult] = { - val identifier = req.name.orElse(req.rsId) - - identifier match { - case None => - Future.successful(VariantOperationResult( - name = req.name, - rsId = req.rsId, - status = "error", - message = Some("Either name or rsId must be provided") - )) - - case Some(id) => - // Find variant by name or alias - val findFuture = req.name match { - case Some(name) => variantRepository.findByCanonicalName(name) - case None => variantRepository.findByAlias(req.rsId.get).map(_.headOption) - } - - findFuture.flatMap { - case None => - Future.successful(VariantOperationResult( - name = req.name, - rsId = req.rsId, - status = "error", - message = Some(s"Variant not found with identifier '$id'") - )) - - case Some(variant) => - // Check if this build already exists - val existingCoords = variant.coordinates.asOpt[Map[String, JsObject]].getOrElse(Map.empty) - if (existingCoords.contains(req.refGenome)) { - Future.successful(VariantOperationResult( - name = req.name, - rsId = req.rsId, - status = "skipped", - message 
= Some(s"Build ${req.refGenome} already exists"), - variantId = variant.variantId - )) - } else { - // Add the new coordinates - val newCoords = Json.obj( - "contig" -> req.contig, - "position" -> req.position, - "ref" -> req.refAllele, - "alt" -> req.altAllele - ) - - variantRepository.addCoordinates(variant.variantId.get, req.refGenome, newCoords).map { _ => - VariantOperationResult( - name = req.name, - rsId = req.rsId, - status = "success", - message = Some(s"Added coordinates for ${req.refGenome}"), - variantId = variant.variantId - ) - }.recover { case e: Exception => - logger.error(s"Failed to add coordinates: ${e.getMessage}", e) - VariantOperationResult( - name = req.name, - rsId = req.rsId, - status = "error", - message = Some("A database error occurred while processing this request") - ) - } - } - } - } - } - - private def processUpdateRsIdRequest(req: UpdateVariantRsIdRequest): Future[VariantOperationResult] = { - variantRepository.findByCanonicalName(req.name).flatMap { - case None => - // Try finding by alias - variantRepository.findByAlias(req.name).flatMap { variants => - if (variants.isEmpty) { - Future.successful(VariantOperationResult( - name = Some(req.name), - rsId = Some(req.rsId), - status = "error", - message = Some(s"No variant found with name '${req.name}'") - )) - } else { - updateVariantRsId(variants.head, req) - } - } - - case Some(variant) => - updateVariantRsId(variant, req) - } - } - - private def updateVariantRsId(variant: VariantV2, req: UpdateVariantRsIdRequest): Future[VariantOperationResult] = { - // Add rsId as an alias - variantRepository.addAlias(variant.variantId.get, "rs_id", req.rsId, Some("bulk_update")).map { _ => - VariantOperationResult( - name = Some(req.name), - rsId = Some(req.rsId), - status = "success", - message = Some("Added rsId as alias"), - variantId = variant.variantId - ) - }.recover { case e: Exception => - logger.error(s"Failed to update rsId: ${e.getMessage}", e) - VariantOperationResult( - name = 
Some(req.name), - rsId = Some(req.rsId), - status = "error", - message = Some("A database error occurred while processing this request") - ) - } - } - - // ============================================================================ - // Alias Source Management Endpoints - // ============================================================================ - - /** - * Bulk update alias sources by prefix pattern. - * Updates the source field in aliases JSONB for matching alias values. - */ - def bulkUpdateAliasSources(): Action[BulkUpdateAliasSourcesRequest] = - secureApi.jsonAction[BulkUpdateAliasSourcesRequest].async { request => - val updates = request.body.updates - logger.info(s"Bulk update alias sources request for ${updates.size} prefix patterns") - - val resultFutures = updates.map { req => - variantRepository.bulkUpdateAliasSource(req.aliasPrefix, req.newSource, req.oldSource).map { count => - AliasSourceUpdateResult( - aliasPrefix = req.aliasPrefix, - newSource = req.newSource, - aliasesUpdated = count, - status = "success", - message = Some(s"Updated $count aliases") - ) - }.recover { case e: Exception => - logger.error(s"Failed to update aliases for prefix '${req.aliasPrefix}': ${e.getMessage}", e) - AliasSourceUpdateResult( - aliasPrefix = req.aliasPrefix, - newSource = req.newSource, - aliasesUpdated = 0, - status = "error", - message = Some("A database error occurred while processing this request") - ) - } - } - - Future.sequence(resultFutures).map { results => - val totalUpdated = results.map(_.aliasesUpdated).sum - logger.info(s"Bulk update alias sources completed: $totalUpdated total aliases updated") - - Ok(Json.toJson(BulkAliasSourceUpdateResponse( - total = results.size, - totalAliasesUpdated = totalUpdated, - results = results - ))) - } - } - - /** - * Get statistics about alias sources in the database. 
- */ - def getAliasSourceStats(): Action[AnyContent] = secureApi.async { _ => - variantRepository.getAliasSourceStats().map { stats => - val totalAliases = stats.map(_._2).sum - Ok(Json.toJson(AliasSourceStatsResponse( - sources = stats.map { case (source, count) => AliasSourceSummary(source, count) }, - totalAliases = totalAliases - ))) - } - } - - /** - * Preview how many aliases would be affected by a source update. - */ - def previewAliasSourceUpdate(aliasPrefix: String, currentSource: String): Action[AnyContent] = secureApi.async { _ => - variantRepository.countAliasesByPrefixAndSource(aliasPrefix, Some(currentSource)).map { count => - Ok(Json.obj( - "aliasPrefix" -> aliasPrefix, - "currentSource" -> currentSource, - "matchingAliases" -> count - )) - } - } - - // ============================================================================ - // DU Naming Authority Endpoints - // ============================================================================ - - /** - * Assign a DU name to a single variant. - * The variant must exist and not already have a DU name. - */ - def assignDuName(variantId: Int): Action[AnyContent] = secureApi.async { _ => - variantRepository.findById(variantId).flatMap { - case None => - Future.successful(NotFound(Json.toJson(DuNameAssignmentResult( - variantId = variantId, - duName = None, - previousName = None, - status = "error", - message = Some(s"Variant $variantId not found") - )))) - - case Some(variant) => - // Check if already has a DU name - if (variant.canonicalName.exists(variantRepository.isDuName)) { - Future.successful(Ok(Json.toJson(DuNameAssignmentResult( - variantId = variantId, - duName = variant.canonicalName, - previousName = variant.canonicalName, - status = "skipped", - message = Some("Variant already has a DU name") - )))) - } else { - // Assign new DU name - assignDuNameToVariant(variant).map { result => - Ok(Json.toJson(result)) - } - } - } - } - - /** - * Bulk assign DU names to multiple variants. 
- * Skips variants that already have DU names. - */ - def bulkAssignDuNames(): Action[BulkAssignDuNamesRequest] = - secureApi.jsonAction[BulkAssignDuNamesRequest].async { request => - if (request.body.variantIds.size > MaxBulkSize) { - Future.successful(BadRequest(Json.obj("error" -> s"Bulk operations limited to $MaxBulkSize items per request"))) - } else { - val variantIds = request.body.variantIds - logger.info(s"Bulk assign DU names request for ${variantIds.size} variants") - - // Process sequentially to maintain name ordering - variantIds.foldLeft(Future.successful(Seq.empty[DuNameAssignmentResult])) { (accFuture, variantId) => - accFuture.flatMap { acc => - processAssignDuName(variantId).map(result => acc :+ result) - } - }.map { results => - val succeeded = results.count(_.status == "success") - val failed = results.count(_.status == "error") - val skipped = results.count(_.status == "skipped") - - logger.info(s"Bulk assign DU names completed: $succeeded succeeded, $failed failed, $skipped skipped") - - Ok(Json.toJson(BulkDuNameAssignmentResponse( - total = results.size, - succeeded = succeeded, - failed = failed, - skipped = skipped, - results = results - ))) - } - } - } - - /** - * Get the next DU name that would be assigned (preview without consuming). - */ - def previewNextDuName(): Action[AnyContent] = secureApi.async { _ => - variantRepository.nextDuName().map { nextName => - Ok(Json.obj( - "nextDuName" -> nextName, - "note" -> "This name has been reserved. Use assignDuName to apply it to a variant." 
- )) - } - } - - private def processAssignDuName(variantId: Int): Future[DuNameAssignmentResult] = { - variantRepository.findById(variantId).flatMap { - case None => - Future.successful(DuNameAssignmentResult( - variantId = variantId, - duName = None, - previousName = None, - status = "error", - message = Some(s"Variant $variantId not found") - )) - - case Some(variant) => - if (variant.canonicalName.exists(variantRepository.isDuName)) { - Future.successful(DuNameAssignmentResult( - variantId = variantId, - duName = variant.canonicalName, - previousName = variant.canonicalName, - status = "skipped", - message = Some("Variant already has a DU name") - )) - } else { - assignDuNameToVariant(variant) - } - } - } - - private def assignDuNameToVariant(variant: VariantV2): Future[DuNameAssignmentResult] = { - val previousName = variant.canonicalName - - for { - duName <- variantRepository.nextDuName() - updated = variant.copy( - canonicalName = Some(duName), - namingStatus = models.domain.genomics.NamingStatus.Named - ) - success <- variantRepository.update(updated) - } yield { - if (success) { - logger.info(s"Assigned DU name $duName to variant ${variant.variantId.get} (was: ${previousName.getOrElse("unnamed")})") - DuNameAssignmentResult( - variantId = variant.variantId.get, - duName = Some(duName), - previousName = previousName, - status = "success", - message = Some(s"Assigned $duName") - ) - } else { - DuNameAssignmentResult( - variantId = variant.variantId.get, - duName = None, - previousName = previousName, - status = "error", - message = Some("Failed to update variant") - ) - } - } - } -} diff --git a/app/controllers/VariantBrowserController.scala b/app/controllers/VariantBrowserController.scala deleted file mode 100644 index 16941367..00000000 --- a/app/controllers/VariantBrowserController.scala +++ /dev/null @@ -1,89 +0,0 @@ -package controllers - -import jakarta.inject.{Inject, Singleton} -import org.webjars.play.WebJarsUtil -import play.api.cache.AsyncCacheApi 
-import play.api.i18n.I18nSupport -import play.api.mvc.* -import services.VariantBrowserService - -import scala.concurrent.duration.* -import scala.concurrent.{ExecutionContext, Future} - -/** - * Public controller for browsing variants (read-only). - * Provides a searchable variant database for researchers. - * Results are cached to improve performance for the public view. - */ -@Singleton -class VariantBrowserController @Inject()( - val controllerComponents: ControllerComponents, - variantBrowserService: VariantBrowserService, - cache: AsyncCacheApi -)(using webJarsUtil: WebJarsUtil, ec: ExecutionContext) - extends BaseController with I18nSupport { - - private val DefaultPageSize = 25 - - // Cache durations - public view can be stale - private val SearchCacheDuration = 15.minutes - private val DetailCacheDuration = 1.hour - - /** - * Main variant browser page with search functionality. - */ - def index(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = Action { - implicit request: Request[AnyContent] => - Ok(views.html.variants.browser(query, page, pageSize)) - } - - /** - * HTMX fragment for variant list updates (search/pagination). - */ - def listFragment(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = Action.async { - implicit request: Request[AnyContent] => - val safePage = Math.max(1, page) - val safePageSize = Math.max(1, Math.min(pageSize, 100)) - val offset = (safePage - 1) * safePageSize - for { - (variants, totalCount) <- getCachedSearchResults(query.getOrElse(""), offset, safePageSize) - } yield { - val totalPages = Math.max(1, (totalCount + safePageSize - 1) / safePageSize) - Ok(views.html.variants.listFragment(variants, query, safePage, totalPages, safePageSize, totalCount)) - } - } - - /** - * HTMX fragment for variant detail panel (read-only). 
- */ - def detailPanel(id: Int): Action[AnyContent] = Action.async { implicit request: Request[AnyContent] => - getCachedDetailPanel(id) - } - - // === Caching helpers === - - /** - * Get cached search results or fetch from service. - */ - private def getCachedSearchResults(query: String, offset: Int, limit: Int): Future[(Seq[models.domain.genomics.VariantV2], Int)] = { - val cacheKey = s"variant-browser:${query.toLowerCase.trim}:$offset:$limit" - cache.getOrElseUpdate(cacheKey, SearchCacheDuration) { - variantBrowserService.searchPaginated(query, offset, limit) - } - } - - /** - * Get cached detail panel or fetch from service. - */ - private def getCachedDetailPanel(id: Int)(implicit request: Request[AnyContent]): Future[Result] = { - val cacheKey = s"variant-browser-detail:$id" - cache.getOrElseUpdate(cacheKey, DetailCacheDuration) { - variantBrowserService.getVariantWithHaplogroups(id).map { - case Some((variant, haplogroups)) => - Ok(views.html.variants.detailPanel(variant, haplogroups)) - case None => - NotFound("Variant not found") - } - } - } -} diff --git a/app/controllers/VariantPublicApiController.scala b/app/controllers/VariantPublicApiController.scala deleted file mode 100644 index e048359a..00000000 --- a/app/controllers/VariantPublicApiController.scala +++ /dev/null @@ -1,138 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import jakarta.inject.{Inject, Named, Singleton} -import org.apache.pekko.actor.ActorRef -import org.apache.pekko.pattern.ask -import org.apache.pekko.util.Timeout -import play.api.Logging -import play.api.libs.json.Json -import play.api.mvc.{Action, AnyContent, BaseController, ControllerComponents} -import services.{ExportResult, VariantExportService, VariantPublicApiService} - -import java.nio.file.Files -import scala.concurrent.ExecutionContext -import scala.concurrent.duration.* - -/** - * Public API controller for variant data. 
- * Provides read-only access to variant information with forward-compatible response format. - */ -@Singleton -class VariantPublicApiController @Inject()( - val controllerComponents: ControllerComponents, - variantPublicApiService: VariantPublicApiService, - variantExportService: VariantExportService, - secureApi: ApiSecurityAction, - @Named("variant-export-actor") variantExportActor: ActorRef - )(implicit ec: ExecutionContext) extends BaseController with Logging { - - /** - * Search variants with pagination. - * GET /api/v1/variants?query=M269&page=1&pageSize=25 - */ - def searchVariants(query: Option[String], page: Int, pageSize: Int): Action[AnyContent] = Action.async { - val validPage = Math.max(1, page) - val validPageSize = Math.min(100, Math.max(1, pageSize)) - - variantPublicApiService.searchVariants(query, validPage, validPageSize).map { response => - Ok(Json.toJson(response)) - }.recover { - case e: Exception => - logger.error("Unexpected error in variant API", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Get a single variant by ID. - * GET /api/v1/variants/:id - */ - def getVariantById(variantId: Int): Action[AnyContent] = Action.async { - variantPublicApiService.getVariantById(variantId).map { - case Some(variant) => Ok(Json.toJson(variant)) - case None => NotFound(Json.obj("error" -> s"Variant not found: $variantId")) - }.recover { - case e: Exception => - logger.error("Unexpected error in variant API", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Get all variants defining a specific haplogroup. 
- * GET /api/v1/haplogroups/:name/variants - */ - def getVariantsByHaplogroup(haplogroupName: String): Action[AnyContent] = Action.async { - variantPublicApiService.getVariantsByHaplogroup(haplogroupName).map { variants => - Ok(Json.toJson(variants)) - }.recover { - case e: Exception => - logger.error("Unexpected error in variant API", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } - - /** - * Download the full variant export file (gzipped JSONL). - * GET /api/v1/variants/export - * - * Returns 404 if no export file exists yet. - * Returns the pre-generated file with metadata headers. - */ - def downloadExport(): Action[AnyContent] = Action { request => - val exportPath = variantExportService.getExportFilePath - - if (Files.exists(exportPath)) { - val metadata = variantExportService.getExportMetadata - - Ok.sendFile( - content = exportPath.toFile, - fileName = _ => Some("variants-full.jsonl.gz") - ).withHeaders( - "Content-Type" -> "application/gzip", - "X-Variant-Count" -> metadata.map(_.variantCount.toString).getOrElse("unknown"), - "X-Generated-At" -> metadata.map(_.generatedAt.toString).getOrElse("unknown") - ) - } else { - NotFound(Json.obj( - "error" -> "Export file not yet generated", - "message" -> "The variant export is generated daily at 4 AM UTC. Please try again later or trigger a manual generation." - )) - } - } - - /** - * Get metadata about the current export file. - * GET /api/v1/variants/export/metadata - */ - def exportMetadata(): Action[AnyContent] = Action { - variantExportService.getExportMetadata match { - case Some(metadata) => - Ok(Json.toJson(metadata)) - case None => - NotFound(Json.obj("error" -> "No export metadata available")) - } - } - - /** - * Trigger manual export generation (admin only, requires X-API-Key). 
- * POST /api/private/variants/export/generate - */ - def triggerExport(): Action[AnyContent] = secureApi.async { _ => - import actors.VariantExportActor.RunExport - implicit val timeout: Timeout = Timeout(30.minutes) - - (variantExportActor ? RunExport).mapTo[ExportResult].map { result => - if (result.success) { - Ok(Json.toJson(result)) - } else { - InternalServerError(Json.toJson(result)) - } - }.recover { - case e: Exception => - logger.error("Unexpected error in variant API", e) - InternalServerError(Json.obj("error" -> "An internal error occurred.")) - } - } -} diff --git a/app/filters/ApiKeyFilter.scala b/app/filters/ApiKeyFilter.scala deleted file mode 100644 index 35403b81..00000000 --- a/app/filters/ApiKeyFilter.scala +++ /dev/null @@ -1,48 +0,0 @@ -package filters - -import play.api.mvc.* -import services.CachedSecretsManagerService - -import java.security.MessageDigest -import javax.inject.Inject -import scala.concurrent.{ExecutionContext, Future} - -/** - * A filter that validates API key headers in incoming requests. - * - * This class checks for the presence of an API key in the request headers and validates - * it against a stored API key retrieved from a cached secrets manager service. If a valid - * API key is not provided, the request is rejected with a `401 Unauthorized` response. - * - * @constructor Creates a new instance of the filter with the given secrets manager service. - * @param secretsManager The service used to retrieve and cache the API key. - * @param ec The implicit ExecutionContext for handling asynchronous operations. - */ -class ApiKeyFilter @Inject()( - secretsManager: CachedSecretsManagerService - )(implicit ec: ExecutionContext) { - - private val ApiKeyHeader = "X-API-Key" - - /** - * Filters an incoming HTTP request based on the presence and validity of an API key in the request headers. 
- * - * @param requestHeader the HTTP request headers provided for the incoming request - * @return a Future containing an Option of Result, where: - * - None indicates that the request is authorized and can proceed - * - Some(Result) indicates that the request is unauthorized and includes the appropriate response - */ - def filter(requestHeader: RequestHeader): Future[Option[Result]] = Future { - requestHeader.headers.get(ApiKeyHeader) match { - case None => - Some(Results.Unauthorized("API key missing")) - case Some(providedKey) => - secretsManager.getCachedApiKey match { - case Some(storedKey) if MessageDigest.isEqual(providedKey.getBytes("UTF-8"), storedKey.getBytes("UTF-8")) => - None // Allow the request to proceed - case _ => - Some(Results.Unauthorized("Invalid API key")) - } - } - } -} \ No newline at end of file diff --git a/app/models/HaplogroupType.scala b/app/models/HaplogroupType.scala deleted file mode 100644 index e35d415d..00000000 --- a/app/models/HaplogroupType.scala +++ /dev/null @@ -1,64 +0,0 @@ -package models - -import play.api.libs.json.{Format, Reads, Writes} -import play.api.mvc.QueryStringBindable - -/** - * Represents a type of haplogroup classification, distinguishing between paternal (Y) and maternal (MT) lineages. - * - * Enumeration values: - * - `Y`: Represents the Y-DNA haplogroup type, associated with the paternal lineage. - * - `MT`: Represents the mtDNA (mitochondrial DNA) haplogroup type, associated with the maternal lineage. - * - * This enumeration provides a structured way to classify genetic lineage data based on the type of haplogroup. - */ -enum HaplogroupType { - case Y, MT - - override def toString: String = this match { - case Y => "Y" - case MT => "MT" - } -} - -/** - * Provides methods for working with the HaplogroupType enumeration, which represents types of haplogroup classifications - * (e.g., Y-DNA for paternal lineage and mtDNA for maternal lineage). 
- * - * This companion object includes utility methods for handling HaplogroupType values. - */ -object HaplogroupType { - def fromString(str: String): Option[HaplogroupType] = str.toUpperCase match { - case "Y" => Some(Y) - case "MT" => Some(MT) - case _ => None - } - - // JSON serialization - implicit val reads: Reads[HaplogroupType] = Reads.StringReads.map { str => - fromString(str).getOrElse(throw new IllegalArgumentException(s"Invalid HaplogroupType: $str")) - } - - implicit val writes: Writes[HaplogroupType] = Writes.StringWrites.contramap(_.toString) - - implicit val format: Format[HaplogroupType] = Format(reads, writes) - - implicit val queryStringBindable: QueryStringBindable[HaplogroupType] = - new QueryStringBindable[HaplogroupType] { - def bind(key: String, params: Map[String, Seq[String]]): Option[Either[String, HaplogroupType]] = { - params.get(key).flatMap(_.headOption).map { value => - try { - Right(HaplogroupType.valueOf(value)) - } catch { - case _: IllegalArgumentException => - Left(s"Invalid HaplogroupType value: $value") - } - } - } - - def unbind(key: String, value: HaplogroupType): String = { - s"$key=${value.toString}" - } - } - -} \ No newline at end of file diff --git a/app/models/PDSRegistration.scala b/app/models/PDSRegistration.scala deleted file mode 100644 index 890d40a3..00000000 --- a/app/models/PDSRegistration.scala +++ /dev/null @@ -1,23 +0,0 @@ -package models - -import play.api.libs.json.{Format, Json} - -import java.time.ZonedDateTime // Import Play-JSON classes - -case class PDSRegistration( - did: String, - pdsUrl: String, - handle: String, - lastCommitCid: Option[String], - lastCommitSeq: Option[Long], - cursor: Long = 0L, - createdAt: ZonedDateTime, - updatedAt: ZonedDateTime, - leasedByInstanceId: Option[String] = None, - leaseExpiresAt: Option[ZonedDateTime] = None, - processingStatus: String = "idle" - ) - -object PDSRegistration { - implicit val format: Format[PDSRegistration] = Json.format[PDSRegistration] -} diff 
--git a/app/models/api/BiosampleOperationResponse.scala b/app/models/api/BiosampleOperationResponse.scala deleted file mode 100644 index f67156cd..00000000 --- a/app/models/api/BiosampleOperationResponse.scala +++ /dev/null @@ -1,11 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -import java.util.UUID - -case class BiosampleOperationResponse(status: String, guid: UUID) - -object BiosampleOperationResponse { - implicit val format: OFormat[BiosampleOperationResponse] = Json.format[BiosampleOperationResponse] -} diff --git a/app/models/api/BiosampleOriginalHaplogroupUpdate.scala b/app/models/api/BiosampleOriginalHaplogroupUpdate.scala deleted file mode 100644 index a03744b8..00000000 --- a/app/models/api/BiosampleOriginalHaplogroupUpdate.scala +++ /dev/null @@ -1,35 +0,0 @@ -package models.api - -import models.domain.genomics.{HaplogroupResult, OriginalHaplogroupEntry} -import play.api.libs.json.{Format, Json} - -case class BiosampleOriginalHaplogroupUpdate( - originalYHaplogroup: Option[HaplogroupResult], - originalMtHaplogroup: Option[HaplogroupResult], - notes: Option[String] - ) - -object BiosampleOriginalHaplogroupUpdate { - implicit val format: Format[BiosampleOriginalHaplogroupUpdate] = Json.format -} - -case class BiosampleOriginalHaplogroupView( - biosampleId: Int, - publicationId: Int, - originalYHaplogroup: Option[HaplogroupResult], - originalMtHaplogroup: Option[HaplogroupResult], - notes: Option[String] - ) - -object BiosampleOriginalHaplogroupView { - implicit val format: Format[BiosampleOriginalHaplogroupView] = Json.format - - def fromEntry(biosampleId: Int, entry: OriginalHaplogroupEntry): BiosampleOriginalHaplogroupView = - BiosampleOriginalHaplogroupView( - biosampleId = biosampleId, - publicationId = entry.publicationId, - originalYHaplogroup = entry.yHaplogroupResult, - originalMtHaplogroup = entry.mtHaplogroupResult, - notes = entry.notes - ) -} diff --git a/app/models/api/BiosamplePublicationLinkRequest.scala 
b/app/models/api/BiosamplePublicationLinkRequest.scala deleted file mode 100644 index b8cc0d13..00000000 --- a/app/models/api/BiosamplePublicationLinkRequest.scala +++ /dev/null @@ -1,18 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -/** - * Represents a request to link a biosample to a publication using their respective identifiers. - * - * @param sampleAccession The accession ID of the biosample to be linked. - * @param publicationDoi The DOI (Digital Object Identifier) of the publication to be linked. - */ -case class BiosamplePublicationLinkRequest( - sampleAccession: String, - publicationDoi: String - ) - -object BiosamplePublicationLinkRequest { - implicit val format: OFormat[BiosamplePublicationLinkRequest] = Json.format -} \ No newline at end of file diff --git a/app/models/api/BiosampleUpdate.scala b/app/models/api/BiosampleUpdate.scala deleted file mode 100644 index 6edabc21..00000000 --- a/app/models/api/BiosampleUpdate.scala +++ /dev/null @@ -1,37 +0,0 @@ -package models.api - -import models.domain.genomics.{BiologicalSex, HaplogroupResult} -import play.api.libs.json.{Json, Reads} - -/** - * Represents an update to a biobanking biosample, allowing specific properties to be modified. - * - * @param sex An optional sex of the biosample source (e.g., "male", "female"). - * @param geoCoord An optional geographical coordinate representing the location associated with the biosample. - * @param alias An optional alias or alternative identifier for the biosample. - * @param locked An optional flag indicating whether the biosample metadata is locked for updates. - * @param dateRangeStart An optional start date for the date range associated with the biosample. Typically represented as a year (e.g., 1980). - * @param dateRangeEnd An optional end date for the date range associated with the biosample. Typically represented as a year (e.g., 1990). 
- * @param yHaplogroup An optional Y-chromosomal haplogroup assignment for the source of the biosample. - * @param mtHaplogroup An optional mitochondrial haplogroup assignment for the source of the biosample. - */ -case class BiosampleUpdate( - sex: Option[BiologicalSex] = None, - geoCoord: Option[GeoCoord] = None, - alias: Option[String] = None, - locked: Option[Boolean] = None, - dateRangeStart: Option[Int] = None, - dateRangeEnd: Option[Int] = None, - yHaplogroup: Option[HaplogroupResult] = None, - mtHaplogroup: Option[HaplogroupResult] = None - ) { - def hasUpdates: Boolean = { - sex.isDefined || geoCoord.isDefined || alias.isDefined || locked.isDefined || - dateRangeStart.isDefined || dateRangeEnd.isDefined || yHaplogroup.isDefined || - mtHaplogroup.isDefined - } -} - -object BiosampleUpdate { - implicit val reads: Reads[BiosampleUpdate] = Json.reads[BiosampleUpdate] -} \ No newline at end of file diff --git a/app/models/api/BiosampleView.scala b/app/models/api/BiosampleView.scala deleted file mode 100644 index 1bbf038d..00000000 --- a/app/models/api/BiosampleView.scala +++ /dev/null @@ -1,66 +0,0 @@ -package models.api - -import models.domain.genomics.{Biosample, SpecimenDonor} -import play.api.libs.json.{Json, OFormat} -import utils.GeometryUtils - -import java.util.UUID - -/** - * Represents a view of a biosample with relevant metadata and attributes. - * - * @param id Optional identifier for the biosample, typically used for internal purposes. - * @param sampleAccession Unique accession identifier for the biosample, often used in external systems. - * @param description Description or details about the biosample. - * @param alias Optional alternative name or alias associated with the biosample. - * @param centerName Name of the center or organization responsible for the biosample. - * @param sex Optional biological sex associated with the biosample, if applicable. 
- * @param geoCoord Optional geographical location as a set of latitude and longitude coordinates. - * @param specimenDonorId Optional identifier for the donor of the specimen associated with the biosample. - * @param sampleGuid Globally unique identifier (GUID) for the biosample. - * @param locked Boolean flag indicating whether the biosample is locked for further modifications. - * @param dateRangeStart Optional start of the date range associated with the biosample. - * @param dateRangeEnd Optional end of the date range associated with the biosample. - */ -case class BiosampleView( - id: Option[Int], - sampleAccession: String, - description: String, - alias: Option[String], - centerName: String, - sex: Option[String], - geoCoord: Option[GeoCoord], - specimenDonorId: Option[Int], - sampleGuid: UUID, - locked: Boolean, - dateRangeStart: Option[Int], - dateRangeEnd: Option[Int] - ) - -/** - * Companion object for the BiosampleView case class. - * - * Provides functionality for serializing and deserializing BiosampleView instances, - * as well as a method to convert a domain object of type Biosample into a BiosampleView. 
- */ -object BiosampleView { - implicit val format: OFormat[BiosampleView] = Json.format[BiosampleView] - - def fromDomain(biosample: Biosample, specimenDonor: Option[SpecimenDonor] = None): BiosampleView = { - BiosampleView( - id = biosample.id, - sampleAccession = biosample.sampleAccession, - description = biosample.description, - alias = biosample.alias, - centerName = biosample.centerName, - sex = specimenDonor.flatMap(_.sex.map(_.toString)), - geoCoord = specimenDonor.flatMap(_.geocoord).map(point => GeoCoord(point.getY, point.getX)), - specimenDonorId = biosample.specimenDonorId, - sampleGuid = biosample.sampleGuid, - locked = biosample.locked, - dateRangeStart = specimenDonor.flatMap(_.dateRangeStart), - dateRangeEnd = specimenDonor.flatMap(_.dateRangeEnd) - ) - } -} - diff --git a/app/models/api/BiosampleWithOrigin.scala b/app/models/api/BiosampleWithOrigin.scala deleted file mode 100644 index 23fa5a78..00000000 --- a/app/models/api/BiosampleWithOrigin.scala +++ /dev/null @@ -1,140 +0,0 @@ -package models.api - -import models.domain.genomics.BiosampleType -import play.api.libs.json.{Json, OFormat} - -/** - * Represents information about a specific population. - * - * @constructor Creates a new instance of PopulationInfo. - * @param populationName Name of the population. - * @param probability Probability associated with the population. - * @param methodName Name of the method or approach used to determine the population information. - */ -case class PopulationInfo(populationName: String, probability: BigDecimal, methodName: String) - -/** - * Represents a biosample with detailed origin and associated metadata. - * - * @param sampleName An optional name of the sample. - * @param accession ENA (European Nucleotide Archive) accession identifier for the sample. - * @param sex An optional gender or sex information for the sample. - * @param yDnaHaplogroup An optional Y-DNA haplogroup associated with the sample. 
- * @param mtDnaHaplogroup An optional mitochondrial DNA haplogroup associated with the sample. - * @param reads An optional number of reads generated for the sample. - * @param readLen An optional length of each read. - * @param geoCoord An optional geographical coordinate specifying the origin of the sample. - * @param bestFitPopulation An optional population information associated with the sample. - */ -case class BiosampleWithOrigin( - sampleName: Option[String], - accession: String, - sampleType: BiosampleType, - sex: Option[String], - yDnaHaplogroup: Option[String], - mtDnaHaplogroup: Option[String], - reads: Option[Int], - readLen: Option[Int], - geoCoord: Option[GeoCoord], - bestFitPopulation: Option[PopulationInfo], - dateRangeStart: Option[Int] = None, - dateRangeEnd: Option[Int] = None - ) { - - /** - * Formats the geographic coordinate of the origin into a human-readable string. - * If the geographic coordinate is available, it will return the latitude and longitude with appropriate directional indicators (N/S and E/W). - * If the geographic coordinate is not available, it will return "Origin Not Available". - * - * @return A formatted string representing the origin's geographic coordinate or a fallback message if unavailable. - */ - def formattedOrigin: String = geoCoord match { - case Some(lat, lon) => - val latDir = if (lat >= 0) "N" else "S" - val lonDir = if (lon >= 0) "E" else "W" - f"${math.abs(lat)}%.2f°$latDir, ${math.abs(lon)}%.2f°$lonDir" - case None => - "Origin Not Available" - } - - import scala.math.BigDecimal - - /** - * Estimates the coverage depth of the genome based on the number of reads and the read length. - * If either the number of reads or the read length is unavailable, returns None. - * - * @return An optional `Long` value representing the estimated coverage depth. If either input is missing, returns None. 
- */ - def estimateCoverageDepth: Option[Long] = (reads, readLen) match { - case (Some(reads), Some(readLen)) => - val totalBases = BigDecimal(reads) * BigDecimal(readLen) - val genomeSize = BigDecimal(3_099_441_038L) - Some((totalBases / genomeSize).toLong) - case _ => None - } - - /** - * Formats the date range in a human-readable format based on sample type. - * For Ancient samples: Represents archaeological date (negative for BCE, positive for CE) - * For other types: Represents birth/death years in CE - * - * @return A formatted string representing the date range or "N/A" if not available - */ - def formattedDateRange: String = { - def formatYear(year: Int, isAncient: Boolean): String = { - if (isAncient) { - if (year < 0) s"${-year} BCE" - else if (year == 0) "1 BCE" // There is no year 0 - else s"$year CE" - } else { - s"$year" // Modern dates are always CE - } - } - - def formatRange(start: Option[Int], end: Option[Int], isAncient: Boolean): String = { - (start, end) match { - case (Some(s), Some(e)) if s == e => formatYear(s, isAncient) - case (Some(s), Some(e)) => s"${formatYear(s, isAncient)} - ${formatYear(e, isAncient)}" - case (Some(s), None) => s"${formatYear(s, isAncient)}+" - case (None, Some(e)) => s"Until ${formatYear(e, isAncient)}" - case _ => "Date unknown" - } - } - - (dateRangeStart, dateRangeEnd) match { - case (None, None) => "N/A" - case _ => sampleType match { - case BiosampleType.Ancient => formatRange(dateRangeStart, dateRangeEnd, isAncient = true) - case _ => formatRange(dateRangeStart, dateRangeEnd, isAncient = false) - } - } - } - - def sampleTypeDescription: String = sampleType match { - case BiosampleType.Ancient => "Ancient" - case _ => "Modern" - } - -} - -/** - * Companion object for the PopulationInfo case class. - * - * Provides an implicit JSON formatter for serializing and deserializing - * PopulationInfo instances using the Play Framework's JSON library. 
- * - * This formatter can be utilized for converting PopulationInfo objects to JSON - * representation and vice versa in a type-safe and automated manner. - */ -object PopulationInfo { - implicit val populationInfoFormat: OFormat[PopulationInfo] = Json.format[PopulationInfo] -} - -/** - * Companion object for the `BiosampleWithOrigin` class. - * Provides an implicit JSON formatter for instances of `BiosampleWithOrigin`. - */ -object BiosampleWithOrigin { - implicit val biosampleWithOriginFormat: OFormat[BiosampleWithOrigin] = Json.format[BiosampleWithOrigin] -} - diff --git a/app/models/api/ExternalBiosampleRequest.scala b/app/models/api/ExternalBiosampleRequest.scala deleted file mode 100644 index 0d9ea52b..00000000 --- a/app/models/api/ExternalBiosampleRequest.scala +++ /dev/null @@ -1,207 +0,0 @@ -package models.api - -import models.domain.genomics.{BiologicalSex, BiosampleType, HaplogroupResult} -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime - -/** - * Represents a request for an external biosample, containing the metadata and associated information - * related to the sample and its sequencing data. - * - * @param sampleAccession Native identifier provided by the client for the biosample. - * @param sourceSystem Origin system or data source associated with the biosample (e.g., "evolbio", "pgp"). - * @param description A textual description of the biosample. - * @param alias Optional alias for the biosample, provided by the client. - * @param centerName Name of the institution or center handling the biosample. - * @param sex Optional biological sex information for the biosample. - * @param latitude Optional geographical latitude information related to the biosample. - * @param longitude Optional geographical longitude information related to the biosample. - * @param citizenDid Optional decentralized identifier (DID) for linking to a citizen/PDS user. - * @param donorType Optional type of the donor (e.g., Citizen, PGP, Standard). 
- * @param publication Optional publication information related to the biosample, represented by the `PublicationInfo` structure. - * @param sequenceData Information regarding the sequencing data associated with the biosample, represented by the `SequenceDataInfo` structure. - */ -case class ExternalBiosampleRequest( - sampleAccession: String, // Client provides their native identifier - sourceSystem: String, // e.g., "evolbio", "pgp", etc. - description: String, - alias: Option[String], - centerName: String, - sex: Option[BiologicalSex], - latitude: Option[Double], - longitude: Option[Double], - citizenDid: Option[String], - atUri: Option[String], - donorIdentifier: Option[String], - donorType: Option[BiosampleType], - publication: Option[PublicationInfo], - haplogroups: Option[HaplogroupAssignments], - sequenceData: SequenceDataInfo, - atCid: Option[String] = None - ) - -object ExternalBiosampleRequest { - implicit val externalBiosampleRequest: OFormat[ExternalBiosampleRequest] = Json.format -} - -/** - * Represents publication-related information, including details such as DOI, PubMed ID, - * and original haplogroup data. - * - * @constructor Creates an instance of `PublicationInfo` to encapsulate key publication - * identifiers and data related to haplogroups. - * @param doi An optional DOI (Digital Object Identifier) for the publication. - * @param pubmedId An optional PubMed ID associated with the publication. - * @param originalHaplogroups Optionally represents original haplogroup information, - * encapsulated in a `HaplogroupInfo` instance. - */ -case class PublicationInfo( - doi: Option[String], - pubmedId: Option[String], - originalHaplogroups: Option[HaplogroupInfo] - ) - -object PublicationInfo { - implicit val publicationInfo: OFormat[PublicationInfo] = Json.format -} - -/** - * Represents information about Y-DNA and mitochondrial DNA (mtDNA) haplogroups, - * along with optional notes for additional context. 
- * - * @param yHaplogroup An optional string representing the Y-DNA haplogroup. - * This is typically associated with paternal lineage. - * @param mtHaplogroup An optional string representing the mitochondrial DNA (mtDNA) haplogroup. - * This is typically associated with maternal lineage. - * @param notes An optional string for any additional notes or descriptive information - * about the haplogroup or its context. - */ -case class HaplogroupInfo( - yHaplogroup: Option[HaplogroupResult], - mtHaplogroup: Option[HaplogroupResult], - notes: Option[String] - ) - -object HaplogroupInfo { - implicit val haplogroupInfo: OFormat[HaplogroupInfo] = Json.format -} - -/** - * Represents metadata and related information about a sequence dataset. - * - * This case class encapsulates information about sequencing data, including - * details such as the number of reads, read length, coverage, sequencing platform, - * test type, and associated files. - * - * @param reads An optional number of reads in the sequencing data. - * @param readLength An optional read length, indicating the length of individual reads. - * @param coverage An optional coverage value representing the depth of sequencing. - * @param platformName The name of the sequencing platform used to generate the data. - * @param testType The type of sequencing test performed. - * @param files A sequence of file metadata, represented by `FileInfo`, containing information - * about the files associated with the sequencing data. - */ -case class SequenceDataInfo( - reads: Option[Int], - readLength: Option[Int], - coverage: Option[Double], - platformName: String, - testType: String, - files: Seq[FileInfo] - ) - - -object SequenceDataInfo { - implicit val sequenceDataInfo: OFormat[SequenceDataInfo] = Json.format -} - -/** - * Represents the information of a library, typically used in a laboratory or sequencing context. - * - * @param lab The name of the laboratory or site where sequencing or processing occurred. 
- * @param testType The type of test or sequencing performed. - * @param runDate The timestamp for when the sequencing or test run took place. - * @param instrument The identifier or name of the instrument used in the sequencing process. - * @param reads The total number of reads generated during the sequencing. - * @param readLength The length of each read in base pairs. - * @param pairedEnd Indicates whether the sequencing was performed using paired-end reads. - * @param insertSize Optional parameter specifying the insert size for paired-end reads, if applicable. - */ -case class LibraryInfo( - lab: String, - testType: String, - runDate: LocalDateTime, - instrument: String, - reads: Long, - readLength: Int, - pairedEnd: Boolean, - insertSize: Option[Int] - ) - -object LibraryInfo { - implicit val libraryInfo: OFormat[LibraryInfo] = Json.format -} - -/** - * Represents location information including a file URL and an optional file index URL. - * - * This class is useful for storing and managing metadata related to file locations, - * such as a primary file's URL and its associated index file's URL, if available. The - * `fileIndexUrl` is optional to accommodate cases where an index file is not provided. - * - * @param fileUrl The URL pointing to the primary file location. - * @param fileIndexUrl An optional URL pointing to the index file associated with the primary file. - */ -case class LocationInfo( - fileUrl: String, - fileIndexUrl: Option[String] - ) - -object LocationInfo { - implicit val libraryInfo: OFormat[LocationInfo] = Json.format -} - -/** - * Represents checksum information including the checksum value and the algorithm used. - * - * @param checksum The checksum value as a string. - * @param algorithm The algorithm used to generate the checksum. 
- */ -case class ChecksumInfo( - checksum: String, - algorithm: String - ) - -object ChecksumInfo { - implicit val checksumInfo: OFormat[ChecksumInfo] = Json.format -} - -/** - * Represents metadata for a file, including its name, size, format, aligner used, target reference, - * associated checksums, and its location. - * - * @param fileName The name of the file. - * @param fileSizeBytes The size of the file in bytes. - * @param fileFormat The format of the file, indicating the file type or extension. - * @param aligner The aligner used for processing or generating the file. - * @param targetReference The reference target associated with the file. - * @param checksums A sequence of checksum information objects associated with the file. - * @param location Information about the file's location, including its URL and optional index URL. - */ -case class FileInfo( - fileName: String, - fileSizeBytes: Long, - fileFormat: String, - aligner: String, - targetReference: String, - checksums: Seq[ChecksumInfo], - location: LocationInfo - ) - -object FileInfo { - implicit val fileInfo: OFormat[FileInfo] = Json.format -} - - - diff --git a/app/models/api/GeoCoord.scala b/app/models/api/GeoCoord.scala deleted file mode 100644 index 7dbb65d6..00000000 --- a/app/models/api/GeoCoord.scala +++ /dev/null @@ -1,25 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -/** - * Represents a geographical coordinate with latitude and longitude values. - * - * @param lat Latitude value of the geographical coordinate. Positive values represent the northern hemisphere, and negative values represent the southern hemisphere. - * @param lon Longitude value of the geographical coordinate. Positive values represent the eastern hemisphere, and negative values represent the western hemisphere. - */ -case class GeoCoord(lat: Double, lon: Double) - -/** - * Companion object for the `GeoCoord` case class. 
- * - * Provides an implicit JSON formatter for serializing and deserializing - * `GeoCoord` instances using the Play Framework's JSON library. - * - * This formatter can be used to automatically convert `GeoCoord` objects - * to and from their JSON representation, enabling seamless integration - * with APIs or storage systems that utilize JSON. - */ -object GeoCoord { - implicit val geoCoordFormat: OFormat[GeoCoord] = Json.format[GeoCoord] -} \ No newline at end of file diff --git a/app/models/api/HaplogroupAssignments.scala b/app/models/api/HaplogroupAssignments.scala deleted file mode 100644 index d1617d25..00000000 --- a/app/models/api/HaplogroupAssignments.scala +++ /dev/null @@ -1,13 +0,0 @@ -package models.api - -import models.domain.genomics.HaplogroupResult -import play.api.libs.json.{Json, OFormat} - -case class HaplogroupAssignments( - yDna: Option[HaplogroupResult], - mtDna: Option[HaplogroupResult] - ) - -object HaplogroupAssignments { - implicit val format: OFormat[HaplogroupAssignments] = Json.format[HaplogroupAssignments] -} diff --git a/app/models/api/PaginatedResult.scala b/app/models/api/PaginatedResult.scala deleted file mode 100644 index 5e99781f..00000000 --- a/app/models/api/PaginatedResult.scala +++ /dev/null @@ -1,39 +0,0 @@ -package models.api - -/** - * Represents a paginated result set for a collection of items. - * - * @tparam T The type of items contained in the result set. - * @param items The items for the current page. - * @param currentPage The current page number (1-based index). - * @param pageSize The maximum number of items per page. - * @param totalItems The total number of items across all pages. - */ -case class PaginatedResult[T]( - items: Seq[T], - currentPage: Int, - pageSize: Int, - totalItems: Long - ) { - /** - * Computes the total number of pages based on the total number of items and the page size. 
- * - * @return The total number of pages as an integer, calculated by dividing the total number of items - * by the page size and rounding up to the nearest whole number. - */ - def totalPages: Int = Math.ceil(totalItems.toDouble / pageSize).toInt - - /** - * Determines whether there is a next page available in the paginated result set. - * - * @return True if the current page number is less than the total number of pages, indicating that there is a next page; otherwise, false. - */ - def hasNextPage: Boolean = currentPage < totalPages - - /** - * Determines whether there is a previous page available in the paginated result set. - * - * @return True if the current page number is greater than 1, indicating that there is a previous page; otherwise, false. - */ - def hasPreviousPage: Boolean = currentPage > 1 -} diff --git a/app/models/api/PgpBiosampleRequest.scala b/app/models/api/PgpBiosampleRequest.scala deleted file mode 100644 index 741adab8..00000000 --- a/app/models/api/PgpBiosampleRequest.scala +++ /dev/null @@ -1,28 +0,0 @@ -package models.api - -import models.domain.genomics.BiologicalSex -import play.api.libs.json.{Json, OFormat} - -/** - * Request model for creating a PGP biosample. 
- * - * @param participantId The unique identifier of the PGP participant - * @param description Detailed description of the sample, including sequencing center and other metadata - * @param centerName The name of the PGP center (e.g., "PGP Harvard", "PGP UK") that manages the participant - * @param sex Optional biological sex of the participant - * @param latitude Optional latitude coordinate of the sample's origin - * @param longitude Optional longitude coordinate of the sample's origin - */ -case class PgpBiosampleRequest( - participantId: String, - description: String, - centerName: String, - sex: Option[BiologicalSex] = None, - latitude: Option[Double] = None, - longitude: Option[Double] = None - ) - - -object PgpBiosampleRequest { - implicit val format: OFormat[PgpBiosampleRequest] = Json.format -} diff --git a/app/models/api/ProjectRequest.scala b/app/models/api/ProjectRequest.scala deleted file mode 100644 index c6a7bf6f..00000000 --- a/app/models/api/ProjectRequest.scala +++ /dev/null @@ -1,16 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -import java.util.UUID - -case class ProjectRequest( - name: String, - description: Option[String] = None, - atUri: Option[String] = None, - atCid: Option[String] = None - ) - -object ProjectRequest { - implicit val format: OFormat[ProjectRequest] = Json.format -} diff --git a/app/models/api/ProjectResponse.scala b/app/models/api/ProjectResponse.scala deleted file mode 100644 index e292ea7e..00000000 --- a/app/models/api/ProjectResponse.scala +++ /dev/null @@ -1,20 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime -import java.util.UUID - -case class ProjectResponse( - projectGuid: UUID, - name: String, - description: Option[String], - ownerDid: String, - createdAt: LocalDateTime, - updatedAt: LocalDateTime, - atCid: Option[String] - ) - -object ProjectResponse { - implicit val format: OFormat[ProjectResponse] = Json.format -} diff --git 
a/app/models/api/PublicationWithEnaStudiesAndSampleCount.scala b/app/models/api/PublicationWithEnaStudiesAndSampleCount.scala deleted file mode 100644 index 824a1cec..00000000 --- a/app/models/api/PublicationWithEnaStudiesAndSampleCount.scala +++ /dev/null @@ -1,32 +0,0 @@ -package models.api - -import models.domain.publications.{GenomicStudy, Publication} -import play.api.libs.json.{Json, OFormat} - -/** - * Represents a publication along with its associated ENA studies and the total number of samples. - * - * @param publication The publication containing metadata such as title, authors, journal, and identifiers. - * @param bioStudies A collection of ENA studies related to the publication, each described with details - * such as accession, title, and institution. - * @param sampleCount The total number of samples associated with the ENA studies in this publication. - */ -case class PublicationWithEnaStudiesAndSampleCount( - publication: Publication, - bioStudies: Seq[GenomicStudy], - sampleCount: Int, - ) - -/** - * Companion object for the `PublicationWithEnaStudiesAndSampleCount` case class. - * - * Provides an implicit JSON formatter for serializing and deserializing - * instances of `PublicationWithEnaStudiesAndSampleCount` using the Play Framework's JSON library. - * - * This formatter enables seamless conversion of `PublicationWithEnaStudiesAndSampleCount` objects - * to and from JSON representation, ensuring compatibility with APIs and storage systems that rely on JSON. 
- */ -object PublicationWithEnaStudiesAndSampleCount { - implicit val publicationWithEnaStudiesAndSampleCountFormat: OFormat[PublicationWithEnaStudiesAndSampleCount] = - Json.format[PublicationWithEnaStudiesAndSampleCount] -} diff --git a/app/models/api/SequencerLabInfo.scala b/app/models/api/SequencerLabInfo.scala deleted file mode 100644 index 930b3013..00000000 --- a/app/models/api/SequencerLabInfo.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -/** - * Response model containing sequencer instrument and lab information. - * - * @param instrumentId The unique instrument ID from BAM/CRAM headers - * @param labName Name of the sequencing laboratory - * @param isD2c Whether the lab offers direct-to-consumer services - * @param manufacturer Optional manufacturer name (e.g., 'Illumina', 'PacBio') - * @param model Optional model name (e.g., 'NovaSeq 6000', 'MiSeq') - * @param websiteUrl Optional URL to the lab's official website - */ -case class SequencerLabInfo( - instrumentId: String, - labName: String, - isD2c: Boolean, - manufacturer: Option[String] = None, - model: Option[String] = None, - websiteUrl: Option[String] = None - ) - -object SequencerLabInfo { - implicit val format: OFormat[SequencerLabInfo] = Json.format[SequencerLabInfo] -} \ No newline at end of file diff --git a/app/models/api/SequencerLabInstrumentsResponse.scala b/app/models/api/SequencerLabInstrumentsResponse.scala deleted file mode 100644 index 5865f7e8..00000000 --- a/app/models/api/SequencerLabInstrumentsResponse.scala +++ /dev/null @@ -1,18 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -/** - * Response model for the list of lab-instrument associations. 
- * - * @param data List of lab-instrument associations - * @param count Total count of associations - */ -case class SequencerLabInstrumentsResponse( - data: Seq[SequencerLabInfo], - count: Int - ) - -object SequencerLabInstrumentsResponse { - implicit val format: OFormat[SequencerLabInstrumentsResponse] = Json.format[SequencerLabInstrumentsResponse] -} \ No newline at end of file diff --git a/app/models/api/SequencerLabLookupResponse.scala b/app/models/api/SequencerLabLookupResponse.scala deleted file mode 100644 index 4c03a113..00000000 --- a/app/models/api/SequencerLabLookupResponse.scala +++ /dev/null @@ -1,32 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -case class SequencerLabLookupResponse( - instrumentId: String, - labName: Option[String] = None, - isD2c: Option[Boolean] = None, - manufacturer: Option[String] = None, - model: Option[String] = None, - websiteUrl: Option[String] = None, - source: String = "CURATOR", - confidenceScore: Double = 1.0, - observationCount: Int = 0, - pendingProposal: Option[PendingProposalSummary] = None - ) - -object SequencerLabLookupResponse { - implicit val format: OFormat[SequencerLabLookupResponse] = Json.format[SequencerLabLookupResponse] -} - -case class PendingProposalSummary( - proposalId: Int, - proposedLabName: String, - observationCount: Int, - confidenceScore: Double, - status: String - ) - -object PendingProposalSummary { - implicit val format: OFormat[PendingProposalSummary] = Json.format[PendingProposalSummary] -} diff --git a/app/models/api/StudyWithHaplogroups.scala b/app/models/api/StudyWithHaplogroups.scala deleted file mode 100644 index 3150465c..00000000 --- a/app/models/api/StudyWithHaplogroups.scala +++ /dev/null @@ -1,29 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -case class StudyWithHaplogroups( - accession: String, - title: String, - centerName: String, - source: String, - yHaplogroup: Option[String], - mtHaplogroup: Option[String], - notes: 
Option[String] - ) - -object StudyWithHaplogroups { - implicit val format: OFormat[StudyWithHaplogroups] = Json.format -} - -case class SampleWithStudies( - sampleName: Option[String], - accession: String, - sex: Option[String], - geoCoord: Option[GeoCoord], - studies: List[StudyWithHaplogroups] - ) - -object SampleWithStudies { - implicit val format: OFormat[SampleWithStudies] = Json.format -} diff --git a/app/models/api/TreeDTO.scala b/app/models/api/TreeDTO.scala deleted file mode 100644 index b69db2f4..00000000 --- a/app/models/api/TreeDTO.scala +++ /dev/null @@ -1,201 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -import java.time.ZonedDateTime - -/** - * Represents a tree structure with a name, breadcrumbs, and an optional subclade. - * - * @param name The name of the tree. - * @param crumbs A list of breadcrumbs (navigational links) associated with the tree. - * @param subclade An optional subclade represented as another tree node within the hierarchy. - */ -case class TreeDTO(name: String, crumbs: List[CrumbDTO], subclade: Option[TreeNodeDTO]) - -case class SubcladeDTO(name: String, parentName: Option[String], variants: Seq[VariantDTO], lastUpdated: ZonedDateTime, isBackbone: Boolean = false) - -/** - * Represents a breadcrumb with a label and a URL. - * - * This class is used to define navigational links in the form of breadcrumbs, providing a label - * for display purposes and a corresponding URL for navigation. - * - * @param label The text displayed for the breadcrumb. - * @param url The URL associated with the breadcrumb, representing the navigation target. - */ -case class CrumbDTO(label: String, url: String) - -/** - * Represents a tree node data transfer object (DTO) used to model hierarchical structures with metadata and variants. - * - * @param name The name of the tree node. - * @param variants A sequence of associated `VariantDTO` objects representing different variations related to this tree node. 
- * @param children A list of child `TreeNodeDTO` representing the hierarchical relationship of the tree structure. - * @param updated The timestamp at which the node or its content was last updated. - * @param isBackbone A boolean flag indicating whether this node is part of the backbone structure of the tree. Defaults to `false`. - */ -case class TreeNodeDTO( - name: String, - variants: Seq[VariantDTO], - children: List[TreeNodeDTO], - updated: ZonedDateTime, - isBackbone: Boolean = false, - variantCount: Option[Int] = None, - formedYbp: Option[Int] = None, - tmrcaYbp: Option[Int] = None - ) { - /** - * Calculates the weight of the current tree node. - * - * The weight is determined by adding 1 (representing the current node) to the sum - * of the weights of all child nodes in the hierarchical structure. - * - * @return The total weight as an integer, which is the sum of the weight - * of the current node and the weights of all child nodes. - */ - def weight: Int = 1 + children.map(_.weight).sum - - def sortedVariants: Seq[VariantDTO] = TreeNodeDTO.sortVariants(this.variants) -} - -/** - * Represents a genomic coordinate and mutation details for a specific region. - * - * @constructor Creates an instance of `GenomicCoordinate` with the specified start and stop positions, - * ancestral allele, and derived allele. - * @param start The start position of the genomic region. - * @param stop The stop position of the genomic region. - * @param anc The ancestral allele at this genomic coordinate. - * @param der The derived allele at this genomic coordinate. - * - * The class defines a genomic region along with the transition details from an ancestral allele (`anc`) - * to a derived allele (`der`) and allows for string representation of the coordinate in the format: - * `" ->"`. - */ -case class GenomicCoordinate(start: Int, stop: Int, anc: String, der: String) { - /** - * Converts the object into its string representation. 
- * - * @return A string combining the `start`, `anc`, and `der` values in the format "start anc->der". - */ - @Override - override def toString: String = s"$start $anc->$der" -} - -/** - * Represents a genomic variant along with its name, coordinates, and variant type. - * - * @param name The name of the variant (primary display name). - * @param coordinates A mapping of reference genomes to their respective genomic coordinates. - * Each `GenomicCoordinate` represents the specific start and stop positions - * along with the ancestral and derived alleles for the region. - * @param variantType The type of the variant, indicating the nature or classification of the mutation. - * @param aliases Alternative names for this variant, grouped by source/type. - * Keys are alias types (e.g., "common_name", "rs_id", "isogg", "yfull"). - * Values are lists of alias values from that source. - */ -case class VariantDTO( - name: String, - coordinates: Map[String, GenomicCoordinate], - variantType: String, - aliases: Map[String, Seq[String]] = Map.empty - ) - -/** - * Companion object for the `GenomicCoordinate` case class. - * - * Provides an implicit JSON formatter for serializing and deserializing - * `GenomicCoordinate` instances using the Play Framework's JSON library. - * - * The formatter enables seamless transformation of `GenomicCoordinate` objects - * to their JSON representations and vice versa. This is particularly useful - * for applications that involve API communication or storage in a JSON-based format. - */ -object GenomicCoordinate { - implicit val featureCoordFormats: OFormat[GenomicCoordinate] = Json.format[GenomicCoordinate] -} - -/** - * Companion object for the `VariantDTO` case class. - * - * Provides an implicit JSON formatter for serializing and deserializing `VariantDTO` instances - * using the Play Framework's JSON library. 
This formatter allows seamless conversion between - * `VariantDTO` objects and their JSON representation for APIs or other JSON-based integrations. - */ -object VariantDTO { - implicit val treeFeatureFormats: OFormat[VariantDTO] = Json.format[VariantDTO] -} - -/** - * Companion object for the `TreeNodeDTO` case class. - * - * Provides an implicit JSON formatter for serializing and deserializing - * `TreeNodeDTO` instances using the Play Framework's JSON library. - * - * This formatter enables seamless conversion of `TreeNodeDTO` objects - * to and from their JSON representation, simplifying data interchange - * in applications that utilize hierarchical tree structures. - */ -object TreeNodeDTO { - implicit val treeNodeFormats: OFormat[TreeNodeDTO] = Json.format[TreeNodeDTO] - - private def extractComponents(s: String): (String, Option[Long]) = { - val pattern = """([A-Za-z]+)?(\d+)""".r - pattern.findFirstMatchIn(s) match - case Some(m) => - val prefix = Option(m.group(1)).getOrElse("") - val number = Some(m.group(2).toLong) - (prefix, number) - case None => (s, None) - } - - def sortVariants(variants: Seq[VariantDTO]): Seq[VariantDTO] = - variants.sortWith { (a, b) => - (a.name.contains(":"), b.name.contains(":")) match - case (true, false) => false // a has chrY: prefix, b doesn't -> a comes after - case (false, true) => true // b has chrY: prefix, a doesn't -> a comes before - case _ => // both have or both don't have chrY: prefix - val (prefixA, numA) = extractComponents(a.name) - val (prefixB, numB) = extractComponents(b.name) - if prefixA != prefixB then - prefixA < prefixB - else - (numA, numB) match - case (Some(n1), Some(n2)) => n1 < n2 - case _ => a.name < b.name - } - -} - -/** - * Companion object for the `CrumbDTO` case class. - * - * Provides an implicit JSON formatter for serializing and deserializing - * `CrumbDTO` instances using the Play Framework's JSON library. 
- * - * This formatter enables seamless conversion of `CrumbDTO` objects to and from - * their JSON representation, facilitating integration with APIs or systems that - * utilize JSON data. - */ -object CrumbDTO { - implicit val crumbFormats: OFormat[CrumbDTO] = Json.format[CrumbDTO] -} - -/** - * Companion object for the `TreeDTO` case class. - * - * Provides an implicit JSON formatter for serializing and deserializing - * instances of `TreeDTO` using the Play Framework's JSON library. - * - * This formatter enables automatic conversion of `TreeDTO` objects - * to and from their JSON representation, facilitating seamless integration - * with APIs or systems that utilize JSON. - */ -object TreeDTO { - implicit val treeFormats: OFormat[TreeDTO] = Json.format[TreeDTO] -} - -object SubcladeDTO { - implicit val subcladeFormats: OFormat[SubcladeDTO] = Json.format[SubcladeDTO] -} \ No newline at end of file diff --git a/app/models/api/VariantApiModels.scala b/app/models/api/VariantApiModels.scala deleted file mode 100644 index 80d52059..00000000 --- a/app/models/api/VariantApiModels.scala +++ /dev/null @@ -1,241 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -/** - * Request to add a reference build for an existing variant. - * Matches on commonName or rsId to find the variant group, - * then adds the new build coordinates. 
- * - * @param name The variant name (commonName) to match - * @param rsId Optional rsId to match (used if name not found) - * @param contig The contig name (e.g., "chrY") - * @param position The position on the contig - * @param refAllele The reference allele - * @param altAllele The alternate allele - * @param refGenome The reference genome (e.g., "GRCh37", "GRCh38", "hs1") - * @param variantType The variant type (e.g., "SNP", "INDEL") - */ -case class AddVariantBuildRequest( - name: Option[String], - rsId: Option[String], - contig: String, - position: Int, - refAllele: String, - altAllele: String, - refGenome: String, - variantType: String = "SNP" -) - -object AddVariantBuildRequest { - implicit val format: OFormat[AddVariantBuildRequest] = Json.format[AddVariantBuildRequest] -} - -/** - * Bulk request to add builds to multiple variants. - */ -case class BulkAddVariantBuildsRequest( - variants: Seq[AddVariantBuildRequest] -) - -object BulkAddVariantBuildsRequest { - implicit val format: OFormat[BulkAddVariantBuildsRequest] = Json.format[BulkAddVariantBuildsRequest] -} - -/** - * Request to update the rsId of a variant. - * - * @param name The variant name (commonName) to match - * @param rsId The rsId to set - */ -case class UpdateVariantRsIdRequest( - name: String, - rsId: String -) - -object UpdateVariantRsIdRequest { - implicit val format: OFormat[UpdateVariantRsIdRequest] = Json.format[UpdateVariantRsIdRequest] -} - -/** - * Bulk request to update rsIds for multiple variants. - */ -case class BulkUpdateRsIdsRequest( - variants: Seq[UpdateVariantRsIdRequest] -) - -object BulkUpdateRsIdsRequest { - implicit val format: OFormat[BulkUpdateRsIdsRequest] = Json.format[BulkUpdateRsIdsRequest] -} - -/** - * Result of a single variant operation. 
- */ -case class VariantOperationResult( - name: Option[String], - rsId: Option[String], - status: String, - message: Option[String] = None, - variantId: Option[Int] = None -) - -object VariantOperationResult { - implicit val format: OFormat[VariantOperationResult] = Json.format[VariantOperationResult] -} - -/** - * Response for bulk variant operations. - */ -case class BulkVariantOperationResponse( - total: Int, - succeeded: Int, - failed: Int, - results: Seq[VariantOperationResult] -) - -object BulkVariantOperationResponse { - implicit val format: OFormat[BulkVariantOperationResponse] = Json.format[BulkVariantOperationResponse] -} - -// ============================================================================ -// Alias Source Management Models -// ============================================================================ - -/** - * Request to update the source for aliases matching a prefix pattern. - * - * @param aliasPrefix The prefix to match (e.g., "FGC" matches "FGC29071") - * @param newSource The new source value (e.g., "FGC") - * @param oldSource Optional: only update aliases with this current source (e.g., "migration") - */ -case class UpdateAliasSourceRequest( - aliasPrefix: String, - newSource: String, - oldSource: Option[String] = None -) - -object UpdateAliasSourceRequest { - implicit val format: OFormat[UpdateAliasSourceRequest] = Json.format[UpdateAliasSourceRequest] -} - -/** - * Bulk request to update sources for multiple prefix patterns. - */ -case class BulkUpdateAliasSourcesRequest( - updates: Seq[UpdateAliasSourceRequest] -) - -object BulkUpdateAliasSourcesRequest { - implicit val format: OFormat[BulkUpdateAliasSourcesRequest] = Json.format[BulkUpdateAliasSourcesRequest] -} - -/** - * Result of a single alias source update operation. 
- */ -case class AliasSourceUpdateResult( - aliasPrefix: String, - newSource: String, - aliasesUpdated: Int, - status: String, - message: Option[String] = None -) - -object AliasSourceUpdateResult { - implicit val format: OFormat[AliasSourceUpdateResult] = Json.format[AliasSourceUpdateResult] -} - -/** - * Response for bulk alias source update operations. - */ -case class BulkAliasSourceUpdateResponse( - total: Int, - totalAliasesUpdated: Int, - results: Seq[AliasSourceUpdateResult] -) - -object BulkAliasSourceUpdateResponse { - implicit val format: OFormat[BulkAliasSourceUpdateResponse] = Json.format[BulkAliasSourceUpdateResponse] -} - -/** - * Summary of alias sources in the database. - */ -case class AliasSourceSummary( - source: String, - count: Int -) - -object AliasSourceSummary { - implicit val format: OFormat[AliasSourceSummary] = Json.format[AliasSourceSummary] -} - -/** - * Response for alias source statistics. - */ -case class AliasSourceStatsResponse( - sources: Seq[AliasSourceSummary], - totalAliases: Int -) - -object AliasSourceStatsResponse { - implicit val format: OFormat[AliasSourceStatsResponse] = Json.format[AliasSourceStatsResponse] -} - -// ============================================================================ -// DU Naming Authority Models -// ============================================================================ - -/** - * Request to assign a DU name to a variant. - * Variant is identified by variantId. - * - * @param variantId The variant ID to assign a DU name to - */ -case class AssignDuNameRequest( - variantId: Int -) - -object AssignDuNameRequest { - implicit val format: OFormat[AssignDuNameRequest] = Json.format[AssignDuNameRequest] -} - -/** - * Bulk request to assign DU names to multiple variants. 
- */ -case class BulkAssignDuNamesRequest( - variantIds: Seq[Int] -) - -object BulkAssignDuNamesRequest { - implicit val format: OFormat[BulkAssignDuNamesRequest] = Json.format[BulkAssignDuNamesRequest] -} - -/** - * Result of a DU name assignment operation. - */ -case class DuNameAssignmentResult( - variantId: Int, - duName: Option[String], - previousName: Option[String], - status: String, - message: Option[String] = None -) - -object DuNameAssignmentResult { - implicit val format: OFormat[DuNameAssignmentResult] = Json.format[DuNameAssignmentResult] -} - -/** - * Response for bulk DU name assignment. - */ -case class BulkDuNameAssignmentResponse( - total: Int, - succeeded: Int, - failed: Int, - skipped: Int, - results: Seq[DuNameAssignmentResult] -) - -object BulkDuNameAssignmentResponse { - implicit val format: OFormat[BulkDuNameAssignmentResponse] = Json.format[BulkDuNameAssignmentResponse] -} diff --git a/app/models/api/VariantPublicApiModels.scala b/app/models/api/VariantPublicApiModels.scala deleted file mode 100644 index dad16dbd..00000000 --- a/app/models/api/VariantPublicApiModels.scala +++ /dev/null @@ -1,108 +0,0 @@ -package models.api - -import play.api.libs.json.{Json, OFormat} - -/** - * Public API response for a variant coordinate in a specific reference assembly. - * Includes ref/alt alleles per assembly to handle strand differences (e.g., hs1 reverse complements). - * - * @param contig The contig/chromosome name (e.g., "chrY") - * @param position The 1-based position on the contig - * @param ref The reference allele in THIS assembly (may differ from other assemblies) - * @param alt The alternate/derived allele in THIS assembly - */ -case class VariantCoordinateDTO( - contig: String, - position: Int, - ref: String, - alt: String -) - -object VariantCoordinateDTO { - implicit val format: OFormat[VariantCoordinateDTO] = Json.format[VariantCoordinateDTO] -} - -/** - * Public API response for variant aliases grouped by source. 
- * - * @param commonNames SNP names from various sources (e.g., ["M269", "S21"]) - * @param rsIds dbSNP identifiers (e.g., ["rs9786076"]) - * @param sources Names grouped by source (e.g., {"ybrowse": ["M269"], "ftdna": ["S312"]}) - */ -case class VariantAliasesDTO( - commonNames: Seq[String] = Seq.empty, - rsIds: Seq[String] = Seq.empty, - sources: Map[String, Seq[String]] = Map.empty -) - -object VariantAliasesDTO { - implicit val format: OFormat[VariantAliasesDTO] = Json.format[VariantAliasesDTO] -} - -/** - * Public API response for a variant's haplogroup association. - * Included when the variant defines a haplogroup. - * - * @param haplogroupId The haplogroup database ID - * @param haplogroupName The haplogroup name (e.g., "R-M269", "I-L21") - */ -case class DefiningHaplogroupDTO( - haplogroupId: Int, - haplogroupName: String -) - -object DefiningHaplogroupDTO { - implicit val format: OFormat[DefiningHaplogroupDTO] = Json.format[DefiningHaplogroupDTO] -} - -/** - * Public API response for a single variant. 
- * - * Designed to be forward-compatible with the proposed variant_v2 schema: - * - canonicalName as primary identifier (nullable for unnamed variants) - * - coordinates as map with per-assembly alleles (handles strand differences) - * - aliases as structured object (not separate table) - * - definingHaplogroup for parallel mutation support - * - * @param variantId Internal variant ID (for reference/linking) - * @param canonicalName Primary name (e.g., "M269"); null for unnamed/novel variants - * @param variantType Mutation type: "SNP", "INDEL", "MNP" - * @param namingStatus Status: "NAMED", "UNNAMED", or "PENDING_REVIEW" - * @param coordinates Map of reference genome -> coordinates with alleles - * @param aliases Alternative names grouped by type/source - * @param definingHaplogroup The haplogroup this variant defines (if any) - */ -case class PublicVariantDTO( - variantId: Int, - canonicalName: Option[String], - variantType: String, - namingStatus: String, - coordinates: Map[String, VariantCoordinateDTO], - aliases: VariantAliasesDTO, - definingHaplogroup: Option[DefiningHaplogroupDTO] = None -) - -object PublicVariantDTO { - implicit val format: OFormat[PublicVariantDTO] = Json.format[PublicVariantDTO] -} - -/** - * Paginated response for variant search/list. 
- * - * @param items The variants for the current page - * @param currentPage Current page number (1-based) - * @param pageSize Items per page - * @param totalItems Total matching variants - * @param totalPages Total pages available - */ -case class VariantSearchResponse( - items: Seq[PublicVariantDTO], - currentPage: Int, - pageSize: Int, - totalItems: Long, - totalPages: Int -) - -object VariantSearchResponse { - implicit val format: OFormat[VariantSearchResponse] = Json.format[VariantSearchResponse] -} diff --git a/app/models/api/genomics/AssociateLabWithInstrumentRequest.scala b/app/models/api/genomics/AssociateLabWithInstrumentRequest.scala deleted file mode 100644 index 44d44f2d..00000000 --- a/app/models/api/genomics/AssociateLabWithInstrumentRequest.scala +++ /dev/null @@ -1,22 +0,0 @@ -package models.api.genomics - -import play.api.libs.json.{Json, OFormat} - -/** - * Request model for associating a lab with an instrument ID. - * - * @param instrumentId The unique instrument ID from BAM/CRAM headers (e.g., 'A00123') - * @param labName The name of the sequencing lab to associate with this instrument - * @param manufacturer Optional manufacturer name (e.g., 'Illumina', 'PacBio') - * @param model Optional model name (e.g., 'NovaSeq 6000', 'MiSeq') - */ -case class AssociateLabWithInstrumentRequest( - instrumentId: String, - labName: String, - manufacturer: Option[String] = None, - model: Option[String] = None - ) - -object AssociateLabWithInstrumentRequest { - implicit val format: OFormat[AssociateLabWithInstrumentRequest] = Json.format[AssociateLabWithInstrumentRequest] -} \ No newline at end of file diff --git a/app/models/api/genomics/AssociateLabWithInstrumentResponse.scala b/app/models/api/genomics/AssociateLabWithInstrumentResponse.scala deleted file mode 100644 index 2dd461c5..00000000 --- a/app/models/api/genomics/AssociateLabWithInstrumentResponse.scala +++ /dev/null @@ -1,28 +0,0 @@ -package models.api.genomics - -import play.api.libs.json.{Json, 
OFormat} - -/** - * Response model for the lab-instrument association operation. - * - * @param instrumentId The instrument ID that was associated - * @param labId The ID of the lab (newly created or existing) - * @param labName The name of the associated lab - * @param manufacturer The manufacturer name of the instrument - * @param model The model name of the instrument - * @param isNewLab Whether a new lab placeholder was created - * @param message Status message describing the result - */ -case class AssociateLabWithInstrumentResponse( - instrumentId: String, - labId: Int, - labName: String, - manufacturer: Option[String] = None, - model: Option[String] = None, - isNewLab: Boolean, - message: String - ) - -object AssociateLabWithInstrumentResponse { - implicit val format: OFormat[AssociateLabWithInstrumentResponse] = Json.format[AssociateLabWithInstrumentResponse] -} \ No newline at end of file diff --git a/app/models/api/genomics/GenomeRegionsApiModels.scala b/app/models/api/genomics/GenomeRegionsApiModels.scala deleted file mode 100644 index 0a13c1f9..00000000 --- a/app/models/api/genomics/GenomeRegionsApiModels.scala +++ /dev/null @@ -1,151 +0,0 @@ -package models.api.genomics - -import play.api.libs.json.{Json, OFormat} - -/** - * API DTOs for the Genome Regions API. - * These models match the specification for the Navigator client. - */ - -/** - * A genomic position range. - */ -case class RegionDto( - start: Long, - end: Long, - `type`: Option[String] = None, - modifier: Option[Double] = None -) - -object RegionDto { - implicit val format: OFormat[RegionDto] = Json.format[RegionDto] -} - -/** - * A named genomic region (e.g., palindrome P1). - */ -case class NamedRegionDto( - name: String, - start: Long, - end: Long, - `type`: String, - modifier: Option[Double] = None -) - -object NamedRegionDto { - implicit val format: OFormat[NamedRegionDto] = Json.format[NamedRegionDto] -} - -/** - * Telomere regions at chromosome ends. 
- */ -case class TelomeresDto( - p: Option[RegionDto] = None, - q: Option[RegionDto] = None -) - -object TelomeresDto { - implicit val format: OFormat[TelomeresDto] = Json.format[TelomeresDto] -} - -/** - * Cytoband annotation for ideogram display. - */ -case class CytobandDto( - name: String, - start: Long, - end: Long, - stain: String -) - -object CytobandDto { - implicit val format: OFormat[CytobandDto] = Json.format[CytobandDto] -} - -/** - * STR marker position. - */ -case class StrMarkerDto( - name: String, - start: Long, - end: Long, - period: Int, - verified: Boolean, - note: Option[String] = None -) - -object StrMarkerDto { - implicit val format: OFormat[StrMarkerDto] = Json.format[StrMarkerDto] -} - -/** - * Y-chromosome specific regions grouped by type. - */ -case class YChromosomeRegionsDto( - par1: Option[RegionDto] = None, - par2: Option[RegionDto] = None, - xtr: Seq[RegionDto] = Seq.empty, - ampliconic: Seq[RegionDto] = Seq.empty, - palindromes: Seq[NamedRegionDto] = Seq.empty, - heterochromatin: Option[RegionDto] = None, - xDegenerate: Seq[RegionDto] = Seq.empty -) - -object YChromosomeRegionsDto { - implicit val format: OFormat[YChromosomeRegionsDto] = Json.format[YChromosomeRegionsDto] -} - -/** - * All region data for a single chromosome. - */ -case class ChromosomeRegionsDto( - length: Long, - centromere: Option[RegionDto] = None, - telomeres: Option[TelomeresDto] = None, - cytobands: Seq[CytobandDto] = Seq.empty, - regions: Option[YChromosomeRegionsDto] = None, - strMarkers: Seq[StrMarkerDto] = Seq.empty -) - -object ChromosomeRegionsDto { - implicit val format: OFormat[ChromosomeRegionsDto] = Json.format[ChromosomeRegionsDto] -} - -/** - * Complete response for the genome regions API. 
- */ -case class GenomeRegionsResponse( - build: String, - version: String, - generatedAt: String, - chromosomes: Map[String, ChromosomeRegionsDto] -) - -object GenomeRegionsResponse { - implicit val format: OFormat[GenomeRegionsResponse] = Json.format[GenomeRegionsResponse] -} - -/** - * Error response for unknown builds or other failures. - */ -case class GenomeRegionsError( - error: String, - message: String, - supportedBuilds: Seq[String] -) - -object GenomeRegionsError { - implicit val format: OFormat[GenomeRegionsError] = Json.format[GenomeRegionsError] -} - -/** - * Response for listing supported builds. - */ -case class SupportedBuildsResponse( - supportedBuilds: Seq[String], - version: String -) - -object SupportedBuildsResponse { - implicit val format: OFormat[SupportedBuildsResponse] = Json.format[SupportedBuildsResponse] -} diff --git a/app/models/api/genomics/GenomeRegionsManagementModels.scala b/app/models/api/genomics/GenomeRegionsManagementModels.scala deleted file mode 100644 index 0fb5d76b..00000000 --- a/app/models/api/genomics/GenomeRegionsManagementModels.scala +++ /dev/null @@ -1,110 +0,0 @@ -package models.api.genomics - -import play.api.libs.json.{JsValue, Json, OFormat} - -/** - * API DTOs for Genome Regions Management (CRUD operations). 
- */ - -case class RegionCoordinateDto( - contig: String, - start: Long, - end: Long -) - -object RegionCoordinateDto { - implicit val format: OFormat[RegionCoordinateDto] = Json.format[RegionCoordinateDto] -} - -// ============================================================================ -// Request DTOs -// ============================================================================ - -case class CreateGenomeRegionRequest( - regionType: String, - name: Option[String] = None, - coordinates: Map[String, RegionCoordinateDto], - properties: Option[JsValue] = None -) - -object CreateGenomeRegionRequest { - implicit val format: OFormat[CreateGenomeRegionRequest] = Json.format[CreateGenomeRegionRequest] -} - -case class UpdateGenomeRegionRequest( - regionType: Option[String] = None, - name: Option[String] = None, - coordinates: Option[Map[String, RegionCoordinateDto]] = None, - properties: Option[JsValue] = None -) - -object UpdateGenomeRegionRequest { - implicit val format: OFormat[UpdateGenomeRegionRequest] = Json.format[UpdateGenomeRegionRequest] -} - -// ============================================================================ -// Bulk Request DTOs -// ============================================================================ - -case class BulkCreateGenomeRegionsRequest(regions: Seq[CreateGenomeRegionRequest]) - -object BulkCreateGenomeRegionsRequest { - implicit val format: OFormat[BulkCreateGenomeRegionsRequest] = Json.format[BulkCreateGenomeRegionsRequest] -} - -// ============================================================================ -// Response DTOs -// ============================================================================ - -case class GenomeRegionDetailDto( - id: Int, - regionType: String, - name: Option[String], - coordinates: Map[String, RegionCoordinateDto], - properties: JsValue -) - -object GenomeRegionDetailDto { - implicit val format: OFormat[GenomeRegionDetailDto] = Json.format[GenomeRegionDetailDto] -} - -// 
============================================================================ -// List Response DTOs -// ============================================================================ - -case class GenomeRegionListResponse( - regions: Seq[GenomeRegionDetailDto], - total: Int, - page: Int, - pageSize: Int -) - -object GenomeRegionListResponse { - implicit val format: OFormat[GenomeRegionListResponse] = Json.format[GenomeRegionListResponse] -} - -// ============================================================================ -// Bulk Operation Response -// ============================================================================ - -case class BulkOperationResult( - index: Int, - status: String, - id: Option[Int] = None, - error: Option[String] = None -) - -object BulkOperationResult { - implicit val format: OFormat[BulkOperationResult] = Json.format[BulkOperationResult] -} - -case class BulkOperationResponse( - total: Int, - succeeded: Int, - failed: Int, - results: Seq[BulkOperationResult] -) - -object BulkOperationResponse { - implicit val format: OFormat[BulkOperationResponse] = Json.format[BulkOperationResponse] -} \ No newline at end of file diff --git a/app/models/api/genomics/SequencingLabCrud.scala b/app/models/api/genomics/SequencingLabCrud.scala deleted file mode 100644 index 98b9c5cd..00000000 --- a/app/models/api/genomics/SequencingLabCrud.scala +++ /dev/null @@ -1,25 +0,0 @@ -package models.api.genomics - -import play.api.libs.json.{Json, OFormat} - -case class SequencingLabCreateRequest( - name: String, - isD2c: Option[Boolean] = None, - websiteUrl: Option[String] = None, - descriptionMarkdown: Option[String] = None - ) - -object SequencingLabCreateRequest { - implicit val format: OFormat[SequencingLabCreateRequest] = Json.format[SequencingLabCreateRequest] -} - -case class SequencingLabUpdateRequest( - name: Option[String] = None, - isD2c: Option[Boolean] = None, - websiteUrl: Option[String] = None, - descriptionMarkdown: Option[String] = None - ) - 
-object SequencingLabUpdateRequest { - implicit val format: OFormat[SequencingLabUpdateRequest] = Json.format[SequencingLabUpdateRequest] -} diff --git a/app/models/api/genomics/SpecimanDonorMergeRequest.scala b/app/models/api/genomics/SpecimanDonorMergeRequest.scala deleted file mode 100644 index 997e129c..00000000 --- a/app/models/api/genomics/SpecimanDonorMergeRequest.scala +++ /dev/null @@ -1,141 +0,0 @@ -package models.api.genomics - -import play.api.libs.json.* - -/** - * Represents the strategy for resolving conflicts or handling decisions during a merge operation - * involving multiple specimen donors. - * - * The `MergeStrategy` enum provides the following options: - * - `PreferTarget`: Conflicts are resolved by preferring the data already present in the target donor. - * - `PreferSource`: Conflicts are resolved by preferring the data from the source donors over the target donor. - * - `MostComplete`: Conflicts are resolved by selecting the most complete data based on the aggregation of - * values from all donors involved in the merge. - */ -enum MergeStrategy derives CanEqual { - case PreferTarget, PreferSource, MostComplete -} - -/** - * Object providing JSON format support for the `MergeStrategy` enumeration. - * - * The implicit `Format` instance handles serialization and deserialization - * of `MergeStrategy` values to and from JSON. The JSON representation of a - * `MergeStrategy` is a string corresponding to the enumeration values: - * - `PreferTarget` - * - `PreferSource` - * - `MostComplete` - * - * Reads operations interpret the JSON string and convert it into the appropriate - * `MergeStrategy` value. If an unrecognized string is encountered, an - * `IllegalArgumentException` is thrown. - * - * Writes operations serialize a `MergeStrategy` value into its corresponding - * string representation for JSON output. 
- */ -object MergeStrategy { - implicit val format: Format[MergeStrategy] = new Format[MergeStrategy] { - def reads(json: JsValue): JsResult[MergeStrategy] = json.validate[String].map { - case "PreferTarget" => PreferTarget - case "PreferSource" => PreferSource - case "MostComplete" => MostComplete - case other => throw new IllegalArgumentException(s"Unknown merge strategy: $other") - } - - def writes(strategy: MergeStrategy): JsValue = JsString(strategy.toString) - } -} - -/** - * Represents a request to merge specimen donors in a data management system. - * - * This case class encapsulates the details necessary to perform a donor merge operation, - * including the target donor ID, source donor IDs, and the strategy to handle conflicts or - * determine how data should be aggregated during the merge process. - * - * @param targetId the unique identifier of the target donor into which the source donors will be merged - * @param sourceIds a list of unique identifiers of the source donors to be merged into the target donor - * @param mergeStrategy specifies the strategy to use when resolving conflicts or combining data during the merge - */ -case class SpecimenDonorMergeRequest( - targetId: Int, - sourceIds: List[Int], - mergeStrategy: MergeStrategy - ) - -/** - * Companion object for the `SpecimenDonorMergeRequest` case class. - * - * This object provides the implicit JSON format needed for serializing and - * deserializing instances of `SpecimenDonorMergeRequest`. - */ -object SpecimenDonorMergeRequest { - implicit val format: OFormat[SpecimenDonorMergeRequest] = Json.format[SpecimenDonorMergeRequest] -} - -/** - * Represents the result of a donor merge operation in a specimen management system. - * - * This case class captures the details of the outcome when multiple donor records are merged - * into a single record. 
It contains information about the resulting merged donor ID, - * the number of biosamples updated, the IDs of removed donors, and a list of any conflicts - * encountered during the merge operation. - * - * @param mergedDonorId the unique identifier of the donor record that results from the merge operation - * @param updatedBiosamples the number of biosamples that were selected or reassigned to the merged donor - * @param removedDonors a list of unique identifiers of source donors that were removed as part of the merge process - * @param conflicts a list of conflicts encountered during the merge process, with details about the conflicting fields and resolutions - */ -case class SpecimenDonorMergeResult( - mergedDonorId: Int, - updatedBiosamples: Int, - removedDonors: List[Int], - conflicts: List[MergeConflict] - ) - -/** - * Companion object for the `SpecimenDonorMergeResult` case class. - * - * Provides JSON serialization and deserialization support for `SpecimenDonorMergeResult`. - * This is achieved using Play's JSON library to define the implicit `OFormat`. - * - * The `SpecimenDonorMergeResult` class represents the outcome of merging multiple donor - * records into a unified donor record. It includes details such as the merged donor ID, - * updated biosample count, removed donor IDs, and any merge conflicts encountered. - */ -object SpecimenDonorMergeResult { - implicit val format: OFormat[SpecimenDonorMergeResult] = Json.format[SpecimenDonorMergeResult] -} - -/** - * Represents a conflict encountered during a merge operation between a target donor - * and one or more source donors. A merge conflict occurs when the values of a specific - * field differ between the target and source donors. 
- * - * @param field the name of the field where the conflict was detected - * @param targetValue the value of the field in the target donor - * @param sourceValue the value of the field in the source donor - * @param resolution the resolved value for the conflicting field, derived from the merge strategy - */ -case class MergeConflict( - field: String, - targetValue: String, - sourceValue: String, - resolution: String - ) - -/** - * Companion object for the `MergeConflict` case class. This object provides - * implicit JSON serialization and deserialization functionality for `MergeConflict` - * instances. - * - * The `MergeConflict` case class represents a conflict that occurs during the - * process of merging donor data, with details about the conflicting field, - * the values in the target and source donors, and the resolved value. - * - * The implicit `OFormat` provided by this object is used to convert `MergeConflict` - * instances to and from JSON, facilitating their handling in APIs or persistent storage. - */ -object MergeConflict { - implicit val format: OFormat[MergeConflict] = Json.format[MergeConflict] -} \ No newline at end of file diff --git a/app/models/api/haplogroups/TreeMergeModels.scala b/app/models/api/haplogroups/TreeMergeModels.scala deleted file mode 100644 index 1f4879a1..00000000 --- a/app/models/api/haplogroups/TreeMergeModels.scala +++ /dev/null @@ -1,305 +0,0 @@ -package models.api.haplogroups - -import models.HaplogroupType -import play.api.libs.json.{Format, Json, OFormat, Reads, Writes} - -/** - * API DTOs for Haplogroup Tree Merge operations. - * - * Supports merging external haplogroup trees from sources like ISOGG, ytree.net, - * and other researchers into the DecodingUs baseline tree. 
- */ - -// ============================================================================ -// Input Tree Structure -// ============================================================================ - -/** - * A variant with its primary name and optional aliases. - * Aliases represent alternative names for the same SNP from different labs/sources. - * Example: M207 (primary) with aliases Page37, UTY2 - */ -case class VariantInput( - name: String, - aliases: List[String] = List.empty -) - -object VariantInput { - implicit val format: OFormat[VariantInput] = Json.format[VariantInput] -} - -/** - * A node in the input phylogenetic tree for merging. - * Matching is done by variants, not names, to handle different naming conventions. - */ -case class PhyloNodeInput( - name: String, - variants: List[VariantInput] = List.empty, - formedYbp: Option[Int] = None, - formedYbpLower: Option[Int] = None, - formedYbpUpper: Option[Int] = None, - tmrcaYbp: Option[Int] = None, - tmrcaYbpLower: Option[Int] = None, - tmrcaYbpUpper: Option[Int] = None, - children: List[PhyloNodeInput] = List.empty -) - -object PhyloNodeInput { - implicit val format: OFormat[PhyloNodeInput] = Json.format[PhyloNodeInput] -} - -// ============================================================================ -// Merge Configuration -// ============================================================================ - -/** - * Configuration for source priority during merge. - * Lower index = higher priority. - */ -case class SourcePriorityConfig( - sourcePriorities: List[String], - defaultPriority: Int = 100 -) - -object SourcePriorityConfig { - implicit val format: OFormat[SourcePriorityConfig] = Json.format[SourcePriorityConfig] -} - -/** - * Strategy for handling conflicts during merge. 
- */ -sealed trait ConflictStrategy - -object ConflictStrategy { - case object HigherPriorityWins extends ConflictStrategy - case object KeepExisting extends ConflictStrategy - case object AlwaysUpdate extends ConflictStrategy - - implicit val reads: Reads[ConflictStrategy] = Reads.StringReads.map { - case "higher_priority_wins" => HigherPriorityWins - case "keep_existing" => KeepExisting - case "always_update" => AlwaysUpdate - case other => throw new IllegalArgumentException(s"Unknown conflict strategy: $other") - } - - implicit val writes: Writes[ConflictStrategy] = Writes.StringWrites.contramap { - case HigherPriorityWins => "higher_priority_wins" - case KeepExisting => "keep_existing" - case AlwaysUpdate => "always_update" - } - - implicit val format: Format[ConflictStrategy] = Format(reads, writes) -} - -// ============================================================================ -// Request DTOs -// ============================================================================ - -/** - * Request for full tree merge (replace entire Y-DNA or mtDNA tree). - * - * @param stagingMode When true (default), changes are recorded in a change set for curator review - * but NOT applied to production. When false, changes are applied directly. - */ -case class TreeMergeRequest( - haplogroupType: HaplogroupType, - sourceTree: PhyloNodeInput, - sourceName: String, - priorityConfig: Option[SourcePriorityConfig] = None, - conflictStrategy: Option[ConflictStrategy] = None, - dryRun: Boolean = false, - stagingMode: Boolean = true -) - -object TreeMergeRequest { - implicit val format: OFormat[TreeMergeRequest] = Json.format[TreeMergeRequest] -} - -/** - * Request for subtree merge (merge under a specific anchor node). - * - * @param stagingMode When true (default), changes are recorded in a change set for curator review - * but NOT applied to production. When false, changes are applied directly. 
- */ -case class SubtreeMergeRequest( - haplogroupType: HaplogroupType, - anchorHaplogroupName: String, - sourceTree: PhyloNodeInput, - sourceName: String, - priorityConfig: Option[SourcePriorityConfig] = None, - conflictStrategy: Option[ConflictStrategy] = None, - dryRun: Boolean = false, - stagingMode: Boolean = true -) - -object SubtreeMergeRequest { - implicit val format: OFormat[SubtreeMergeRequest] = Json.format[SubtreeMergeRequest] -} - -/** - * Request for merge preview. - */ -case class MergePreviewRequest( - haplogroupType: HaplogroupType, - anchorHaplogroupName: Option[String] = None, - sourceTree: PhyloNodeInput, - sourceName: String, - priorityConfig: Option[SourcePriorityConfig] = None -) - -object MergePreviewRequest { - implicit val format: OFormat[MergePreviewRequest] = Json.format[MergePreviewRequest] -} - -// ============================================================================ -// Response DTOs -// ============================================================================ - -/** - * Statistics from a merge operation. 
- */ -case class MergeStatistics( - nodesProcessed: Int, - nodesCreated: Int, - nodesUpdated: Int, - nodesUnchanged: Int, - variantsAdded: Int, - variantsUpdated: Int, - relationshipsCreated: Int, - relationshipsUpdated: Int, - splitOperations: Int = 0 -) - -object MergeStatistics { - implicit val format: OFormat[MergeStatistics] = Json.format[MergeStatistics] - - val empty: MergeStatistics = MergeStatistics(0, 0, 0, 0, 0, 0, 0, 0, 0) - - def combine(a: MergeStatistics, b: MergeStatistics): MergeStatistics = MergeStatistics( - nodesProcessed = a.nodesProcessed + b.nodesProcessed, - nodesCreated = a.nodesCreated + b.nodesCreated, - nodesUpdated = a.nodesUpdated + b.nodesUpdated, - nodesUnchanged = a.nodesUnchanged + b.nodesUnchanged, - variantsAdded = a.variantsAdded + b.variantsAdded, - variantsUpdated = a.variantsUpdated + b.variantsUpdated, - relationshipsCreated = a.relationshipsCreated + b.relationshipsCreated, - relationshipsUpdated = a.relationshipsUpdated + b.relationshipsUpdated, - splitOperations = a.splitOperations + b.splitOperations - ) -} - -/** - * Details of a conflict encountered during merge. - */ -case class MergeConflict( - haplogroupName: String, - field: String, - existingValue: String, - newValue: String, - resolution: String, - existingSource: String, - newSource: String -) - -object MergeConflict { - implicit val format: OFormat[MergeConflict] = Json.format[MergeConflict] -} - -/** - * Details of a split operation performed during merge. - */ -case class SplitOperation( - parentName: String, - newIntermediateName: String, - variantsRedistributed: List[String], - childrenReassigned: List[String], - source: String -) - -object SplitOperation { - implicit val format: OFormat[SplitOperation] = Json.format[SplitOperation] -} - -/** - * Records an ambiguous or inconsistent placement during merge. - * - * Phylogenetics often involves uncertain placement. 
This flag alerts human - * curators to branches where SNP data is contradictory, possibly due to: - * - Sequencing errors in source data - * - Recurrent mutations (homoplasy) - * - Missing intermediate nodes - * - Nomenclature mismatches between sources - * - * @param nodeName The node with ambiguous placement - * @param ambiguityType Classification of the ambiguity - * @param description Human-readable explanation - * @param sharedVariants Variants that matched (overlap) - * @param conflictingVariants Variants that conflict (present in one but not expected) - * @param candidateMatches Other nodes that could have been matches - * @param resolution How the algorithm resolved the ambiguity - * @param confidence Score from 0.0 (very uncertain) to 1.0 (confident) - */ -case class PlacementAmbiguity( - nodeName: String, - ambiguityType: String, - description: String, - sharedVariants: List[String] = List.empty, - conflictingVariants: List[String] = List.empty, - candidateMatches: List[String] = List.empty, - resolution: String, - confidence: Double -) - -object PlacementAmbiguity { - implicit val format: OFormat[PlacementAmbiguity] = Json.format[PlacementAmbiguity] - - // Ambiguity type constants - val PARTIAL_MATCH = "PARTIAL_MATCH" // Some SNPs match, others don't - val MULTIPLE_CANDIDATES = "MULTIPLE_CANDIDATES" // Multiple nodes could be the match - val RECURRENT_SNP = "RECURRENT_SNP" // SNP appears in multiple lineages - val ORPHAN_PLACEMENT = "ORPHAN_PLACEMENT" // Node placed without strong variant evidence - val NAME_VARIANT_MISMATCH = "NAME_VARIANT_MISMATCH" // Name matches but variants differ -} - -/** - * Result of a merge operation. 
- */ -case class TreeMergeResponse( - success: Boolean, - message: String, - statistics: MergeStatistics, - conflicts: List[MergeConflict] = List.empty, - splits: List[SplitOperation] = List.empty, - ambiguities: List[PlacementAmbiguity] = List.empty, - errors: List[String] = List.empty, - ambiguityReportPath: Option[String] = None -) - -object TreeMergeResponse { - implicit val format: OFormat[TreeMergeResponse] = Json.format[TreeMergeResponse] - - def failure(message: String, errors: List[String] = List.empty): TreeMergeResponse = - TreeMergeResponse( - success = false, - message = message, - statistics = MergeStatistics.empty, - errors = errors - ) -} - -/** - * Preview of merge results (without applying changes). - */ -case class MergePreviewResponse( - statistics: MergeStatistics, - conflicts: List[MergeConflict], - splits: List[SplitOperation], - ambiguities: List[PlacementAmbiguity], - newNodes: List[String], - updatedNodes: List[String], - unchangedNodes: List[String] -) - -object MergePreviewResponse { - implicit val format: OFormat[MergePreviewResponse] = Json.format[MergePreviewResponse] -} diff --git a/app/models/atmosphere/AtmosphereRecords.scala b/app/models/atmosphere/AtmosphereRecords.scala deleted file mode 100644 index 32956a7d..00000000 --- a/app/models/atmosphere/AtmosphereRecords.scala +++ /dev/null @@ -1,809 +0,0 @@ -package models.atmosphere - -import play.api.libs.json.* - -import java.time.Instant - -// --- Common Definitions --- - -case class RecordMeta( - version: Int, - createdAt: Instant, - updatedAt: Option[Instant], - lastModifiedField: Option[String] - ) - -object RecordMeta { - implicit val format: OFormat[RecordMeta] = Json.format[RecordMeta] -} - -case class FileInfo( - fileName: String, - fileSizeBytes: Option[Long], - fileFormat: String, - checksum: Option[String], - checksumAlgorithm: Option[String], - location: Option[String] - ) - -object FileInfo { - implicit val format: OFormat[FileInfo] = Json.format[FileInfo] -} - -case 
class VariantCall( - contigAccession: String, - position: Int, - referenceAllele: String, - alternateAllele: String, - rsId: Option[String], - variantName: Option[String], - genotype: Option[String], - quality: Option[Double], - depth: Option[Int] - ) - -object VariantCall { - implicit val format: OFormat[VariantCall] = Json.format[VariantCall] -} - -case class PrivateVariantData( - variants: Option[Seq[VariantCall]], - analysisVersion: Option[String], - referenceTree: Option[String] - ) - -object PrivateVariantData { - implicit val format: OFormat[PrivateVariantData] = Json.format[PrivateVariantData] -} - -case class HaplogroupResult( - haplogroupName: String, - score: Double, - matchingSnps: Option[Int], - mismatchingSnps: Option[Int], - ancestralMatches: Option[Int], - treeDepth: Option[Int], - lineagePath: Option[Seq[String]], - privateVariants: Option[PrivateVariantData] - ) - -object HaplogroupResult { - implicit val format: OFormat[HaplogroupResult] = Json.format[HaplogroupResult] -} - -case class HaplogroupAssignments( - yDna: Option[HaplogroupResult], - mtDna: Option[HaplogroupResult] - ) - -object HaplogroupAssignments { - implicit val format: OFormat[HaplogroupAssignments] = Json.format[HaplogroupAssignments] -} - -case class ContigMetrics( - contigName: String, - callableBases: Int, - meanCoverage: Option[Double], - poorMappingQuality: Option[Int], - lowCoverage: Option[Int], - noCoverage: Option[Int] - ) - -object ContigMetrics { - implicit val format: OFormat[ContigMetrics] = Json.format[ContigMetrics] -} - -case class AlignmentMetrics( - genomeTerritory: Option[Long], - meanCoverage: Option[Double], - medianCoverage: Option[Double], - sdCoverage: Option[Double], - pctExcDupe: Option[Double], - pctExcMapq: Option[Double], - pct10x: Option[Double], - pct20x: Option[Double], - pct30x: Option[Double], - hetSnpSensitivity: Option[Double], - contigs: Option[Seq[ContigMetrics]] - ) - -object AlignmentMetrics { - implicit val format: 
OFormat[AlignmentMetrics] = Json.format[AlignmentMetrics] -} - -case class PopulationComponent( - populationCode: String, - populationName: Option[String], - superPopulation: Option[String], // Continental grouping (European, African, etc.) - percentage: Double, - confidenceInterval: Option[Map[String, Double]], // "lower", "upper" - rank: Option[Int] // Display rank by percentage (1 = highest) - ) - -object PopulationComponent { - implicit val format: OFormat[PopulationComponent] = Json.format[PopulationComponent] -} - -case class SuperPopulationSummary( - superPopulation: String, // European, African, East Asian, etc. - percentage: Double, // Combined percentage 0.0-100.0 - populations: Seq[String] // Contributing population codes - ) - -object SuperPopulationSummary { - implicit val format: OFormat[SuperPopulationSummary] = Json.format[SuperPopulationSummary] -} - -case class IbdSegment( - chromosome: String, - startPosition: Int, - endPosition: Int, - lengthCm: Double, - snpCount: Option[Int], - isHalfIdentical: Option[Boolean] - ) - -object IbdSegment { - implicit val format: OFormat[IbdSegment] = Json.format[IbdSegment] -} - -// STR structures are complex (Unions). Simplified for now or handling as JsValue if too complex. -// The lexicon defines strValue as a union of simple, multiCopy, complex. -// We can use a sealed trait. 
- -sealed trait StrValue - -case class SimpleStrValue(`type`: String = "simple", repeats: Int) extends StrValue - -case class MultiCopyStrValue(`type`: String = "multiCopy", copies: Seq[Int]) extends StrValue - -case class StrAllele(repeats: Double, count: Int, designation: Option[String]) - -object StrAllele { - implicit val format: OFormat[StrAllele] = Json.format[StrAllele] -} - -case class ComplexStrValue(`type`: String = "complex", alleles: Seq[StrAllele], rawNotation: Option[String]) extends StrValue - -object StrValue { - implicit val simpleFormat: OFormat[SimpleStrValue] = Json.format[SimpleStrValue] - implicit val multiCopyFormat: OFormat[MultiCopyStrValue] = Json.format[MultiCopyStrValue] - implicit val complexFormat: OFormat[ComplexStrValue] = Json.format[ComplexStrValue] - - implicit val reads: Reads[StrValue] = (json: JsValue) => { - (json \ "type").asOpt[String] match { - case Some("simple") => simpleFormat.reads(json) - case Some("multiCopy") => multiCopyFormat.reads(json) - case Some("complex") => complexFormat.reads(json) - case _ => JsError("Unknown or missing StrValue type") - } - } - - implicit val writes: Writes[StrValue] = { - case s: SimpleStrValue => simpleFormat.writes(s) - case m: MultiCopyStrValue => multiCopyFormat.writes(m) - case c: ComplexStrValue => complexFormat.writes(c) - } -} - -case class StrMarkerValue( - marker: String, - value: StrValue, - panel: Option[String], - quality: Option[String], - readDepth: Option[Int] - ) - -object StrMarkerValue { - implicit val format: OFormat[StrMarkerValue] = Json.format[StrMarkerValue] -} - -case class StrPanel( - panelName: String, - markerCount: Int, - provider: Option[String], - testDate: Option[Instant] - ) - -object StrPanel { - implicit val format: OFormat[StrPanel] = Json.format[StrPanel] -} - -case class AncestralStrState( - marker: String, - ancestralValue: StrValue, - confidence: Option[Double], - supportingSamples: Option[Int], - variance: Option[Double], - method: Option[String] - 
) - -object AncestralStrState { - implicit val format: OFormat[AncestralStrState] = Json.format[AncestralStrState] -} - -case class StrBranchMutation( - marker: String, - fromValue: StrValue, - toValue: StrValue, - stepChange: Option[Int], - confidence: Option[Double] - ) - -object StrBranchMutation { - implicit val format: OFormat[StrBranchMutation] = Json.format[StrBranchMutation] -} - -// --- Core Records --- - -case class WorkspaceRecord( - atUri: Option[String], // Not in schema but good to have for event handling - meta: RecordMeta, - sampleRefs: Seq[String], - projectRefs: Seq[String] - ) - -object WorkspaceRecord { - implicit val format: OFormat[WorkspaceRecord] = Json.format[WorkspaceRecord] -} - -case class BiosampleRecord( - atUri: String, - meta: RecordMeta, - sampleAccession: Option[String], - donorIdentifier: String, - citizenDid: String, - description: Option[String], - centerName: String, - sex: Option[String], - haplogroups: Option[HaplogroupAssignments], - sequenceRunRefs: Option[Seq[String]], - genotypeRefs: Option[Seq[String]], - populationBreakdownRef: Option[String], - strProfileRef: Option[String] - ) - -object BiosampleRecord { - implicit val format: OFormat[BiosampleRecord] = Json.format[BiosampleRecord] -} - -case class SequenceRunRecord( - atUri: String, - meta: RecordMeta, - biosampleRef: String, - platformName: String, - instrumentModel: Option[String], - instrumentId: Option[String], - testType: String, - libraryLayout: Option[String], - totalReads: Option[Int], - readLength: Option[Int], - meanInsertSize: Option[Double], - flowcellId: Option[String], - runDate: Option[Instant], - files: Option[Seq[FileInfo]], - alignmentRefs: Option[Seq[String]] - ) - -object SequenceRunRecord { - implicit val format: OFormat[SequenceRunRecord] = Json.format[SequenceRunRecord] -} - -case class AlignmentRecord( - atUri: String, - meta: RecordMeta, - sequenceRunRef: String, - biosampleRef: Option[String], - referenceBuild: String, - aligner: String, - 
variantCaller: Option[String], - files: Option[Seq[FileInfo]], - metrics: Option[AlignmentMetrics] - ) - -object AlignmentRecord { - implicit val format: OFormat[AlignmentRecord] = Json.format[AlignmentRecord] -} - -case class GenotypeRecord( - atUri: String, - meta: RecordMeta, - biosampleRef: String, - testTypeCode: String, // ARRAY_23ANDME_V5, ARRAY_ANCESTRYDNA_V2, etc. - provider: String, // 23andMe, AncestryDNA, FTDNA, etc. - chipType: Option[String], // Deprecated, use testTypeCode - chipVersion: Option[String], - totalMarkersCalled: Option[Int], // Markers with valid calls - totalMarkersPossible: Option[Int], // Total markers on chip - callRate: Option[Double], // % markers with valid call - noCallRate: Option[Double], // % markers with no call - yMarkersCalled: Option[Int], // Y-DNA markers with calls - yMarkersTotal: Option[Int], // Total Y-DNA markers - mtMarkersCalled: Option[Int], // mtDNA markers with calls - mtMarkersTotal: Option[Int], // Total mtDNA markers - autosomalMarkersCalled: Option[Int], // Autosomal markers called - hetRate: Option[Double], // Heterozygosity rate (QC metric) - testDate: Option[Instant], - processedAt: Option[Instant], // When processed by Navigator - buildVersion: Option[String], // GRCh37, GRCh38 - sourceFileHash: Option[String], // SHA-256 for deduplication - derivedHaplogroups: Option[HaplogroupAssignments], // Chip-derived haplogroups - populationBreakdownRef: Option[String], // AT URI to ancestry breakdown - files: Option[Seq[FileInfo]], - imputationRef: Option[String] - ) - -object GenotypeRecord { - implicit val format: OFormat[GenotypeRecord] = Json.format[GenotypeRecord] -} - -case class ImputationRecord( - atUri: String, - meta: RecordMeta, - genotypeRef: String, - biosampleRef: Option[String], - referencePanel: String, - imputationTool: String, - imputedVariantCount: Option[Int], - averageInfoScore: Option[Double], - files: Option[Seq[FileInfo]] - ) - -object ImputationRecord { - implicit val format: 
OFormat[ImputationRecord] = Json.format[ImputationRecord] -} - -case class ProjectRecord( - atUri: String, - meta: RecordMeta, - projectName: String, - description: Option[String], - administrator: String, - memberRefs: Seq[String] - ) - -object ProjectRecord { - implicit val format: OFormat[ProjectRecord] = Json.format[ProjectRecord] -} - -case class PopulationBreakdownRecord( - atUri: String, - meta: RecordMeta, - biosampleRef: String, - analysisMethod: String, // PCA_PROJECTION_GMM, ADMIXTURE, etc. - panelType: Option[String], // "aims" (~5k SNPs) or "genome-wide" (~500k SNPs) - referencePopulations: Option[String], // '1000G_HGDP_v1', etc. - referenceVersion: Option[String], // Reference panel version - kValue: Option[Int], - snpsAnalyzed: Option[Int], // Total SNPs in analysis panel - snpsWithGenotype: Option[Int], // SNPs with valid calls - snpsMissing: Option[Int], // SNPs with no call - confidenceLevel: Option[Double], // Overall confidence 0.0-1.0 - pcaCoordinates: Option[Seq[Double]], // First 3 PCA coordinates [x, y, z] - components: Seq[PopulationComponent], - superPopulationSummary: Option[Seq[SuperPopulationSummary]], // 9 continental groups - analysisDate: Option[Instant], - pipelineVersion: Option[String] - ) - -object PopulationBreakdownRecord { - implicit val format: OFormat[PopulationBreakdownRecord] = Json.format[PopulationBreakdownRecord] -} - -case class InstrumentObservationRecord( - atUri: String, - meta: RecordMeta, - instrumentId: String, - labName: String, - biosampleRef: String, - sequenceRunRef: Option[String], - platform: Option[String], - instrumentModel: Option[String], - flowcellId: Option[String], - runDate: Option[Instant], - confidence: Option[String] // KNOWN, INFERRED, GUESSED - ) - -object InstrumentObservationRecord { - implicit val format: OFormat[InstrumentObservationRecord] = Json.format[InstrumentObservationRecord] -} - -case class MatchConsentRecord( - atUri: String, - meta: RecordMeta, - biosampleRef: String, - 
consentLevel: String, // FULL, ANONYMOUS, PROJECT_ONLY - allowedMatchTypes: Option[Seq[String]], - minimumSegmentCm: Option[Double], - shareContactInfo: Option[Boolean], - consentedAt: Option[Instant], - expiresAt: Option[Instant] - ) - -object MatchConsentRecord { - implicit val format: OFormat[MatchConsentRecord] = Json.format[MatchConsentRecord] -} - -case class MatchEntry( - matchedBiosampleRef: String, - matchedCitizenDid: Option[String], - relationshipEstimate: Option[String], - totalSharedCm: Double, - longestSegmentCm: Option[Double], - segmentCount: Int, - sharedSegments: Option[Seq[IbdSegment]], - matchedAt: Option[Instant], - xMatchSharedCm: Option[Double] - ) - -object MatchEntry { - implicit val format: OFormat[MatchEntry] = Json.format[MatchEntry] -} - -case class MatchListRecord( - atUri: String, - meta: RecordMeta, - biosampleRef: String, - matchCount: Int, - lastCalculatedAt: Option[Instant], - matches: Seq[MatchEntry], - continuationToken: Option[String] - ) - -object MatchListRecord { - implicit val format: OFormat[MatchListRecord] = Json.format[MatchListRecord] -} - -case class MatchRequestRecord( - atUri: String, - meta: RecordMeta, - fromBiosampleRef: String, - toBiosampleRef: String, - status: String, // PENDING, ACCEPTED, DECLINED, EXPIRED, WITHDRAWN - message: Option[String], - sharedAncestorHint: Option[String], - expiresAt: Option[Instant], - respondedAt: Option[Instant] - ) - -object MatchRequestRecord { - implicit val format: OFormat[MatchRequestRecord] = Json.format[MatchRequestRecord] -} - -case class StrProfileRecord( - atUri: String, - meta: RecordMeta, - biosampleRef: String, - sequenceRunRef: Option[String], - panels: Option[Seq[StrPanel]], - markers: Seq[StrMarkerValue], - totalMarkers: Option[Int], - source: Option[String], - importedFrom: Option[String], - derivationMethod: Option[String], - files: Option[Seq[FileInfo]] - ) - -object StrProfileRecord { - implicit val format: OFormat[StrProfileRecord] = 
Json.format[StrProfileRecord] -} - -case class HaplogroupAncestralStrRecord( - atUri: String, - meta: RecordMeta, - haplogroup: String, - haplogroupTreeRef: Option[String], - parentHaplogroup: Option[String], - ancestralMarkers: Seq[AncestralStrState], - sampleCount: Option[Int], - computedAt: Instant, - method: Option[String], - softwareVersion: Option[String], - mutationRateModel: Option[String], - tmrcaEstimate: Option[Map[String, Double]], // Simplified for now - branchMutations: Option[Seq[StrBranchMutation]] - ) - -object HaplogroupAncestralStrRecord { - implicit val format: OFormat[HaplogroupAncestralStrRecord] = Json.format[HaplogroupAncestralStrRecord] -} - -// --- Group Project Records --- - -case class GroupProjectGovernance( - administrators: Seq[ProjectRoleAssignment], - coAdministrators: Option[Seq[ProjectRoleAssignment]] = None, - moderators: Option[Seq[ProjectRoleAssignment]] = None, - curators: Option[Seq[ProjectRoleAssignment]] = None, - successionPolicy: Option[String] = None - ) - -object GroupProjectGovernance { - implicit val format: OFormat[GroupProjectGovernance] = Json.format[GroupProjectGovernance] -} - -case class ProjectRoleAssignment( - citizenDid: String, - role: String, - permissions: Option[Seq[String]] = None, - appointedAt: Option[Instant] = None, - appointedBy: Option[String] = None - ) - -object ProjectRoleAssignment { - implicit val format: OFormat[ProjectRoleAssignment] = Json.format[ProjectRoleAssignment] -} - -case class ProjectVisibilityPolicy( - publicTreeView: Option[Boolean] = None, - memberListVisibility: Option[String] = None, - strPolicy: Option[String] = None, - snpPolicy: Option[String] = None, - ancestorPolicy: Option[String] = None - ) - -object ProjectVisibilityPolicy { - implicit val format: OFormat[ProjectVisibilityPolicy] = Json.format[ProjectVisibilityPolicy] -} - -case class ProjectSubgroup( - subgroupId: String, - name: String, - description: Option[String] = None, - haplogroupFilter: Option[String] = None, 
- color: Option[String] = None, - sortOrder: Option[Int] = None - ) - -object ProjectSubgroup { - implicit val format: OFormat[ProjectSubgroup] = Json.format[ProjectSubgroup] -} - -case class ProjectLink( - title: Option[String] = None, - url: Option[String] = None, - category: Option[String] = None - ) - -object ProjectLink { - implicit val format: OFormat[ProjectLink] = Json.format[ProjectLink] -} - -case class GroupProjectRecord( - atUri: String, - meta: RecordMeta, - projectName: String, - projectType: String, - targetHaplogroup: Option[String] = None, - targetLineage: Option[String] = None, - description: Option[String] = None, - backgroundInfo: Option[String] = None, - governance: GroupProjectGovernance, - visibilityPolicy: Option[ProjectVisibilityPolicy] = None, - joinPolicy: Option[String] = None, - haplogroupRequirement: Option[String] = None, - subgroups: Option[Seq[ProjectSubgroup]] = None, - links: Option[Seq[ProjectLink]] = None, - createdAt: Option[Instant] = None - ) - -object GroupProjectRecord { - implicit val format: OFormat[GroupProjectRecord] = Json.format[GroupProjectRecord] -} - -case class AncestorInfo( - name: Option[String] = None, - surname: Option[String] = None, - birthYear: Option[Int] = None, - birthCentury: Option[String] = None, - birthDecade: Option[String] = None, - birthCountry: Option[String] = None, - birthRegion: Option[String] = None, - birthPlace: Option[String] = None, - additionalInfo: Option[String] = None - ) - -object AncestorInfo { - implicit val format: OFormat[AncestorInfo] = Json.format[AncestorInfo] -} - -case class MemberVisibilityPrefs( - showInMemberList: Option[Boolean] = None, - showInTree: Option[Boolean] = None, - shareTerminalHaplogroup: Option[Boolean] = None, - shareFullLineagePath: Option[Boolean] = None, - sharePrivateVariants: Option[Boolean] = None, - ancestorVisibility: Option[String] = None, - strVisibility: Option[String] = None, - allowDirectContact: Option[Boolean] = None, - showDisplayName: 
Option[Boolean] = None - ) - -object MemberVisibilityPrefs { - implicit val format: OFormat[MemberVisibilityPrefs] = Json.format[MemberVisibilityPrefs] -} - -case class ProjectMembershipRecord( - atUri: String, - meta: RecordMeta, - projectRef: String, - biosampleRef: String, - status: String, - displayName: Option[String] = None, - kitId: Option[String] = None, - visibility: Option[MemberVisibilityPrefs] = None, - subgroupAssignments: Option[Seq[String]] = None, - earliestKnownAncestor: Option[AncestorInfo] = None, - contributionLevel: Option[String] = None, - joinedAt: Option[Instant] = None, - notes: Option[String] = None - ) - -object ProjectMembershipRecord { - implicit val format: OFormat[ProjectMembershipRecord] = Json.format[ProjectMembershipRecord] -} - -// --- Reconciliation Types --- - -case class ReconciliationStatus( - compatibilityLevel: String, // COMPATIBLE, MINOR_DIVERGENCE, MAJOR_DIVERGENCE, INCOMPATIBLE - consensusHaplogroup: String, - confidence: Option[Double], - divergencePoint: Option[String], // Where branches split in tree - branchCompatibilityScore: Option[Double], // LCA_depth / max(depth_A, depth_B) - snpConcordance: Option[Double], // % SNP agreement across runs - runCount: Option[Int], - warnings: Option[Seq[String]] - ) - -object ReconciliationStatus { - implicit val format: OFormat[ReconciliationStatus] = Json.format[ReconciliationStatus] -} - -case class AlternativePrediction( - haplogroup: String, - probability: Double - ) - -object AlternativePrediction { - implicit val format: OFormat[AlternativePrediction] = Json.format[AlternativePrediction] -} - -case class ModalMatch( - haplogroup: String, - geneticDistance: Int, - sampleCount: Option[Int] - ) - -object ModalMatch { - implicit val format: OFormat[ModalMatch] = Json.format[ModalMatch] -} - -case class StrHaplogroupPrediction( - predictedHaplogroup: String, - probability: Double, - predictionMethod: Option[String], // NEVGEN, HAPEST, YHAPLO, SAPP, BAYESIAN - 
alternativePredictions: Option[Seq[AlternativePrediction]], - markersUsed: Option[Int], - panelName: Option[String], - predictionDepth: Option[String], // MAJOR_CLADE, SUBCLADE, TERMINAL - modalMatch: Option[ModalMatch], - limitations: Option[Seq[String]] - ) - -object StrHaplogroupPrediction { - implicit val format: OFormat[StrHaplogroupPrediction] = Json.format[StrHaplogroupPrediction] -} - -case class RunHaplogroupCall( - sourceRef: String, // AT URI of run/alignment/STR profile - haplogroup: String, - confidence: Double, - callMethod: String, // SNP_PHYLOGENETIC, STR_PREDICTION, VENDOR_REPORTED - score: Option[Double], - supportingSnps: Option[Int], - conflictingSnps: Option[Int], - noCalls: Option[Int], - technology: Option[String], // WGS, WES, BIG_Y, SNP_ARRAY, STR_PANEL - meanCoverage: Option[Double], - treeVersion: Option[String], - strPrediction: Option[StrHaplogroupPrediction] - ) - -object RunHaplogroupCall { - implicit val format: OFormat[RunHaplogroupCall] = Json.format[RunHaplogroupCall] -} - -case class SnpCallFromRun( - runRef: String, - allele: String, - quality: Option[Double], - depth: Option[Int], - variantAlleleFrequency: Option[Double] - ) - -object SnpCallFromRun { - implicit val format: OFormat[SnpCallFromRun] = Json.format[SnpCallFromRun] -} - -case class SnpConflict( - position: Int, - snpName: Option[String], - contigAccession: Option[String], - calls: Seq[SnpCallFromRun], - resolution: Option[String], // ACCEPT_MAJORITY, ACCEPT_HIGHER_QUALITY, etc. 
- resolvedValue: Option[String] - ) - -object SnpConflict { - implicit val format: OFormat[SnpConflict] = Json.format[SnpConflict] -} - -case class HeteroplasmyObservation( - position: Int, - majorAllele: String, - minorAllele: String, - majorAlleleFrequency: Double, - depth: Option[Int], - isDefiningSnp: Option[Boolean], - affectedHaplogroup: Option[String] - ) - -object HeteroplasmyObservation { - implicit val format: OFormat[HeteroplasmyObservation] = Json.format[HeteroplasmyObservation] -} - -case class IdentityVerification( - kinshipCoefficient: Option[Double], - fingerprintSnpConcordance: Option[Double], - yStrDistance: Option[Int], - verificationStatus: Option[String], // VERIFIED_SAME, LIKELY_SAME, etc. - verificationMethod: Option[String] // AUTOSOMAL_KINSHIP, Y_STR, etc. - ) - -object IdentityVerification { - implicit val format: OFormat[IdentityVerification] = Json.format[IdentityVerification] -} - -case class ManualOverride( - overriddenHaplogroup: String, - reason: String, - overriddenAt: Instant, - overriddenBy: String // DID of user - ) - -object ManualOverride { - implicit val format: OFormat[ManualOverride] = Json.format[ManualOverride] -} - -case class AuditEntry( - timestamp: Instant, - action: String, // INITIAL_RECONCILIATION, RUN_ADDED, RUN_REMOVED, MANUAL_OVERRIDE, etc. 
- previousConsensus: Option[String], - newConsensus: Option[String], - runRef: Option[String], - notes: Option[String] - ) - -object AuditEntry { - implicit val format: OFormat[AuditEntry] = Json.format[AuditEntry] -} - -case class HaplogroupReconciliationRecord( - atUri: String, - meta: RecordMeta, - specimenDonorRef: String, // AT URI or identifier of specimen donor - dnaType: String, // Y_DNA or MT_DNA - status: ReconciliationStatus, - runCalls: Seq[RunHaplogroupCall], - snpConflicts: Option[Seq[SnpConflict]], - heteroplasmyObservations: Option[Seq[HeteroplasmyObservation]], - identityVerification: Option[IdentityVerification], - lastReconciliationAt: Option[Instant], - manualOverride: Option[ManualOverride], - auditLog: Option[Seq[AuditEntry]] - ) - -object HaplogroupReconciliationRecord { - implicit val format: OFormat[HaplogroupReconciliationRecord] = Json.format[HaplogroupReconciliationRecord] -} diff --git a/app/models/auth/ATProtocolAuthorizationServer.scala b/app/models/auth/ATProtocolAuthorizationServer.scala deleted file mode 100644 index 08fd49d4..00000000 --- a/app/models/auth/ATProtocolAuthorizationServer.scala +++ /dev/null @@ -1,18 +0,0 @@ -package models.auth - -import java.time.ZonedDateTime -import java.util.UUID - -case class ATProtocolAuthorizationServer( - id: Option[UUID], - issuerUrl: String, - authorizationEndpoint: Option[String], - tokenEndpoint: Option[String], - pushedAuthorizationRequestEndpoint: Option[String], - dpopSigningAlgValuesSupported: Option[String], - scopesSupported: Option[String], - clientIdMetadataDocumentSupported: Option[Boolean], - metadataFetchedAt: ZonedDateTime, - createdAt: ZonedDateTime, - updatedAt: ZonedDateTime - ) \ No newline at end of file diff --git a/app/models/auth/ATProtocolClientMetadata.scala b/app/models/auth/ATProtocolClientMetadata.scala deleted file mode 100644 index d31b4edd..00000000 --- a/app/models/auth/ATProtocolClientMetadata.scala +++ /dev/null @@ -1,17 +0,0 @@ -package models.auth - 
-import java.time.ZonedDateTime -import java.util.UUID - -case class ATProtocolClientMetadata( - id: Option[UUID], - clientIdUrl: String, - clientName: Option[String], - clientUri: Option[String], - logoUri: Option[String], - tosUri: Option[String], - policyUri: Option[String], - redirectUris: Option[String], - createdAt: ZonedDateTime, - updatedAt: ZonedDateTime - ) \ No newline at end of file diff --git a/app/models/auth/Permission.scala b/app/models/auth/Permission.scala deleted file mode 100644 index e2eff4f8..00000000 --- a/app/models/auth/Permission.scala +++ /dev/null @@ -1,12 +0,0 @@ -package models.auth - -import java.time.ZonedDateTime -import java.util.UUID - -case class Permission( - id: Option[UUID], - name: String, - description: Option[String], - createdAt: ZonedDateTime, - updatedAt: ZonedDateTime - ) \ No newline at end of file diff --git a/app/models/auth/Role.scala b/app/models/auth/Role.scala deleted file mode 100644 index deaed528..00000000 --- a/app/models/auth/Role.scala +++ /dev/null @@ -1,12 +0,0 @@ -package models.auth - -import java.time.ZonedDateTime -import java.util.UUID - -case class Role( - id: Option[UUID], - name: String, - description: Option[String], - createdAt: ZonedDateTime, - updatedAt: ZonedDateTime - ) diff --git a/app/models/auth/RolePermission.scala b/app/models/auth/RolePermission.scala deleted file mode 100644 index 2e77033c..00000000 --- a/app/models/auth/RolePermission.scala +++ /dev/null @@ -1,8 +0,0 @@ -package models.auth - -import java.util.UUID - -case class RolePermission( - roleId: UUID, - permissionId: UUID - ) \ No newline at end of file diff --git a/app/models/auth/UserLoginInfo.scala b/app/models/auth/UserLoginInfo.scala deleted file mode 100644 index 7a606787..00000000 --- a/app/models/auth/UserLoginInfo.scala +++ /dev/null @@ -1,13 +0,0 @@ -package models.auth - -import java.time.ZonedDateTime -import java.util.UUID - -case class UserLoginInfo( - id: Option[UUID], - userId: UUID, - providerId: String, - 
providerKey: String, - createdAt: ZonedDateTime, - updatedAt: ZonedDateTime - ) \ No newline at end of file diff --git a/app/models/auth/UserOauth2Info.scala b/app/models/auth/UserOauth2Info.scala deleted file mode 100644 index 69b5d91d..00000000 --- a/app/models/auth/UserOauth2Info.scala +++ /dev/null @@ -1,16 +0,0 @@ -package models.auth - -import java.time.ZonedDateTime -import java.util.UUID - -case class UserOauth2Info( - id: Option[UUID], - loginInfoId: UUID, - accessToken: String, - tokenType: Option[String], - expiresIn: Option[Long], - refreshToken: Option[String], - createdAt: ZonedDateTime, - updatedAt: ZonedDateTime, - scope: Option[String] - ) \ No newline at end of file diff --git a/app/models/auth/UserRole.scala b/app/models/auth/UserRole.scala deleted file mode 100644 index 6dadf47f..00000000 --- a/app/models/auth/UserRole.scala +++ /dev/null @@ -1,8 +0,0 @@ -package models.auth - -import java.util.UUID - -case class UserRole( - userId: UUID, - roleId: UUID - ) \ No newline at end of file diff --git a/app/models/dal/DatabaseSchema.scala b/app/models/dal/DatabaseSchema.scala deleted file mode 100644 index 8a2ac7a2..00000000 --- a/app/models/dal/DatabaseSchema.scala +++ /dev/null @@ -1,210 +0,0 @@ -package models.dal - -import models.dal.auth.* -import models.dal.domain.* - -/** - * Provides the database schema definition, which includes table queries for various domain-specific entities. - * - * This object serves as an entry point for interacting with the database, defining all table queries using Slick's - * `TableQuery` mechanism. Through these queries, operations such as retrieval, insertion, and modifications can - * be performed on the corresponding database tables. Additionally, custom mappers or implicit conversions can be - * defined here. - * - * Key functionalities: - * - Defines the mapping of domain entities to database tables using Slick's `TableQuery`. - * database-compatible string format and vice versa. 
- * - * Table queries defined in this schema include: - * - Analysis methods - * - Biosamples and related metadata - * - Ancestry analyses - * - Haplogroups, variants, and their relationships - * - Population and publication records - * - Quality metrics - * - Sequence files, libraries, and locations - * - Specimen donors - * - Studies and reported variants - * - * These queries enable streamlined interaction with the database while maintaining a clear separation between - * the database schema and application logic. - */ -object DatabaseSchema { - - import models.dal.MyPostgresProfile.api.* - - object domain { - - import models.dal.domain.discovery.* - import models.dal.domain.genomics.* - import models.dal.domain.haplogroups.* - import models.dal.domain.ibd.* - import models.dal.domain.pangenome.* - import models.dal.domain.publications.* - import models.dal.domain.social.* - import models.dal.domain.user.* // This needs to be here for social tables - - // User-related tables directly under domain - val users = TableQuery[UsersTable] - - // Social-related tables within a social object - object social { - val userBlocks = TableQuery[UserBlocksTable] - val conversations = TableQuery[ConversationsTable] - val conversationParticipants = TableQuery[ConversationParticipantsTable] - val messages = TableQuery[MessagesTable] - val feedPosts = TableQuery[FeedPostsTable] - val reputationEvents = TableQuery[ReputationEventsTable] - val reputationEventTypes = TableQuery[ReputationEventTypesTable] - val userReputationScores = TableQuery[UserReputationScoresTable] - } - - object genomics { - val analysisMethods = TableQuery[AnalysisMethodTable] - val ancestryAnalyses = TableQuery[AncestryAnalysisTable] - val alignmentMetadata = TableQuery[AlignmentMetadataTable] - - val assemblyMetadata = TableQuery[AssemblyMetadataTable] - val biosampleHaplogroups = TableQuery[BiosampleHaplogroupsTable] - val biosamples = TableQuery[BiosamplesTable] - val citizenBiosamples = 
TableQuery[CitizenBiosamplesTable] - val genbankContigs = TableQuery[GenbankContigsTable] - val geneAnnotations = TableQuery[GeneAnnotationsTable] - val populations = TableQuery[PopulationsTable] - val sequenceFiles = TableQuery[SequenceFilesTable] - - val sequenceLibraries = TableQuery[SequenceLibrariesTable] - val sequencingLabs = TableQuery[SequencingLabsTable] - val sequencerInstruments = TableQuery[SequencerInstrumentsTable] - val instrumentObservations = TableQuery[InstrumentObservationTable] - val instrumentAssociationProposals = TableQuery[InstrumentAssociationProposalTable] - val specimenDonors = TableQuery[SpecimenDonorsTable] - val validationServices = TableQuery[ValidationServicesTable] - val testTypeDefinition = TableQuery[TestTypeTable] - val testTypeTargetRegions = TableQuery[TestTypeTargetRegionTable] - val coverageExpectationProfiles = TableQuery[CoverageExpectationProfileTable] - - // Consolidated variant schema (replaces variant + variant_alias) - val variantsV2 = TableQuery[VariantV2Table] - val haplogroupCharacterStates = TableQuery[HaplogroupCharacterStateTable] - val branchMutations = TableQuery[BranchMutationTable] - val biosampleVariantCalls = TableQuery[BiosampleVariantCallTable] - val strMutationRates = TableQuery[StrMutationRateTable] - - // New tables for Atmosphere Lexicon sync - val populationBreakdowns = TableQuery[PopulationBreakdownTable] - val populationComponents = TableQuery[PopulationComponentTable] - val superPopulationSummaries = TableQuery[SuperPopulationSummaryTable] - val genotypeData = TableQuery[GenotypeDataTable] - val haplogroupReconciliations = TableQuery[HaplogroupReconciliationTable] - - // Per-sample callable loci for age estimation - val biosampleCallableLoci = TableQuery[BiosampleCallableLociTable] - - // Genome regions API tables - val genomeRegions = TableQuery[GenomeRegionTable] - val genomeRegionVersions = TableQuery[GenomeRegionVersionTable] - } - - object haplogroups { - val haplogroupRelationships = 
TableQuery[HaplogroupRelationshipsTable] - val relationshipRevisionMetadata = TableQuery[RelationshipRevisionMetadataTable] - val haplogroups = TableQuery[HaplogroupsTable] - val haplogroupVariantMetadata = TableQuery[HaplogroupVariantMetadataTable] - val haplogroupVariants = TableQuery[HaplogroupVariantsTable] - - // Tree Versioning System (Production/WIP) - val changeSets = TableQuery[ChangeSetsTable] - val treeChanges = TableQuery[TreeChangesTable] - val changeSetComments = TableQuery[ChangeSetCommentsTable] - - // WIP Shadow Tables for staging merge changes - val wipHaplogroups = TableQuery[WipHaplogroupTable] - val wipRelationships = TableQuery[WipRelationshipTable] - val wipHaplogroupVariants = TableQuery[WipHaplogroupVariantTable] - val wipReparents = TableQuery[WipReparentTable] - val wipResolutions = TableQuery[WipResolutionTable] - - // Age estimation - val genealogicalAnchors = TableQuery[GenealogicalAnchorTable] - val haplogroupAncestralStrs = TableQuery[HaplogroupAncestralStrTable] - - // Discovery System - val biosamplePrivateVariants = TableQuery[BiosamplePrivateVariantTable] - val proposedBranches = TableQuery[ProposedBranchTable] - val proposedBranchVariants = TableQuery[ProposedBranchVariantTable] - val proposedBranchEvidence = TableQuery[ProposedBranchEvidenceTable] - val curatorActions = TableQuery[CuratorActionTable] - val discoveryConfig = TableQuery[DiscoveryConfigTable] - } - - object pangenome { - val canonicalPangenomeVariants = TableQuery[CanonicalPangenomeVariantsTable] - val pangenomeAlignmentCoverages = TableQuery[PangenomeAlignmentCoverageTable] - val pangenomeAlignmentMetadata = TableQuery[PangenomeAlignmentMetadataTable] - val pangenomeGraphs = TableQuery[PangenomeGraphsTable] - val pangenomeNodes = TableQuery[PangenomeNodesTable] - val pangenomePathsTable = TableQuery[PangenomePathsTable] - val pangenomeVariantLinks = TableQuery[PangenomeVariantLinksTable] - val reportedVariantPangenomesTable = 
TableQuery[ReportedVariantPangenomesTable] - } - - object publications { - val genomicStudies = TableQuery[GenomicStudiesTable] - val publications = TableQuery[PublicationsTable] - val publicationCandidates = TableQuery[PublicationCandidatesTable] - val publicationSearchConfigs = TableQuery[PublicationSearchConfigsTable] - val publicationSearchRuns = TableQuery[PublicationSearchRunsTable] - val publicationBiosamples = TableQuery[PublicationBiosamplesTable] - val publicationCitizenBiosamples = TableQuery[PublicationCitizenBiosamplesTable] - val publicationGenomicStudies = TableQuery[PublicationEnaStudiesTable] - - - } - - object ibd { - val ibdDiscoveryIndices = TableQuery[IbdDiscoveryIndicesTable] - val ibdPdsAttestationsTable = TableQuery[IbdPdsAttestationsTable] - val matchSuggestions = TableQuery[MatchSuggestionsTable] - val populationBreakdownCache = TableQuery[PopulationBreakdownCacheTable] - val populationOverlapScores = TableQuery[PopulationOverlapScoresTable] - val matchRequestTracking = TableQuery[MatchRequestTrackingTable] - val matchConsentTracking = TableQuery[MatchConsentTrackingTable] - } - - object project { - val projects = TableQuery[ProjectTable] - val groupProjects = TableQuery[GroupProjectTable] - val groupProjectMembers = TableQuery[GroupProjectMemberTable] - } - } - - object auth { - val atProtocolClientMetadata = TableQuery[ATProtocolClientMetadataTable] - val atProtocolAuthorizationServers = TableQuery[ATProtocolAuthorizationServersTable] - val cookieConsents = TableQuery[CookieConsentsTable] - val permissions = TableQuery[PermissionsTable] - val rolePermissionsTable = TableQuery[RolePermissionsTable] - val roles = TableQuery[RolesTable] - val userLoginInfos = TableQuery[UserLoginInfoTable] - val userOauth2Infos = TableQuery[UserOauth2InfoTable] - val userPdsInfos = TableQuery[UserPdsInfoTable] - val userRoles = TableQuery[UserRolesTable] - } - - object support { - import models.dal.support.* - val contactMessages = 
TableQuery[ContactMessagesTable] - val messageReplies = TableQuery[MessageRepliesTable] - } - - object curator { - import models.dal.curator.* - val auditLog = TableQuery[AuditLogTable] - } - - object billing { - import models.dal.domain.billing.* - val patronSubscriptions = TableQuery[PatronSubscriptionTable] - } -} \ No newline at end of file diff --git a/app/models/dal/MetadataSchema.scala b/app/models/dal/MetadataSchema.scala deleted file mode 100644 index 8328bb72..00000000 --- a/app/models/dal/MetadataSchema.scala +++ /dev/null @@ -1,47 +0,0 @@ -package models.dal - -import models.PDSRegistration -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.pds.* -import slick.lifted.ProvenShape - -import java.time.ZonedDateTime - -object MetadataSchema { - - val pdsNodes = TableQuery[PdsNodeTable] - val pdsHeartbeatLogs = TableQuery[PdsHeartbeatLogTable] - val pdsFleetConfigs = TableQuery[PdsFleetConfigTable] - val pdsSubmissions = TableQuery[PdsSubmissionTable] - - class PDSRegistrationsTable(tag: Tag) extends Table[PDSRegistration](tag, "pds_registrations") { - def did = column[String]("did", O.PrimaryKey) - - def pdsUrl = column[String]("pds_url") - - def handle = column[String]("handle") - - def lastCommitCid = column[Option[String]]("last_commit_cid") - - def lastCommitSeq = column[Option[Long]]("last_commit_seq") - - def cursor = column[Long]("cursor") - - def createdAt = column[ZonedDateTime]("created_at") - - def updatedAt = column[ZonedDateTime]("updated_at") - - def leasedByInstanceId = column[Option[String]]("leased_by_instance_id") - - def leaseExpiresAt = column[Option[ZonedDateTime]]("lease_expires_at") - - def processingStatus = column[String]("processing_status") - - def * : ProvenShape[PDSRegistration] = ( - did, pdsUrl, handle, lastCommitCid, lastCommitSeq, cursor, createdAt, updatedAt, - leasedByInstanceId, leaseExpiresAt, processingStatus - ) <> ((PDSRegistration.apply _).tupled, PDSRegistration.unapply) - } - - val 
pdsRegistrations = TableQuery[PDSRegistrationsTable] -} diff --git a/app/models/dal/MyPostgresProfile.scala b/app/models/dal/MyPostgresProfile.scala deleted file mode 100644 index 44036b20..00000000 --- a/app/models/dal/MyPostgresProfile.scala +++ /dev/null @@ -1,383 +0,0 @@ -package models.dal - -import com.github.tminglei.slickpg.* -import models.dal.domain.genomics.MinHashSketch.{bytesToLongArray, longArrayToBytes} -import models.domain.genomics.* -import models.domain.publications.StudySource -import slick.basic.Capability -import slick.jdbc.{JdbcCapabilities, JdbcType} - -import java.time.LocalDateTime -import scala.language.higherKinds - -trait MyPostgresProfile extends ExPostgresProfile - with PgArraySupport - with PgDate2Support - with PgRangeSupport - with PgHStoreSupport - with PgSearchSupport - with PgPostGISSupport - with PgNetSupport - with PgLTreeSupport - with PgPlayJsonSupport // For JSON/JSONB support with Play JSON - with PgEnumSupport // Added PgEnumSupport - with array.PgArrayJdbcTypes { - def pgjson = "jsonb" // jsonb support is in postgres 9.4.0 onward; for 9.3.x use "json" - - import slick.ast.* - import slick.ast.Library.* - import slick.lifted.FunctionSymbolExtensionMethods.* - - override protected def computeCapabilities: Set[Capability] = - super.computeCapabilities + JdbcCapabilities.insertOrUpdate - - override val api: MyAPI.type = MyAPI // MyAPI is an inner object - - object MyAPI extends ExtPostgresAPI with ArrayImplicits - with Date2DateTimeImplicitsDuration - with NetImplicits - with LTreeImplicits - with RangeImplicits - with HStoreImplicits - with PostGISPlainImplicits - with JsonImplicits // Bring in JSON implicits, including jsonType and jsonbListType - with PostGISImplicits - with PostGISAssistants - with SearchImplicits - with SearchAssistants { - - import models.HaplogroupType - import models.domain.genomics.{DataGenerationMethod, HaplogroupResult, TargetType, TestType} - import play.api.libs.json.* - - // Implicit JSON 
formatters for the new JSONB case classes - implicit val sequenceFileChecksumJsonbFormat: OFormat[SequenceFileChecksumJsonb] = Json.format[SequenceFileChecksumJsonb] - implicit val sequenceFileHttpLocationJsonbFormat: OFormat[SequenceFileHttpLocationJsonb] = Json.format[SequenceFileHttpLocationJsonb] - implicit val sequenceFileAtpLocationJsonbFormat: OFormat[SequenceFileAtpLocationJsonb] = Json.format[SequenceFileAtpLocationJsonb] - - implicit val haplogroupResultJsonTypeMapper: JdbcType[HaplogroupResult] with BaseTypedType[HaplogroupResult] = - MappedJdbcType.base[HaplogroupResult, JsValue](Json.toJson(_), _.as[HaplogroupResult]) - - implicit val haplogroupTypeMapper: BaseColumnType[HaplogroupType] = - MappedColumnType.base[HaplogroupType, String]( - ht => ht.toString, - str => HaplogroupType.fromString(str).getOrElse( - throw new IllegalArgumentException(s"Invalid haplogroup type: $str") - ) - ) - - implicit val studySourceTypeMapper: JdbcType[StudySource] = - MappedColumnType.base[StudySource, String]( - st => st.toString, - s => StudySource.valueOf(s) - ) - - implicit val biosampleTypeMapper: JdbcType[BiosampleType] = - MappedColumnType.base[BiosampleType, String]( // Fixed BiologicalSex[String] to BiosampleType - bt => bt.toString, // converts BiosampleType to String for storage - s => BiosampleType.valueOf(s) // converts String back to BiosampleType - ) - - implicit val biologicalSexTypeMapper: JdbcType[BiologicalSex] = - MappedColumnType.base[BiologicalSex, String]( // Fixed BiologicalSex[String] to BiologicalSex - bs => bs.toString, - s => BiologicalSex.valueOf(s) - ) - - implicit val metricLevelTypeMapper: JdbcType[MetricLevel] = - MappedColumnType.base[MetricLevel, String]( - ml => ml.toString, - s => MetricLevel.valueOf(s) - ) - - // Import for TestType - import models.domain.genomics.TestType - - implicit val testTypeMapper: JdbcType[TestType] = - MappedColumnType.base[TestType, String]( - tt => tt.toString, - s => TestType.fromString(s).getOrElse( - 
throw new IllegalArgumentException(s"Invalid TestType value: $s") - ) - ) - - implicit val dataGenerationMethodTypeMapper: JdbcType[DataGenerationMethod] = - createEnumJdbcType("data_generation_method", _.toString, s => DataGenerationMethod.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid DataGenerationMethod value: $s") - ), quoteName = false) - - implicit val targetTypeTypeMapper: JdbcType[TargetType] = - createEnumJdbcType("target_type", _.toString, s => TargetType.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid TargetType value: $s") - ), quoteName = false) - - // --- Discovery System Enum Mappers --- - import models.domain.discovery.* - - implicit val biosampleSourceTypeMapper: JdbcType[BiosampleSourceType] = - MappedColumnType.base[BiosampleSourceType, String]( - bst => bst.toString, - s => BiosampleSourceType.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid BiosampleSourceType: $s") - ) - ) - - implicit val privateVariantStatusMapper: JdbcType[PrivateVariantStatus] = - MappedColumnType.base[PrivateVariantStatus, String]( - pvs => pvs.toString, - s => PrivateVariantStatus.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid PrivateVariantStatus: $s") - ) - ) - - implicit val proposedBranchStatusMapper: JdbcType[ProposedBranchStatus] = - MappedColumnType.base[ProposedBranchStatus, String]( - pbs => pbs.toString, - s => ProposedBranchStatus.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid ProposedBranchStatus: $s") - ) - ) - - implicit val curatorActionTypeMapper: JdbcType[CuratorActionType] = - MappedColumnType.base[CuratorActionType, String]( - cat => cat.toString, - s => CuratorActionType.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid CuratorActionType: $s") - ) - ) - - implicit val curatorTargetTypeMapper: JdbcType[CuratorTargetType] = - MappedColumnType.base[CuratorTargetType, String]( - ctt => ctt.toString, - s => 
CuratorTargetType.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid CuratorTargetType: $s") - ) - ) - - // Custom Slick mapper for Array[Long] <-> bytea - implicit val longArrayTypeMapper: BaseColumnType[Array[Long]] = - MappedColumnType.base[Array[Long], Array[Byte]]( - longArrayToBytes, - bytesToLongArray - ) - - // Array type mappers (from PgArraySupport) - implicit val strListTypeMapper: DriverJdbcType[List[String]] = new SimpleArrayJdbcType[String]("text").to(_.toList) - implicit val intListTypeMapper: DriverJdbcType[List[Int]] = new SimpleArrayJdbcType[Int]("int4").to(_.toList) - - // Play JSON array type mapper - implicit val playJsonArrayTypeMapper: DriverJdbcType[List[JsValue]] = - new AdvancedArrayJdbcType[JsValue]( - MyPostgresProfile.this.pgjson, // Use PgPlayJsonSupport.this.pgjson - s => utils.SimpleArrayUtils.fromString[JsValue](Json.parse(_))(s).orNull, - v => utils.SimpleArrayUtils.mkString[JsValue](_.toString())(v) - ).to(_.toList) - - // JSONB Type Mappers for the new SequenceFile fields - implicit val sequenceFileChecksumsJsonbTypeMapper: JdbcType[List[SequenceFileChecksumJsonb]] with BaseTypedType[List[SequenceFileChecksumJsonb]] = - MappedJdbcType.base[List[SequenceFileChecksumJsonb], JsValue](Json.toJson(_), _.as[List[SequenceFileChecksumJsonb]]) - - implicit val sequenceFileHttpLocationsJsonbTypeMapper: JdbcType[List[SequenceFileHttpLocationJsonb]] with BaseTypedType[List[SequenceFileHttpLocationJsonb]] = - MappedJdbcType.base[List[SequenceFileHttpLocationJsonb], JsValue](Json.toJson(_), _.as[List[SequenceFileHttpLocationJsonb]]) - - implicit val sequenceFileAtpLocationJsonbTypeMapper: JdbcType[Option[SequenceFileAtpLocationJsonb]] with BaseTypedType[Option[SequenceFileAtpLocationJsonb]] = { - // Import JsNull, JsObject locally for clarity - import play.api.libs.json.{JsNull, JsObject} - MappedJdbcType.base[Option[SequenceFileAtpLocationJsonb], JsValue]( - { - case Some(atp) => Json.toJson(atp) - case None => JsNull - 
}, - { jsValue => - if (jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None - else Some(jsValue.as[SequenceFileAtpLocationJsonb]) - } - ) - } - - // --- Population Breakdown JSONB Type Mappers --- - import models.domain.genomics.{PcaCoordinatesJsonb, SuperPopulationListJsonb} - - implicit val pcaCoordinatesJsonbTypeMapper: JdbcType[Option[PcaCoordinatesJsonb]] with BaseTypedType[Option[PcaCoordinatesJsonb]] = { - import play.api.libs.json.{JsNull, JsObject} - MappedJdbcType.base[Option[PcaCoordinatesJsonb], JsValue]( - { - case Some(pca) => Json.toJson(pca) - case None => JsNull - }, - { jsValue => - if (jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None - else Some(jsValue.as[PcaCoordinatesJsonb]) - } - ) - } - - implicit val superPopulationListJsonbTypeMapper: JdbcType[Option[SuperPopulationListJsonb]] with BaseTypedType[Option[SuperPopulationListJsonb]] = { - import play.api.libs.json.{JsNull, JsObject} - MappedJdbcType.base[Option[SuperPopulationListJsonb], JsValue]( - { - case Some(sp) => Json.toJson(sp) - case None => JsNull - }, - { jsValue => - if (jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None - else Some(jsValue.as[SuperPopulationListJsonb]) - } - ) - } - - // --- Genotype Data JSONB Type Mappers --- - import models.atmosphere.FileInfo - import models.domain.genomics.GenotypeMetrics - - implicit val genotypeMetricsJsonbTypeMapper: JdbcType[GenotypeMetrics] with BaseTypedType[GenotypeMetrics] = - MappedJdbcType.base[GenotypeMetrics, JsValue](Json.toJson(_), _.as[GenotypeMetrics]) - - implicit val fileInfoSeqJsonbTypeMapper: JdbcType[Option[Seq[FileInfo]]] with BaseTypedType[Option[Seq[FileInfo]]] = { - import play.api.libs.json.{JsNull, JsArray} - MappedJdbcType.base[Option[Seq[FileInfo]], JsValue]( - { - case Some(files) => Json.toJson(files) - case None => JsNull - }, - { jsValue => - if (jsValue == JsNull) None 
- else Some(jsValue.as[Seq[FileInfo]]) - } - ) - } - - - // --- Haplogroup Reconciliation JSONB Type Mappers --- - import models.domain.genomics.{DnaType, CompatibilityLevel, ReconciliationStatus} - import models.atmosphere.{RunHaplogroupCall, SnpConflict, HeteroplasmyObservation, IdentityVerification, ManualOverride, AuditEntry} - - implicit val dnaTypeMapper: JdbcType[DnaType] = - createEnumJdbcType("dna_type", _.toString, s => DnaType.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid DnaType value: $s") - ), quoteName = false) - - implicit val reconciliationStatusJsonbTypeMapper: JdbcType[ReconciliationStatus] with BaseTypedType[ReconciliationStatus] = - MappedJdbcType.base[ReconciliationStatus, JsValue](Json.toJson(_), _.as[ReconciliationStatus]) - - implicit val compatibilityLevelMapper: JdbcType[CompatibilityLevel] = - createEnumJdbcType("compatibility_level", _.toString, s => CompatibilityLevel.fromString(s).getOrElse( - throw new IllegalArgumentException(s"Invalid CompatibilityLevel value: $s") - ), quoteName = false) - - - implicit val warningsSeqJsonbTypeMapper: JdbcType[Option[Seq[String]]] with BaseTypedType[Option[Seq[String]]] = { - import play.api.libs.json.{JsNull, JsArray} - MappedJdbcType.base[Option[Seq[String]], JsValue]( - { - case Some(warnings) => Json.toJson(warnings) - case None => JsNull - }, - { jsValue => - if (jsValue == JsNull) None - else Some(jsValue.as[Seq[String]]) - } - ) - } - - implicit val runHaplogroupCallsJsonbTypeMapper: JdbcType[Seq[RunHaplogroupCall]] with BaseTypedType[Seq[RunHaplogroupCall]] = - MappedJdbcType.base[Seq[RunHaplogroupCall], JsValue](Json.toJson(_), _.as[Seq[RunHaplogroupCall]]) - - implicit val snpConflictsJsonbTypeMapper: JdbcType[Option[Seq[SnpConflict]]] with BaseTypedType[Option[Seq[SnpConflict]]] = { - import play.api.libs.json.JsNull - MappedJdbcType.base[Option[Seq[SnpConflict]], JsValue]( - { - case Some(conflicts) => Json.toJson(conflicts) - case None => JsNull - }, - { 
jsValue => - if (jsValue == JsNull) None - else Some(jsValue.as[Seq[SnpConflict]]) - } - ) - } - - implicit val heteroplasmyObservationsJsonbTypeMapper: JdbcType[Option[Seq[HeteroplasmyObservation]]] with BaseTypedType[Option[Seq[HeteroplasmyObservation]]] = { - import play.api.libs.json.JsNull - MappedJdbcType.base[Option[Seq[HeteroplasmyObservation]], JsValue]( - { - case Some(obs) => Json.toJson(obs) - case None => JsNull - }, - { jsValue => - if (jsValue == JsNull) None - else Some(jsValue.as[Seq[HeteroplasmyObservation]]) - } - ) - } - - implicit val identityVerificationJsonbTypeMapper: JdbcType[Option[IdentityVerification]] with BaseTypedType[Option[IdentityVerification]] = { - import play.api.libs.json.{JsNull, JsObject} - MappedJdbcType.base[Option[IdentityVerification], JsValue]( - { - case Some(iv) => Json.toJson(iv) - case None => JsNull - }, - { jsValue => - // Handle database NULL (Java null), JSON null, or empty object - if (jsValue == null || jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None - else Some(jsValue.as[IdentityVerification]) - } - ) - } - - implicit val manualOverrideJsonbTypeMapper: JdbcType[Option[ManualOverride]] with BaseTypedType[Option[ManualOverride]] = { - import play.api.libs.json.{JsNull, JsObject} - MappedJdbcType.base[Option[ManualOverride], JsValue]( - { - case Some(mo) => Json.toJson(mo) - case None => JsNull - }, - { jsValue => - // Handle database NULL (Java null), JSON null, or empty object - if (jsValue == null || jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None - else Some(jsValue.as[ManualOverride]) - } - ) - } - - implicit val auditLogJsonbTypeMapper: JdbcType[Option[Seq[AuditEntry]]] with BaseTypedType[Option[Seq[AuditEntry]]] = { - import play.api.libs.json.JsNull - MappedJdbcType.base[Option[Seq[AuditEntry]], JsValue]( - { - case Some(entries) => Json.toJson(entries) - case None => JsNull - }, - { jsValue => - // Handle 
database NULL (Java null) or JSON null - if (jsValue == null || jsValue == JsNull) None - else Some(jsValue.as[Seq[AuditEntry]]) - } - ) - } - - // --- Haplogroup Provenance JSONB Type Mapper --- - // Maps HaplogroupProvenance directly to JsValue. For nullable columns, use column[Option[HaplogroupProvenance]] - // and Slick will handle NULL automatically. - import models.domain.haplogroups.HaplogroupProvenance - - implicit val haplogroupProvenanceJsonbTypeMapper: JdbcType[HaplogroupProvenance] with BaseTypedType[HaplogroupProvenance] = - MappedJdbcType.base[HaplogroupProvenance, JsValue](Json.toJson(_), _.as[HaplogroupProvenance]) - - // --- Genome Region JSONB Type Mappers --- - import models.domain.genomics.RegionCoordinate - - implicit val regionCoordinatesJsonbTypeMapper: JdbcType[Map[String, RegionCoordinate]] with BaseTypedType[Map[String, RegionCoordinate]] = - MappedJdbcType.base[Map[String, RegionCoordinate], JsValue](Json.toJson(_), _.as[Map[String, RegionCoordinate]]) - - // Declare the name of an aggregate function: - val ArrayAgg = new SqlAggregateFunction("array_agg") - - // Implement the aggregate function as an extension method: - implicit class ArrayAggColumnQueryExtensionMethods[P, C[_]](val q: Query[Rep[P], _, C]) { - def arrayAgg[B](implicit tm: TypedType[List[B]]) = ArrayAgg.column[List[B]](q.toNode) - } - } -} - -object MyPostgresProfile extends MyPostgresProfile \ No newline at end of file diff --git a/app/models/dal/auth/ATProtocolClientMetadataTable.scala b/app/models/dal/auth/ATProtocolClientMetadataTable.scala deleted file mode 100644 index fe6c7f14..00000000 --- a/app/models/dal/auth/ATProtocolClientMetadataTable.scala +++ /dev/null @@ -1,43 +0,0 @@ -package models.dal.auth - -import models.auth.ATProtocolClientMetadata -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.ZonedDateTime -import java.util.UUID - -class ATProtocolClientMetadataTable(tag: Tag) extends 
Table[ATProtocolClientMetadata](tag, Some("auth"), "atprotocol_client_metadata") { - def id = column[UUID]("id", O.PrimaryKey) - - def clientIdUrl = column[String]("client_id_url", O.Unique) - - def clientName = column[Option[String]]("client_name") - - def clientUri = column[Option[String]]("client_uri") - - def logoUri = column[Option[String]]("logo_uri") - - def tosUri = column[Option[String]]("tos_uri") - - def policyUri = column[Option[String]]("policy_uri") - - def redirectUris = column[Option[String]]("redirect_uris") - - def createdAt = column[ZonedDateTime]("created_at") - - def updatedAt = column[ZonedDateTime]("updated_at") - - def * : ProvenShape[ATProtocolClientMetadata] = ( - id.?, - clientIdUrl, - clientName, - clientUri, - logoUri, - tosUri, - policyUri, - redirectUris, - createdAt, - updatedAt - ).mapTo[ATProtocolClientMetadata] -} \ No newline at end of file diff --git a/app/models/dal/auth/ATProtocoloAuthorizationServersTable.scala b/app/models/dal/auth/ATProtocoloAuthorizationServersTable.scala deleted file mode 100644 index 0f09aae9..00000000 --- a/app/models/dal/auth/ATProtocoloAuthorizationServersTable.scala +++ /dev/null @@ -1,47 +0,0 @@ -package models.dal.auth - -import models.auth.ATProtocolAuthorizationServer -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.ZonedDateTime -import java.util.UUID - -class ATProtocolAuthorizationServersTable(tag: Tag) extends Table[ATProtocolAuthorizationServer](tag, Some("auth"), "atprotocol_authorization_servers") { - def id = column[UUID]("id", O.PrimaryKey) - - def issuerUrl = column[String]("issuer_url", O.Unique) - - def authorizationEndpoint = column[Option[String]]("authorization_endpoint") - - def tokenEndpoint = column[Option[String]]("token_endpoint") - - def pushedAuthorizationRequestEndpoint = column[Option[String]]("pushed_authorization_request_endpoint") - - def dpopSigningAlgValuesSupported = 
column[Option[String]]("dpop_signing_alg_values_supported") - - def scopesSupported = column[Option[String]]("scopes_supported") - - def clientIdMetadataDocumentSupported = column[Option[Boolean]]("client_id_metadata_document_supported") - - def metadataFetchedAt = column[ZonedDateTime]("metadata_fetched_at") - - def createdAt = column[ZonedDateTime]("created_at") - - def updatedAt = column[ZonedDateTime]("updated_at") - - // Projection for the case class - def * : ProvenShape[ATProtocolAuthorizationServer] = ( - id.?, // Optional for inserts, DB generates UUID - issuerUrl, - authorizationEndpoint, - tokenEndpoint, - pushedAuthorizationRequestEndpoint, - dpopSigningAlgValuesSupported, - scopesSupported, - clientIdMetadataDocumentSupported, - metadataFetchedAt, - createdAt, - updatedAt - ).mapTo[ATProtocolAuthorizationServer] -} \ No newline at end of file diff --git a/app/models/dal/auth/CookieConsentsTable.scala b/app/models/dal/auth/CookieConsentsTable.scala deleted file mode 100644 index 9abc7f58..00000000 --- a/app/models/dal/auth/CookieConsentsTable.scala +++ /dev/null @@ -1,35 +0,0 @@ -package models.dal.auth - -import models.dal.MyPostgresProfile.api.* -import models.domain.auth.CookieConsent -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -/** - * DAL table for auth.cookie_consents - tracks cookie policy acceptance for GDPR compliance. 
- */ -class CookieConsentsTable(tag: Tag) extends Table[CookieConsent](tag, Some("auth"), "cookie_consents") { - def id = column[UUID]("id", O.PrimaryKey) - def userId = column[Option[UUID]]("user_id") - def sessionId = column[Option[String]]("session_id") - def ipAddressHash = column[Option[String]]("ip_address_hash") - def consentGiven = column[Boolean]("consent_given") - def consentTimestamp = column[LocalDateTime]("consent_timestamp") - def policyVersion = column[String]("policy_version") - def userAgent = column[Option[String]]("user_agent") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[CookieConsent] = ( - id.?, - userId, - sessionId, - ipAddressHash, - consentGiven, - consentTimestamp, - policyVersion, - userAgent, - createdAt - ).mapTo[CookieConsent] -} diff --git a/app/models/dal/auth/PermissionsTable.scala b/app/models/dal/auth/PermissionsTable.scala deleted file mode 100644 index 50c1f22b..00000000 --- a/app/models/dal/auth/PermissionsTable.scala +++ /dev/null @@ -1,29 +0,0 @@ -package models.dal.auth - -import models.auth.Permission -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.ZonedDateTime -import java.util.UUID - -class PermissionsTable(tag: Tag) extends Table[Permission](tag, Some("auth"), "permissions") { - def id = column[UUID]("id", O.PrimaryKey) - - def name = column[String]("name", O.Unique) - - def description = column[Option[String]]("description") - - def createdAt = column[ZonedDateTime]("created_at") - - def updatedAt = column[ZonedDateTime]("updated_at") - - - def * : ProvenShape[Permission] = ( - id.?, - name, - description, - createdAt, - updatedAt - ).mapTo[Permission] -} \ No newline at end of file diff --git a/app/models/dal/auth/RolePermission.scala b/app/models/dal/auth/RolePermission.scala deleted file mode 100644 index 7550552e..00000000 --- a/app/models/dal/auth/RolePermission.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.dal.auth - -import 
models.auth.{Permission, Role, RolePermission} -import models.dal.MyPostgresProfile.api.* -import models.dal.auth.RolesTable -import slick.lifted.ProvenShape - -import java.util.UUID - - -class RolePermissionsTable(tag: Tag) extends Table[RolePermission](tag, Some("auth"), "role_permissions") { - def roleId = column[UUID]("role_id") - - def permissionId = column[UUID]("permission_id") - - - override def * : ProvenShape[RolePermission] = (roleId, permissionId).mapTo[RolePermission] - - def pk = primaryKey("pk_auth_role_permissions", (roleId, permissionId)) - - - def roleFk = foreignKey("fk_auth_role_permissions_role_id", roleId, TableQuery[RolesTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) - - - def permissionFk = foreignKey("fk_auth_role_permissions_permission_id", permissionId, TableQuery[PermissionsTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} \ No newline at end of file diff --git a/app/models/dal/auth/RolesTable.scala b/app/models/dal/auth/RolesTable.scala deleted file mode 100644 index bd3b7a90..00000000 --- a/app/models/dal/auth/RolesTable.scala +++ /dev/null @@ -1,28 +0,0 @@ -package models.dal.auth - -import models.auth.Role -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.ZonedDateTime -import java.util.UUID - -class RolesTable(tag: Tag) extends Table[Role](tag, Some("auth"), "roles") { - def id = column[UUID]("id", O.PrimaryKey) - - def name = column[String]("name", O.Unique) - - def description = column[Option[String]]("description") - - def createdAt = column[ZonedDateTime]("created_at") - - def updatedAt = column[ZonedDateTime]("updated_at") - - def * : ProvenShape[Role] = ( - id.?, - name, - description, - createdAt, - updatedAt - ).mapTo[Role] -} \ No newline at end of file diff --git a/app/models/dal/auth/UserLoginInfoTable.scala b/app/models/dal/auth/UserLoginInfoTable.scala deleted file mode 100644 index 
4e3a6258..00000000 --- a/app/models/dal/auth/UserLoginInfoTable.scala +++ /dev/null @@ -1,38 +0,0 @@ -package models.dal.auth - -import models.auth.UserLoginInfo -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.user.UsersTable -import models.domain.user.User -import slick.lifted.ProvenShape - -import java.time.ZonedDateTime -import java.util.UUID - -class UserLoginInfoTable(tag: Tag) extends Table[UserLoginInfo](tag, Some("auth"), "user_login_info") { - def id = column[UUID]("id", O.PrimaryKey) - - def userId = column[UUID]("user_id") - - def providerId = column[String]("provider_id") - - def providerKey = column[String]("provider_key") - - def createdAt = column[ZonedDateTime]("created_at") - - def updatedAt = column[ZonedDateTime]("updated_at") - - def uniqueProvider = index("uq_auth_provider_id_key", (providerId, providerKey), unique = true) - - // Projection for the case class - def * : ProvenShape[UserLoginInfo] = ( - id.?, - userId, - providerId, - providerKey, - createdAt, - updatedAt - ).mapTo[UserLoginInfo] - - def userFk = foreignKey("fk_auth_user_login_info_user_id", userId, TableQuery[UsersTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} \ No newline at end of file diff --git a/app/models/dal/auth/UserOath2InfoTable.scala b/app/models/dal/auth/UserOath2InfoTable.scala deleted file mode 100644 index b2c48dbf..00000000 --- a/app/models/dal/auth/UserOath2InfoTable.scala +++ /dev/null @@ -1,46 +0,0 @@ -package models.dal.auth - -import models.auth.{UserLoginInfo, UserOauth2Info} -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.ZonedDateTime -import java.util.UUID - - -class UserOauth2InfoTable(tag: Tag) extends Table[UserOauth2Info](tag, Some("auth"), "user_oauth2_info") { - def id = column[UUID]("id", O.PrimaryKey) - - // Foreign key to auth.user_login_info (id) - def loginInfoId = column[UUID]("login_info_id", O.Unique) - - def accessToken = 
column[String]("access_token") - - def tokenType = column[Option[String]]("token_type") - - def expiresIn = column[Option[Long]]("expires_in") - - def refreshToken = column[Option[String]]("refresh_token") - - def createdAt = column[ZonedDateTime]("created_at") - - def updatedAt = column[ZonedDateTime]("updated_at") - - def scope = column[Option[String]]("scope") - - // Projection for the case class - def * : ProvenShape[UserOauth2Info] = ( - id.?, - loginInfoId, - accessToken, - tokenType, - expiresIn, - refreshToken, - createdAt, - updatedAt, - scope - ).mapTo[UserOauth2Info] - - - def loginInfoFk = foreignKey("fk_auth_user_oauth2_info_login_info_id", loginInfoId, TableQuery[UserLoginInfoTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} \ No newline at end of file diff --git a/app/models/dal/auth/UserPdsInfoTable.scala b/app/models/dal/auth/UserPdsInfoTable.scala deleted file mode 100644 index fcbe58e5..00000000 --- a/app/models/dal/auth/UserPdsInfoTable.scala +++ /dev/null @@ -1,41 +0,0 @@ -package models.dal.auth - -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.user.UsersTable -import models.domain.user.UserPdsInfo -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -/** - * DAL table for auth.user_pds_info - stores the home PDS URL for each user's AT Protocol identity. 
- */ -class UserPdsInfoTable(tag: Tag) extends Table[UserPdsInfo](tag, Some("auth"), "user_pds_info") { - def id = column[UUID]("id", O.PrimaryKey) - - def userId = column[UUID]("user_id", O.Unique) - - def pdsUrl = column[String]("pds_url") - - def did = column[String]("did", O.Unique) - - def handle = column[Option[String]]("handle") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[LocalDateTime]("updated_at") - - def * : ProvenShape[UserPdsInfo] = ( - id.?, - userId, - pdsUrl, - did, - handle, - createdAt, - updatedAt - ).mapTo[UserPdsInfo] - - // Foreign key definition to users.id - def userFk = foreignKey("fk_auth_user_pds_info_user_id", userId, TableQuery[UsersTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} diff --git a/app/models/dal/auth/UserRolesTable.scala b/app/models/dal/auth/UserRolesTable.scala deleted file mode 100644 index 9dbe1ff7..00000000 --- a/app/models/dal/auth/UserRolesTable.scala +++ /dev/null @@ -1,25 +0,0 @@ -package models.dal.auth - -import models.auth.{Role, UserRole} -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.user.UsersTable -import models.domain.user.User -import slick.lifted.ProvenShape - -import java.util.UUID - - -class UserRolesTable(tag: Tag) extends Table[UserRole](tag, Some("auth"), "user_roles") { - def userId = column[UUID]("user_id") - - def roleId = column[UUID]("role_id") - - - override def * : ProvenShape[UserRole] = (userId, roleId).mapTo[UserRole] - - def pk = primaryKey("pk_auth_user_roles", (userId, roleId)) - - def userFk = foreignKey("fk_auth_user_roles_user_id", userId, TableQuery[UsersTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) - - def roleFk = foreignKey("fk_auth_user_roles_role_id", roleId, TableQuery[RolesTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} \ No newline at end of file diff --git 
a/app/models/dal/curator/AuditLogTable.scala b/app/models/dal/curator/AuditLogTable.scala deleted file mode 100644 index 5eb0ce0c..00000000 --- a/app/models/dal/curator/AuditLogTable.scala +++ /dev/null @@ -1,37 +0,0 @@ -package models.dal.curator - -import models.dal.MyPostgresProfile.api.* -import models.domain.curator.AuditLogEntry -import play.api.libs.json.JsValue -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -/** - * DAL table for curator.audit_log - */ -class AuditLogTable(tag: Tag) extends Table[AuditLogEntry](tag, Some("curator"), "audit_log") { - - def id = column[UUID]("id", O.PrimaryKey) - def userId = column[UUID]("user_id") - def entityType = column[String]("entity_type") - def entityId = column[Int]("entity_id") - def action = column[String]("action") - def oldValue = column[Option[JsValue]]("old_value") - def newValue = column[Option[JsValue]]("new_value") - def comment = column[Option[String]]("comment") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[AuditLogEntry] = ( - id.?, - userId, - entityType, - entityId, - action, - oldValue, - newValue, - comment, - createdAt - ).mapTo[AuditLogEntry] -} diff --git a/app/models/dal/domain/GroupProjectMemberTable.scala b/app/models/dal/domain/GroupProjectMemberTable.scala deleted file mode 100644 index b095e7f7..00000000 --- a/app/models/dal/domain/GroupProjectMemberTable.scala +++ /dev/null @@ -1,58 +0,0 @@ -package models.dal.domain - -import models.dal.MyPostgresProfile.api.* -import models.domain.{GroupProjectMember, MemberVisibility} -import play.api.libs.json.{JsValue, Json} -import slick.lifted.{ProvenShape, Tag} - -import java.time.LocalDateTime - -class GroupProjectMemberTable(tag: Tag) extends Table[GroupProjectMember](tag, "group_project_member") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def groupProjectId = column[Int]("group_project_id") - def citizenDid = column[String]("citizen_did") - def 
biosampleAtUri = column[Option[String]]("biosample_at_uri") - def role = column[String]("role") - def status = column[String]("status") - def displayName = column[Option[String]]("display_name") - def kitId = column[Option[String]]("kit_id") - def visibility = column[JsValue]("visibility") - def subgroupIds = column[List[String]]("subgroup_ids") - def contributionLevel = column[Option[String]]("contribution_level") - def joinedAt = column[Option[LocalDateTime]]("joined_at") - def atUri = column[Option[String]]("at_uri") - def atCid = column[Option[String]]("at_cid") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - private def visibilityToJson(v: MemberVisibility): JsValue = Json.toJson(v) - private def jsonToVisibility(j: JsValue): MemberVisibility = j.asOpt[MemberVisibility].getOrElse(MemberVisibility()) - - override def * : ProvenShape[GroupProjectMember] = ( - id.?, - groupProjectId, - citizenDid, - biosampleAtUri, - role, - status, - displayName, - kitId, - visibility, - subgroupIds, - contributionLevel, - joinedAt, - atUri, - atCid, - createdAt, - updatedAt - ).shaped.<>( - { case (id, gpId, did, bio, role, status, dn, kit, vis, subs, cl, ja, aUri, aCid, ca, ua) => - GroupProjectMember(id, gpId, did, bio, role, status, dn, kit, jsonToVisibility(vis), subs, cl, ja, aUri, aCid, ca, ua) - }, - { (m: GroupProjectMember) => - Some((m.id, m.groupProjectId, m.citizenDid, m.biosampleAtUri, m.role, m.status, m.displayName, - m.kitId, visibilityToJson(m.visibility), m.subgroupIds, m.contributionLevel, m.joinedAt, - m.atUri, m.atCid, m.createdAt, m.updatedAt)) - } - ) -} diff --git a/app/models/dal/domain/GroupProjectTable.scala b/app/models/dal/domain/GroupProjectTable.scala deleted file mode 100644 index d05dd1c0..00000000 --- a/app/models/dal/domain/GroupProjectTable.scala +++ /dev/null @@ -1,56 +0,0 @@ -package models.dal.domain - -import models.dal.MyPostgresProfile.api.* -import 
models.domain.GroupProject -import slick.lifted.{ProvenShape, Tag} - -import java.time.LocalDateTime -import java.util.UUID - -class GroupProjectTable(tag: Tag) extends Table[GroupProject](tag, "group_project") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def projectGuid = column[UUID]("project_guid", O.Unique) - def projectName = column[String]("project_name") - def projectType = column[String]("project_type") - def targetHaplogroup = column[Option[String]]("target_haplogroup") - def targetLineage = column[Option[String]]("target_lineage") - def description = column[Option[String]]("description") - def backgroundInfo = column[Option[String]]("background_info") - def joinPolicy = column[String]("join_policy") - def haplogroupRequirement = column[Option[String]]("haplogroup_requirement") - def memberListVisibility = column[String]("member_list_visibility") - def strPolicy = column[String]("str_policy") - def snpPolicy = column[String]("snp_policy") - def publicTreeView = column[Boolean]("public_tree_view") - def successionPolicy = column[Option[String]]("succession_policy") - def ownerDid = column[String]("owner_did") - def atUri = column[Option[String]]("at_uri") - def atCid = column[Option[String]]("at_cid") - def deleted = column[Boolean]("deleted") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - override def * : ProvenShape[GroupProject] = ( - id.?, - projectGuid, - projectName, - projectType, - targetHaplogroup, - targetLineage, - description, - backgroundInfo, - joinPolicy, - haplogroupRequirement, - memberListVisibility, - strPolicy, - snpPolicy, - publicTreeView, - successionPolicy, - ownerDid, - atUri, - atCid, - deleted, - createdAt, - updatedAt - ).mapTo[GroupProject] -} diff --git a/app/models/dal/domain/ProjectTable.scala b/app/models/dal/domain/ProjectTable.scala deleted file mode 100644 index 36070a4b..00000000 --- a/app/models/dal/domain/ProjectTable.scala +++ /dev/null @@ 
-1,42 +0,0 @@ -package models.dal.domain - -import models.domain.Project -import models.dal.MyPostgresProfile.api.* - -import java.time.LocalDateTime -import java.util.UUID - -class ProjectTable(tag: Tag) extends Table[Project](tag, "project") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def projectGuid = column[UUID]("project_guid", O.Unique) - - def name = column[String]("name") - - def description = column[Option[String]]("description") - - def ownerDid = column[String]("owner_did") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[LocalDateTime]("updated_at") - - def deleted = column[Boolean]("deleted", O.Default(false)) - - def atUri = column[Option[String]]("at_uri") - - def atCid = column[Option[String]]("at_cid") - - def * = ( - id.?, - projectGuid, - name, - description, - ownerDid, - createdAt, - updatedAt, - deleted, - atUri, - atCid - ).mapTo[Project] -} diff --git a/app/models/dal/domain/billing/PatronSubscriptionTable.scala b/app/models/dal/domain/billing/PatronSubscriptionTable.scala deleted file mode 100644 index e6351192..00000000 --- a/app/models/dal/domain/billing/PatronSubscriptionTable.scala +++ /dev/null @@ -1,44 +0,0 @@ -package models.dal.domain.billing - -import models.dal.MyPostgresProfile.api.* -import models.domain.billing.PatronSubscription -import slick.lifted.{ProvenShape, Tag} - -import java.time.LocalDateTime -import java.util.UUID - -class PatronSubscriptionTable(tag: Tag) extends Table[PatronSubscription](tag, Some("billing"), "patron_subscription") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def userId = column[UUID]("user_id") - def patronTier = column[String]("patron_tier") - def status = column[String]("status") - def paymentProvider = column[String]("payment_provider") - def providerSubscriptionId = column[Option[String]]("provider_subscription_id") - def providerCustomerId = column[Option[String]]("provider_customer_id") - def amountCents = 
column[Int]("amount_cents") - def currency = column[String]("currency") - def billingInterval = column[String]("billing_interval") - def currentPeriodStart = column[Option[LocalDateTime]]("current_period_start") - def currentPeriodEnd = column[Option[LocalDateTime]]("current_period_end") - def cancelledAt = column[Option[LocalDateTime]]("cancelled_at") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - override def * : ProvenShape[PatronSubscription] = ( - id.?, - userId, - patronTier, - status, - paymentProvider, - providerSubscriptionId, - providerCustomerId, - amountCents, - currency, - billingInterval, - currentPeriodStart, - currentPeriodEnd, - cancelledAt, - createdAt, - updatedAt - ).mapTo[PatronSubscription] -} diff --git a/app/models/dal/domain/discovery/DiscoveryTables.scala b/app/models/dal/domain/discovery/DiscoveryTables.scala deleted file mode 100644 index e9e4bdfd..00000000 --- a/app/models/dal/domain/discovery/DiscoveryTables.scala +++ /dev/null @@ -1,139 +0,0 @@ -package models.dal.domain.discovery - -import models.HaplogroupType -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.haplogroups.HaplogroupsTable -import models.domain.discovery.* -import play.api.libs.json.JsValue -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -class BiosamplePrivateVariantTable(tag: Tag) extends Table[BiosamplePrivateVariant](tag, Some("tree"), "biosample_private_variant") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def sampleType = column[BiosampleSourceType]("sample_type") - def sampleId = column[Int]("sample_id") - def sampleGuid = column[UUID]("sample_guid") - def variantId = column[Int]("variant_id") - def haplogroupType = column[HaplogroupType]("haplogroup_type") - def terminalHaplogroupId = column[Int]("terminal_haplogroup_id") - def discoveredAt = column[LocalDateTime]("discovered_at") - def status = 
column[PrivateVariantStatus]("status") - - def * : ProvenShape[BiosamplePrivateVariant] = ( - id.?, sampleType, sampleId, sampleGuid, variantId, - haplogroupType, terminalHaplogroupId, discoveredAt, status - ).mapTo[BiosamplePrivateVariant] - - def terminalHaplogroupFK = foreignKey("bpv_terminal_hg_fk", terminalHaplogroupId, TableQuery[HaplogroupsTable])( - _.haplogroupId, onDelete = ForeignKeyAction.Restrict - ) - - def uniqueSampleVariant = index("idx_bpv_unique", (sampleType, sampleId, variantId, haplogroupType), unique = true) -} - -class ProposedBranchTable(tag: Tag) extends Table[ProposedBranch](tag, Some("tree"), "proposed_branch") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def parentHaplogroupId = column[Int]("parent_haplogroup_id") - def proposedName = column[Option[String]]("proposed_name") - def haplogroupType = column[HaplogroupType]("haplogroup_type") - def status = column[ProposedBranchStatus]("status") - def consensusCount = column[Int]("consensus_count") - def confidenceScore = column[Double]("confidence_score") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - def reviewedAt = column[Option[LocalDateTime]]("reviewed_at") - def reviewedBy = column[Option[String]]("reviewed_by") - def notes = column[Option[String]]("notes") - def promotedHaplogroupId = column[Option[Int]]("promoted_haplogroup_id") - - def * : ProvenShape[ProposedBranch] = ( - id.?, parentHaplogroupId, proposedName, haplogroupType, status, - consensusCount, confidenceScore, createdAt, updatedAt, - reviewedAt, reviewedBy, notes, promotedHaplogroupId - ).mapTo[ProposedBranch] - - def parentHaplogroupFK = foreignKey("pb_parent_hg_fk", parentHaplogroupId, TableQuery[HaplogroupsTable])( - _.haplogroupId, onDelete = ForeignKeyAction.Restrict - ) - def promotedHaplogroupFK = foreignKey("pb_promoted_hg_fk", promotedHaplogroupId, TableQuery[HaplogroupsTable])( - _.haplogroupId.?, onDelete = ForeignKeyAction.SetNull - ) 
-} - -class ProposedBranchVariantTable(tag: Tag) extends Table[ProposedBranchVariant](tag, Some("tree"), "proposed_branch_variant") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def proposedBranchId = column[Int]("proposed_branch_id") - def variantId = column[Int]("variant_id") - def isDefining = column[Boolean]("is_defining") - def evidenceCount = column[Int]("evidence_count") - def firstObservedAt = column[LocalDateTime]("first_observed_at") - def lastObservedAt = column[LocalDateTime]("last_observed_at") - - def * : ProvenShape[ProposedBranchVariant] = ( - id.?, proposedBranchId, variantId, isDefining, evidenceCount, - firstObservedAt, lastObservedAt - ).mapTo[ProposedBranchVariant] - - def proposedBranchFK = foreignKey("pbv_branch_fk", proposedBranchId, TableQuery[ProposedBranchTable])( - _.id, onDelete = ForeignKeyAction.Cascade - ) - - def uniqueBranchVariant = index("idx_pbv_unique", (proposedBranchId, variantId), unique = true) -} - -class ProposedBranchEvidenceTable(tag: Tag) extends Table[ProposedBranchEvidence](tag, Some("tree"), "proposed_branch_evidence") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def proposedBranchId = column[Int]("proposed_branch_id") - def sampleType = column[BiosampleSourceType]("sample_type") - def sampleId = column[Int]("sample_id") - def sampleGuid = column[UUID]("sample_guid") - def addedAt = column[LocalDateTime]("added_at") - def variantMatchCount = column[Int]("variant_match_count") - def variantMismatchCount = column[Int]("variant_mismatch_count") - - def * : ProvenShape[ProposedBranchEvidence] = ( - id.?, proposedBranchId, sampleType, sampleId, sampleGuid, - addedAt, variantMatchCount, variantMismatchCount - ).mapTo[ProposedBranchEvidence] - - def proposedBranchFK = foreignKey("pbe_branch_fk", proposedBranchId, TableQuery[ProposedBranchTable])( - _.id, onDelete = ForeignKeyAction.Cascade - ) - - def uniqueBranchSample = index("idx_pbe_unique", (proposedBranchId, sampleType, sampleId), unique = 
true) -} - -class CuratorActionTable(tag: Tag) extends Table[CuratorAction](tag, Some("tree"), "curator_action") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def curatorId = column[String]("curator_id") - def actionType = column[CuratorActionType]("action_type") - def targetType = column[CuratorTargetType]("target_type") - def targetId = column[Int]("target_id") - def previousState = column[Option[JsValue]]("previous_state") - def newState = column[Option[JsValue]]("new_state") - def reason = column[Option[String]]("reason") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[CuratorAction] = ( - id.?, curatorId, actionType, targetType, targetId, - previousState, newState, reason, createdAt - ).mapTo[CuratorAction] -} - -class DiscoveryConfigTable(tag: Tag) extends Table[DiscoveryConfig](tag, Some("tree"), "discovery_config") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def haplogroupType = column[HaplogroupType]("haplogroup_type") - def configKey = column[String]("config_key") - def configValue = column[String]("config_value") - def description = column[Option[String]]("description") - def updatedAt = column[LocalDateTime]("updated_at") - def updatedBy = column[Option[String]]("updated_by") - - def * : ProvenShape[DiscoveryConfig] = ( - id.?, haplogroupType, configKey, configValue, description, updatedAt, updatedBy - ).mapTo[DiscoveryConfig] - - def uniqueTypeKey = index("idx_dc_unique", (haplogroupType, configKey), unique = true) -} diff --git a/app/models/dal/domain/genomics/AlignmentMetadataTable.scala b/app/models/dal/domain/genomics/AlignmentMetadataTable.scala deleted file mode 100644 index dbe0591f..00000000 --- a/app/models/dal/domain/genomics/AlignmentMetadataTable.scala +++ /dev/null @@ -1,102 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{AlignmentMetadata, MetricLevel} -import play.api.libs.json.JsValue - -import 
java.time.LocalDateTime - -/** - * Slick table definition for alignment_metadata table. - */ -class AlignmentMetadataTable(tag: Tag) extends Table[AlignmentMetadata](tag, Some("public"), "alignment_metadata") { - - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def sequenceFileId = column[Long]("sequence_file_id") - - def genbankContigId = column[Int]("genbank_contig_id") - - def metricLevel = column[MetricLevel]("metric_level") - - def regionName = column[Option[String]]("region_name") - - def regionStartPos = column[Option[Long]]("region_start_pos") - - def regionEndPos = column[Option[Long]]("region_end_pos") - - def regionLengthBp = column[Option[Long]]("region_length_bp") - - def referenceBuild = column[Option[String]]("reference_build") - - def variantCaller = column[Option[String]]("variant_caller") - - def genomeTerritory = column[Option[Long]]("genome_territory") - - def meanCoverage = column[Option[Double]]("mean_coverage") - - def medianCoverage = column[Option[Double]]("median_coverage") - - def sdCoverage = column[Option[Double]]("sd_coverage") - - def pctExcDupe = column[Option[Double]]("pct_exc_dupe") - - def pctExcMapq = column[Option[Double]]("pct_exc_mapq") - - def pct10x = column[Option[Double]]("pct_10x") - - def pct20x = column[Option[Double]]("pct_20x") - - def pct30x = column[Option[Double]]("pct_30x") - - def hetSnpSensitivity = column[Option[Double]]("het_snp_sensitivity") - - def metricsDate = column[LocalDateTime]("metrics_date") - - def analysisTool = column[String]("analysis_tool") - - def analysisToolVersion = column[Option[String]]("analysis_tool_version") - - def notes = column[Option[String]]("notes") - - def metadata = column[Option[JsValue]]("metadata") - - def coverage = column[Option[JsValue]]("coverage") - - def * = ( - (id.?, sequenceFileId, genbankContigId, metricLevel), - (regionName, regionStartPos, regionEndPos, regionLengthBp), - (referenceBuild, variantCaller, genomeTerritory, meanCoverage, medianCoverage, 
sdCoverage, pctExcDupe, pctExcMapq, pct10x, pct20x, pct30x, hetSnpSensitivity), - (metricsDate, analysisTool, analysisToolVersion, notes, metadata, coverage) - ).shaped <> ( { - case ((id, seqId, contigId, lvl), (rName, rStart, rEnd, rLen), (refBuild, vCaller, gTerr, meanCov, medCov, sdCov, pDupe, pMapq, p10, p20, p30, hetSens), (mDate, tool, toolVer, notes, meta, cov)) => - AlignmentMetadata( - id, seqId, contigId, lvl, - rName, rStart, rEnd, rLen, - refBuild, vCaller, gTerr, meanCov, medCov, sdCov, pDupe, pMapq, p10, p20, p30, hetSens, - mDate, tool, toolVer, notes, meta, cov - ) - }, { (m: AlignmentMetadata) => - Some(( - (m.id, m.sequenceFileId, m.genbankContigId, m.metricLevel), - (m.regionName, m.regionStartPos, m.regionEndPos, m.regionLengthBp), - (m.referenceBuild, m.variantCaller, m.genomeTerritory, m.meanCoverage, m.medianCoverage, m.sdCoverage, m.pctExcDupe, m.pctExcMapq, m.pct10x, m.pct20x, m.pct30x, m.hetSnpSensitivity), - (m.metricsDate, m.analysisTool, m.analysisToolVersion, m.notes, m.metadata, m.coverage) - )) - } - ) - - // Foreign key constraints - def sequenceFileFk = foreignKey("alignment_metadata_sequence_file_fk", sequenceFileId, - TableQuery[SequenceFilesTable])(_.id.asInstanceOf[Rep[Long]], onDelete = ForeignKeyAction.Cascade) - - def genbankContigFk = foreignKey("alignment_metadata_genbank_contig_fk", genbankContigId, - TableQuery[GenbankContigsTable])(_.genbankContigId, onDelete = ForeignKeyAction.Cascade) - - // Indices - def idxSequenceFile = index("idx_alignment_metadata_sequence_file", sequenceFileId) - - def idxGenbankContig = index("idx_alignment_metadata_genbank_contig", genbankContigId) - - def idxMetricLevel = index("idx_alignment_metadata_metric_level", metricLevel) -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/AnalysisMethodTable.scala b/app/models/dal/domain/genomics/AnalysisMethodTable.scala deleted file mode 100644 index 2b958e24..00000000 --- 
a/app/models/dal/domain/genomics/AnalysisMethodTable.scala +++ /dev/null @@ -1,28 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.AnalysisMethod - -/** - * Represents the mapping for the "analysis_method" table in the database. - * - * This table is used to store various analysis methods that can be utilized - * in genetic or ancestry-related analyses. - * - * @constructor Creates a new `AnalysisMethodTable` instance. - * @param tag A Slick `Tag` object used to associate the table schema. - * - * Columns: - * - `id`: The primary key of the table, representing the unique identifier for an analysis method. - * - `methodName`: A string column that stores the name of the analysis method. This column must be unique. - * - * Mapping: - * The table schema maps to the `AnalysisMethod` case class. - */ -class AnalysisMethodTable(tag: Tag) extends Table[AnalysisMethod](tag, "analysis_method") { - def id = column[Int]("analysis_method_id", O.PrimaryKey, O.AutoInc) - - def methodName = column[String]("method_name", O.Unique) - - def * = (id.?, methodName).mapTo[AnalysisMethod] -} diff --git a/app/models/dal/domain/genomics/AncestryAnalysisTable.scala b/app/models/dal/domain/genomics/AncestryAnalysisTable.scala deleted file mode 100644 index 6bfbd785..00000000 --- a/app/models/dal/domain/genomics/AncestryAnalysisTable.scala +++ /dev/null @@ -1,30 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.AncestryAnalysis - -import java.util.UUID - -/** - * Represents the ancestry analysis table in the database. - * - * This table stores information about ancestry analyses performed on various samples, including the - * associated analysis method, population information, and probability outcomes. Each record corresponds - * to a single ancestry analysis with links to relevant samples and populations. 
- * - * @constructor Creates an instance of the AncestryAnalysisTable. - * @param tag Represents the table's context in Slick's query mechanism. - */ -class AncestryAnalysisTable(tag: Tag) extends Table[AncestryAnalysis](tag, "ancestry_analysis") { - def id = column[Int]("ancestry_analysis_id", O.PrimaryKey, O.AutoInc) - - def sampleGuid = column[UUID]("sample_guid") - - def analyticMethodId = column[Int]("analysis_method_id") - - def populationId = column[Int]("population_id") - - def probability = column[Double]("probability") - - def * = (id.?, sampleGuid, analyticMethodId, populationId, probability).mapTo[AncestryAnalysis] -} diff --git a/app/models/dal/domain/genomics/AssemblyMetadataTable.scala b/app/models/dal/domain/genomics/AssemblyMetadataTable.scala deleted file mode 100644 index ed8d7e46..00000000 --- a/app/models/dal/domain/genomics/AssemblyMetadataTable.scala +++ /dev/null @@ -1,33 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.AssemblyMetadata -import play.api.libs.json.JsValue - -import java.time.LocalDate - -class AssemblyMetadataTable(tag: Tag) extends Table[AssemblyMetadata](tag, "assembly_metadata") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def assemblyName = column[String]("assembly_name", O.Unique) - - def accession = column[Option[String]]("accession") - - def releaseDate = column[Option[LocalDate]]("release_date") - - def sourceOrganism = column[Option[String]]("source_organism") - - def assemblyLevel = column[Option[String]]("assembly_level") - - def metadata = column[Option[JsValue]]("metadata") - - def * = ( - id.?, - assemblyName, - accession, - releaseDate, - sourceOrganism, - assemblyLevel, - metadata - ).mapTo[AssemblyMetadata] -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/BiosampleCallableLociTable.scala b/app/models/dal/domain/genomics/BiosampleCallableLociTable.scala deleted file mode 100644 index 
bd2581b1..00000000 --- a/app/models/dal/domain/genomics/BiosampleCallableLociTable.scala +++ /dev/null @@ -1,29 +0,0 @@ -package models.dal.domain.genomics - -import models.domain.genomics.BiosampleCallableLoci -import models.dal.MyPostgresProfile.api.* - -import java.time.LocalDateTime -import java.util.UUID - -class BiosampleCallableLociTable(tag: Tag) extends Table[BiosampleCallableLoci](tag, Some("genomics"), "biosample_callable_loci") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def sampleType = column[String]("sample_type") - def sampleId = column[Int]("sample_id") - def sampleGuid = column[Option[UUID]]("sample_guid") - def chromosome = column[String]("chromosome") - def totalCallableBp = column[Long]("total_callable_bp") - def regionCount = column[Option[Int]]("region_count") - def bedFileHash = column[Option[String]]("bed_file_hash") - def computedAt = column[LocalDateTime]("computed_at") - def sourceTestTypeId = column[Option[Int]]("source_test_type_id") - def yXdegenCallableBp = column[Option[Long]]("y_xdegen_callable_bp") - def yAmpliconicCallableBp = column[Option[Long]]("y_ampliconic_callable_bp") - def yPalindromicCallableBp = column[Option[Long]]("y_palindromic_callable_bp") - - def * = ( - id.?, sampleType, sampleId, sampleGuid, chromosome, totalCallableBp, - regionCount, bedFileHash, computedAt, sourceTestTypeId, - yXdegenCallableBp, yAmpliconicCallableBp, yPalindromicCallableBp - ).mapTo[BiosampleCallableLoci] -} diff --git a/app/models/dal/domain/genomics/BiosampleHaplogroupsTable.scala b/app/models/dal/domain/genomics/BiosampleHaplogroupsTable.scala deleted file mode 100644 index 79c8f8a7..00000000 --- a/app/models/dal/domain/genomics/BiosampleHaplogroupsTable.scala +++ /dev/null @@ -1,29 +0,0 @@ -package models.dal.domain.genomics - -import models.domain.genomics.BiosampleHaplogroup -import models.dal.MyPostgresProfile.api.* - -import java.util.UUID - -/** - * Represents the mapping of biosamples to their respective haplogroups 
within the database. - * - * @constructor Creates a new instance of the `BiosampleHaplogroupsTable` mapping. - * @param tag A Slick tag identifying this table mapping for queries. - * - * The table `biosample_haplogroup` contains the following columns: - * - `sample_guid`: The primary key, representing the UUID that uniquely identifies a biosample. - * - `y_haplogroup_id`: An optional column that holds the identifier for the Y-haplogroup associated with the sample. - * - `mt_haplogroup_id`: An optional column that holds the identifier for the mitochondrial (MT) haplogroup associated with the sample. - * - * The mapping between columns and fields in the `BiosampleHaplogroup` case class is defined here. - */ -class BiosampleHaplogroupsTable(tag: Tag) extends Table[BiosampleHaplogroup](tag, "biosample_haplogroup") { - def sampleGuid = column[UUID]("sample_guid", O.PrimaryKey) - - def yHaplogroupId = column[Option[Int]]("y_haplogroup_id") - - def mtHaplogroupId = column[Option[Int]]("mt_haplogroup_id") - - def * = (sampleGuid, yHaplogroupId, mtHaplogroupId).mapTo[BiosampleHaplogroup] -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/BiosampleVariantCall.scala b/app/models/dal/domain/genomics/BiosampleVariantCall.scala deleted file mode 100644 index 643886b0..00000000 --- a/app/models/dal/domain/genomics/BiosampleVariantCall.scala +++ /dev/null @@ -1,97 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import play.api.libs.json.{Json, OFormat} - -import java.time.Instant - -/** - * Represents an observed variant call from a biosample. - * - * This is the input data for ASR - the actual observed states - * from sequenced samples. 
- * - * @param id Auto-generated primary key - * @param biosampleId FK to the biosample - * @param variantId FK to the variant - * @param observedState The observed state (allele, repeat count, "present"/"absent") - * @param qualityScore Phred-scale quality score - * @param readDepth Number of reads supporting the call - * @param confidence Confidence level: "high", "medium", "low" - * @param source Data source: "ftdna", "yfull", "user_upload", etc. - * @param createdAt When the call was recorded - */ -case class BiosampleVariantCall( - id: Option[Int] = None, - biosampleId: Int, - variantId: Int, - observedState: String, - qualityScore: Option[Int] = None, - readDepth: Option[Int] = None, - confidence: Option[String] = None, - source: Option[String] = None, - createdAt: Instant = Instant.now() -) - -object BiosampleVariantCall { - implicit val format: OFormat[BiosampleVariantCall] = Json.format[BiosampleVariantCall] - - object Confidence { - val HIGH = "high" - val MEDIUM = "medium" - val LOW = "low" - } -} - -/** - * Slick table definition for biosample_variant_call. 
- */ -class BiosampleVariantCallTable(tag: Tag) - extends Table[BiosampleVariantCall](tag, Some("public"), "biosample_variant_call") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def biosampleId = column[Int]("biosample_id") - - def variantId = column[Int]("variant_id") - - def observedState = column[String]("observed_state") - - def qualityScore = column[Option[Int]]("quality_score") - - def readDepth = column[Option[Int]]("read_depth") - - def confidence = column[Option[String]]("confidence") - - def source = column[Option[String]]("source") - - def createdAt = column[Instant]("created_at") - - def * = ( - id.?, - biosampleId, - variantId, - observedState, - qualityScore, - readDepth, - confidence, - source, - createdAt - ).mapTo[BiosampleVariantCall] - - // Note: biosample FK references public.biosample table - // We don't define the FK here to avoid circular dependencies - // The DB-level FK constraint handles referential integrity - - def variantFK = foreignKey( - "biosample_variant_call_variant_fk", - variantId, - TableQuery[VariantV2Table] - )(_.variantId, onDelete = ForeignKeyAction.Cascade) - - def uniqueBiosampleVariant = index( - "idx_biosample_variant_call_unique", - (biosampleId, variantId), - unique = true - ) -} diff --git a/app/models/dal/domain/genomics/BiosamplesTable.scala b/app/models/dal/domain/genomics/BiosamplesTable.scala deleted file mode 100644 index 573b003b..00000000 --- a/app/models/dal/domain/genomics/BiosamplesTable.scala +++ /dev/null @@ -1,48 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.Biosample -import play.api.libs.json.JsValue - -import java.util.UUID - -/** - * Defines the Slick table mapping for the `Biosample` entity. This table represents - * biological samples with their core identifiers and metadata. - * - * @param tag The Slick table tag used for binding the table to the database schema. 
- */ -class BiosamplesTable(tag: Tag) extends Table[Biosample](tag, "biosample") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def sampleAccession = column[String]("sample_accession", O.Unique) - - def description = column[String]("description") - - def alias = column[Option[String]]("alias") - - def centerName = column[String]("center_name") - - def specimenDonorId = column[Option[Int]]("specimen_donor_id") - - def sampleGuid = column[UUID]("sample_guid") - - def locked = column[Boolean]("locked", O.Default(false)) - - def sourcePlatform = column[Option[String]]("source_platform") - - def originalHaplogroups = column[Option[JsValue]]("original_haplogroups") - - def * = ( - id.?, - sampleGuid, - sampleAccession, - description, - alias, - centerName, - specimenDonorId, - locked, - sourcePlatform, - originalHaplogroups - ).mapTo[Biosample] -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/BranchMutation.scala b/app/models/dal/domain/genomics/BranchMutation.scala deleted file mode 100644 index ad345a4f..00000000 --- a/app/models/dal/domain/genomics/BranchMutation.scala +++ /dev/null @@ -1,93 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.haplogroups.HaplogroupsTable -import play.api.libs.json.{Json, OFormat} - -/** - * Represents a state transition along a tree branch. - * - * Records where mutations occurred in the phylogenetic tree, - * tracking the change from parent to child haplogroup state. 
- * - * @param id Auto-generated primary key - * @param variantId FK to the variant that changed - * @param parentHaplogroupId FK to parent haplogroup node - * @param childHaplogroupId FK to child haplogroup node - * @param fromState State at parent node (e.g., "G", "15") - * @param toState State at child node (e.g., "A", "16") - * @param stepDirection For STRs: +1 = expansion, -1 = contraction; NULL for SNPs - * @param confidence Confidence from ASR algorithm - */ -case class BranchMutation( - id: Option[Int] = None, - variantId: Int, - parentHaplogroupId: Int, - childHaplogroupId: Int, - fromState: String, - toState: String, - stepDirection: Option[Int] = None, - confidence: Option[BigDecimal] = None -) - -object BranchMutation { - implicit val format: OFormat[BranchMutation] = Json.format[BranchMutation] -} - -/** - * Slick table definition for branch_mutation. - */ -class BranchMutationTable(tag: Tag) - extends Table[BranchMutation](tag, Some("public"), "branch_mutation") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def variantId = column[Int]("variant_id") - - def parentHaplogroupId = column[Int]("parent_haplogroup_id") - - def childHaplogroupId = column[Int]("child_haplogroup_id") - - def fromState = column[String]("from_state") - - def toState = column[String]("to_state") - - def stepDirection = column[Option[Int]]("step_direction") - - def confidence = column[Option[BigDecimal]]("confidence") - - def * = ( - id.?, - variantId, - parentHaplogroupId, - childHaplogroupId, - fromState, - toState, - stepDirection, - confidence - ).mapTo[BranchMutation] - - def variantFK = foreignKey( - "branch_mutation_variant_fk", - variantId, - TableQuery[VariantV2Table] - )(_.variantId, onDelete = ForeignKeyAction.Cascade) - - def parentHaplogroupFK = foreignKey( - "branch_mutation_parent_haplogroup_fk", - parentHaplogroupId, - TableQuery[HaplogroupsTable] - )(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) - - def childHaplogroupFK = foreignKey( - 
"branch_mutation_child_haplogroup_fk", - childHaplogroupId, - TableQuery[HaplogroupsTable] - )(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) - - def uniqueBranchVariant = index( - "idx_branch_mutation_unique", - (variantId, parentHaplogroupId, childHaplogroupId), - unique = true - ) -} diff --git a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala b/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala deleted file mode 100644 index ee98203f..00000000 --- a/app/models/dal/domain/genomics/CitizenBiosamplesTable.scala +++ /dev/null @@ -1,66 +0,0 @@ -package models.dal.domain.genomics - -import com.vividsolutions.jts.geom.Point -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{BiologicalSex, CitizenBiosample, HaplogroupResult} - -import java.time.{LocalDate, LocalDateTime} -import java.util.UUID - -class CitizenBiosamplesTable(tag: Tag) extends Table[CitizenBiosample](tag, "citizen_biosample") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def atUri = column[Option[String]]("at_uri", O.Unique) - - def accession = column[Option[String]]("accession") - - def alias = column[Option[String]]("alias") - - def sourcePlatform = column[Option[String]]("source_platform") - - def collectionDate = column[Option[LocalDate]]("collection_date") - - def sex = column[Option[BiologicalSex]]("sex") - - def geocoord = column[Option[Point]]("geocoord") - - def description = column[Option[String]]("description") - - def yHaplogroup = column[Option[HaplogroupResult]]("y_haplogroup") - - def mtHaplogroup = column[Option[HaplogroupResult]]("mt_haplogroup") - - def sampleGuid = column[UUID]("sample_guid") - - def deleted = column[Boolean]("deleted", O.Default(false)) - - def atCid = column[Option[String]]("at_cid") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[LocalDateTime]("updated_at") - - def specimenDonorId = column[Option[Int]]("specimen_donor_id") - - def specimenDonorFk = 
foreignKey("citizen_biosample_specimen_donor_fk", specimenDonorId, TableQuery[SpecimenDonorsTable])(_.id.?) - - def * = ( - id.?, - atUri, - accession, - alias, - sourcePlatform, - collectionDate, - sex, - geocoord, - description, - yHaplogroup, - mtHaplogroup, - sampleGuid, - deleted, - atCid, - createdAt, - updatedAt, - specimenDonorId - ).mapTo[CitizenBiosample] -} diff --git a/app/models/dal/domain/genomics/CoverageExpectationProfileTable.scala b/app/models/dal/domain/genomics/CoverageExpectationProfileTable.scala deleted file mode 100644 index 83d22be4..00000000 --- a/app/models/dal/domain/genomics/CoverageExpectationProfileTable.scala +++ /dev/null @@ -1,42 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.CoverageExpectationProfile -import slick.lifted.{ProvenShape, Tag} - -import java.time.LocalDateTime - -class CoverageExpectationProfileTable(tag: Tag) - extends MyPostgresProfile.api.Table[CoverageExpectationProfile](tag, "coverage_expectation_profile") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def testTypeId = column[Int]("test_type_id") - def contigName = column[String]("contig_name") - def variantClass = column[String]("variant_class") - def minDepthHigh = column[Double]("min_depth_high") - def minDepthMedium = column[Double]("min_depth_medium") - def minDepthLow = column[Double]("min_depth_low") - def minCoveragePct = column[Option[Double]]("min_coverage_pct") - def minMappingQuality = column[Option[Double]]("min_mapping_quality") - def minCallablePct = column[Option[Double]]("min_callable_pct") - def notes = column[Option[String]]("notes") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - override def * : ProvenShape[CoverageExpectationProfile] = ( - id.?, - testTypeId, - contigName, - variantClass, - minDepthHigh, - minDepthMedium, - minDepthLow, - minCoveragePct, - 
minMappingQuality, - minCallablePct, - notes, - createdAt, - updatedAt - ).mapTo[CoverageExpectationProfile] -} diff --git a/app/models/dal/domain/genomics/GenbankContigsTable.scala b/app/models/dal/domain/genomics/GenbankContigsTable.scala deleted file mode 100644 index 82fe93a0..00000000 --- a/app/models/dal/domain/genomics/GenbankContigsTable.scala +++ /dev/null @@ -1,40 +0,0 @@ -package models.dal.domain.genomics - -import models.domain.genomics.GenbankContig -import models.dal.MyPostgresProfile.api.* - -/** - * Represents the database table for GenBank contigs, mapping to the `GenbankContig` case class. - * - * This table stores information about DNA or RNA sequence segments, including their unique - * identifiers, accession numbers, optional common names, reference genomes, and sequence lengths. - * - * @constructor Creates a new instance of the `GenbankContigsTable` class. - * @param tag A Slick `Tag` object used for scoping and referencing the table in the database schema. - * - * Columns: - * - `genbankContigId`: Auto-incrementing primary key, uniquely identifying each contig. - * - `accession`: Unique accession number for the contig, serving as a reference to the external database. - * - `commonName`: Optional common name assigned to the contig for easier identification. - * - `referenceGenome`: Optional reference genome name or identifier associated with the contig. - * - `seqLength`: Length of the DNA or RNA sequence represented by the contig. - * - * Primary key: - * - `genbankContigId`. - * - * Mapping: - * - Maps to the `GenbankContig` case class using the corresponding columns. 
- */ -class GenbankContigsTable(tag: Tag) extends Table[GenbankContig](tag, "genbank_contig") { - def genbankContigId = column[Int]("genbank_contig_id", O.PrimaryKey, O.AutoInc) - - def accession = column[String]("accession", O.Unique) - - def commonName = column[Option[String]]("common_name") - - def referenceGenome = column[Option[String]]("reference_genome") - - def seqLength = column[Int]("seq_length") - - def * = (genbankContigId.?, accession, commonName, referenceGenome, seqLength).mapTo[GenbankContig] -} diff --git a/app/models/dal/domain/genomics/GeneAnnotationsTable.scala b/app/models/dal/domain/genomics/GeneAnnotationsTable.scala deleted file mode 100644 index ef829f7b..00000000 --- a/app/models/dal/domain/genomics/GeneAnnotationsTable.scala +++ /dev/null @@ -1,24 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.GeneAnnotation - -class GeneAnnotationsTable(tag: Tag) extends Table[GeneAnnotation](tag, "gene_annotation") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def geneSymbol = column[Option[String]]("gene_symbol") - - def geneId = column[Option[String]]("gene_id") - - def description = column[Option[String]]("description") - - def representativeSequenceNodeId = column[Option[Int]]("representative_sequence_node_id") - - def * = ( - id.?, - geneSymbol, - geneId, - description, - representativeSequenceNodeId - ).mapTo[GeneAnnotation] -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/GenomeRegionTables.scala b/app/models/dal/domain/genomics/GenomeRegionTables.scala deleted file mode 100644 index 9d328d4c..00000000 --- a/app/models/dal/domain/genomics/GenomeRegionTables.scala +++ /dev/null @@ -1,41 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{GenomeRegion, GenomeRegionVersion, RegionCoordinate} -import play.api.libs.json.JsValue -import java.time.Instant - -/** - * Slick 
table definition for genome_region_version table. - * Tracks data versions for ETag generation. - */ -class GenomeRegionVersionTable(tag: Tag) extends Table[GenomeRegionVersion](tag, "genome_region_version") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def referenceGenome = column[String]("reference_genome", O.Unique) - def dataVersion = column[String]("data_version") - def updatedAt = column[Instant]("updated_at") - - def * = (id.?, referenceGenome, dataVersion, updatedAt).mapTo[GenomeRegionVersion] -} - -/** - * Slick table definition for genome_region_v2 table. - * Stores structural regions (centromere, telomere, PAR, XTR, etc.) and Cytobands. - * Supports multi-reference coordinates via JSONB. - */ -class GenomeRegionTable(tag: Tag) extends Table[GenomeRegion](tag, "genome_region_v2") { - def id = column[Int]("region_id", O.PrimaryKey, O.AutoInc) // Column name changed to region_id - def regionType = column[String]("region_type") - def name = column[Option[String]]("name") - def coordinates = column[JsValue]("coordinates") - def properties = column[JsValue]("properties") - - def * = (id.?, regionType, name, coordinates, properties).<> ( - (t: (Option[Int], String, Option[String], JsValue, JsValue)) => GenomeRegion( - t._1, t._2, t._3, t._4.as[Map[String, RegionCoordinate]], t._5 - ), - (r: GenomeRegion) => Some((r.id, r.regionType, r.name, play.api.libs.json.Json.toJson(r.coordinates), r.properties)) - ) - - // No Foreign Key to Contig anymore, as coordinates are embedded -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/GenotypeDataTable.scala b/app/models/dal/domain/genomics/GenotypeDataTable.scala deleted file mode 100644 index 3af48cd5..00000000 --- a/app/models/dal/domain/genomics/GenotypeDataTable.scala +++ /dev/null @@ -1,64 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{GenotypeData, GenotypeMetrics} - -import java.time.LocalDateTime -import 
java.util.UUID - -/** - * Slick table definition for genotype_data table. - * Stores SNP array/chip genotype data with quality metrics and derived haplogroups. - */ -class GenotypeDataTable(tag: Tag) extends Table[GenotypeData](tag, "genotype_data") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def atUri = column[Option[String]]("at_uri") - def atCid = column[Option[String]]("at_cid") - def sampleGuid = column[UUID]("sample_guid") - def testTypeId = column[Option[Int]]("test_type_id") - def provider = column[Option[String]]("provider") - def chipVersion = column[Option[String]]("chip_version") - def buildVersion = column[Option[String]]("build_version") - def sourceFileHash = column[Option[String]]("source_file_hash") - def metrics = column[GenotypeMetrics]("metrics") - def populationBreakdownId = column[Option[Int]]("population_breakdown_id") - def deleted = column[Boolean]("deleted") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - def * = ( - id.?, - atUri, - atCid, - sampleGuid, - testTypeId, - provider, - chipVersion, - buildVersion, - sourceFileHash, - metrics, - populationBreakdownId, - deleted, - createdAt, - updatedAt - ).mapTo[GenotypeData] - - // Indexes - def atUriIdx = index("idx_genotype_at_uri", atUri, unique = true) - def sampleGuidIdx = index("idx_genotype_sample_guid", sampleGuid) - def testTypeIdx = index("idx_genotype_test_type", testTypeId) - def providerIdx = index("idx_genotype_provider", provider) - - // Foreign keys - def testTypeFk = foreignKey( - "fk_genotype_test_type", - testTypeId, - TableQuery[TestTypeTable] - )(_.id.?, onDelete = ForeignKeyAction.Restrict) - - def populationBreakdownFk = foreignKey( - "fk_genotype_population_breakdown", - populationBreakdownId, - TableQuery[PopulationBreakdownTable] - )(_.id.?, onDelete = ForeignKeyAction.SetNull) -} diff --git a/app/models/dal/domain/genomics/HaplogroupCharacterState.scala 
b/app/models/dal/domain/genomics/HaplogroupCharacterState.scala deleted file mode 100644 index 19bf6688..00000000 --- a/app/models/dal/domain/genomics/HaplogroupCharacterState.scala +++ /dev/null @@ -1,92 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.haplogroups.HaplogroupsTable -import play.api.libs.json.{JsValue, Json, OFormat} - -import java.time.Instant - -/** - * Represents an ASR-reconstructed character state at a haplogroup node. - * - * This table stores the inferred ancestral state for each variant at each - * haplogroup in the tree. Used for: - * - SNPs: ancestral vs derived allele - * - STRs: inferred repeat count (modal haplotype) - * - SVs: presence/absence, orientation, copy number - * - * @param id Auto-generated primary key - * @param haplogroupId FK to the haplogroup node - * @param variantId FK to the variant - * @param inferredState The reconstructed state (allele, count, "present"/"absent", etc.) - * @param confidence Confidence score from ASR algorithm (0.0-1.0) - * @param stateProbabilities JSONB probability distribution for uncertain reconstructions - * @param algorithm ASR method used: "parsimony", "ml", "bayesian" - * @param reconstructedAt Timestamp of reconstruction - */ -case class HaplogroupCharacterState( - id: Option[Int] = None, - haplogroupId: Int, - variantId: Int, - inferredState: String, - confidence: Option[BigDecimal] = None, - stateProbabilities: Option[JsValue] = None, - algorithm: Option[String] = None, - reconstructedAt: Instant = Instant.now() -) - -object HaplogroupCharacterState { - implicit val format: OFormat[HaplogroupCharacterState] = Json.format[HaplogroupCharacterState] -} - -/** - * Slick table definition for haplogroup_character_state. 
- */ -class HaplogroupCharacterStateTable(tag: Tag) - extends Table[HaplogroupCharacterState](tag, Some("public"), "haplogroup_character_state") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def haplogroupId = column[Int]("haplogroup_id") - - def variantId = column[Int]("variant_id") - - def inferredState = column[String]("inferred_state") - - def confidence = column[Option[BigDecimal]]("confidence") - - def stateProbabilities = column[Option[JsValue]]("state_probabilities") - - def algorithm = column[Option[String]]("algorithm") - - def reconstructedAt = column[Instant]("reconstructed_at") - - def * = ( - id.?, - haplogroupId, - variantId, - inferredState, - confidence, - stateProbabilities, - algorithm, - reconstructedAt - ).mapTo[HaplogroupCharacterState] - - def haplogroupFK = foreignKey( - "haplogroup_character_state_haplogroup_fk", - haplogroupId, - TableQuery[HaplogroupsTable] - )(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) - - def variantFK = foreignKey( - "haplogroup_character_state_variant_fk", - variantId, - TableQuery[VariantV2Table] - )(_.variantId, onDelete = ForeignKeyAction.Cascade) - - def uniqueHaplogroupVariant = index( - "idx_character_state_unique", - (haplogroupId, variantId), - unique = true - ) -} diff --git a/app/models/dal/domain/genomics/HaplogroupReconciliationTable.scala b/app/models/dal/domain/genomics/HaplogroupReconciliationTable.scala deleted file mode 100644 index 0375dbe0..00000000 --- a/app/models/dal/domain/genomics/HaplogroupReconciliationTable.scala +++ /dev/null @@ -1,63 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{DnaType, HaplogroupReconciliation, ReconciliationStatus} -import play.api.libs.json.JsValue - -import java.time.LocalDateTime - -/** - * Slick table definition for haplogroup_reconciliation table. - * Stores multi-run haplogroup reconciliation at specimen donor level. 
- */ -class HaplogroupReconciliationTable(tag: Tag) extends Table[HaplogroupReconciliation](tag, "haplogroup_reconciliation") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def atUri = column[Option[String]]("at_uri") - def atCid = column[Option[String]]("at_cid") - def specimenDonorId = column[Int]("specimen_donor_id") - def dnaType = column[DnaType]("dna_type") - // JSONB column for status metrics - def status = column[ReconciliationStatus]("status") - // JSONB columns stored as JsValue - def runCalls = column[JsValue]("run_calls") - def snpConflicts = column[Option[JsValue]]("snp_conflicts") - def heteroplasmyObservations = column[Option[JsValue]]("heteroplasmy_observations") - def identityVerification = column[Option[JsValue]]("identity_verification") - def manualOverride = column[Option[JsValue]]("manual_override") - def auditLog = column[Option[JsValue]]("audit_log") - def lastReconciliationAt = column[Option[LocalDateTime]]("last_reconciliation_at") - def deleted = column[Boolean]("deleted") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - // 16 fields - under the 22 tuple limit - def * = ( - id.?, - atUri, - atCid, - specimenDonorId, - dnaType, - status, - runCalls, - snpConflicts, - heteroplasmyObservations, - identityVerification, - manualOverride, - auditLog, - lastReconciliationAt, - deleted, - createdAt, - updatedAt - ).mapTo[HaplogroupReconciliation] - - // Indexes - def atUriIdx = index("idx_reconciliation_at_uri", atUri, unique = true) - def specimenDonorIdx = index("idx_reconciliation_specimen_donor", specimenDonorId) - - // Foreign key to specimen_donor - def specimenDonorFk = foreignKey( - "fk_reconciliation_specimen_donor", - specimenDonorId, - TableQuery[SpecimenDonorsTable] - )(_.id, onDelete = ForeignKeyAction.Cascade) -} diff --git a/app/models/dal/domain/genomics/InstrumentAssociationProposalTable.scala 
b/app/models/dal/domain/genomics/InstrumentAssociationProposalTable.scala deleted file mode 100644 index 16cf01e5..00000000 --- a/app/models/dal/domain/genomics/InstrumentAssociationProposalTable.scala +++ /dev/null @@ -1,57 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{InstrumentAssociationProposal, ProposalStatus} -import slick.lifted.{ProvenShape, Tag} - -import java.time.LocalDateTime - -class InstrumentAssociationProposalTable(tag: Tag) - extends MyPostgresProfile.api.Table[InstrumentAssociationProposal](tag, "instrument_association_proposal") { - - implicit val proposalStatusMapper: BaseColumnType[ProposalStatus] = - MappedColumnType.base[ProposalStatus, String](_.dbValue, ProposalStatus.fromString) - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def instrumentId = column[String]("instrument_id") - def proposedLabName = column[String]("proposed_lab_name") - def proposedManufacturer = column[Option[String]]("proposed_manufacturer") - def proposedModel = column[Option[String]]("proposed_model") - def existingLabId = column[Option[Int]]("existing_lab_id") - def observationCount = column[Int]("observation_count") - def distinctCitizenCount = column[Int]("distinct_citizen_count") - def confidenceScore = column[Double]("confidence_score") - def earliestObservation = column[Option[LocalDateTime]]("earliest_observation") - def latestObservation = column[Option[LocalDateTime]]("latest_observation") - def status = column[ProposalStatus]("status") - def reviewedAt = column[Option[LocalDateTime]]("reviewed_at") - def reviewedBy = column[Option[String]]("reviewed_by") - def reviewNotes = column[Option[String]]("review_notes") - def acceptedLabId = column[Option[Int]]("accepted_lab_id") - def acceptedInstrumentId = column[Option[Int]]("accepted_instrument_id") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = 
column[LocalDateTime]("updated_at") - - override def * : ProvenShape[InstrumentAssociationProposal] = ( - id.?, - instrumentId, - proposedLabName, - proposedManufacturer, - proposedModel, - existingLabId, - observationCount, - distinctCitizenCount, - confidenceScore, - earliestObservation, - latestObservation, - status, - reviewedAt, - reviewedBy, - reviewNotes, - acceptedLabId, - acceptedInstrumentId, - createdAt, - updatedAt - ).mapTo[InstrumentAssociationProposal] -} diff --git a/app/models/dal/domain/genomics/InstrumentObservationTable.scala b/app/models/dal/domain/genomics/InstrumentObservationTable.scala deleted file mode 100644 index a6f88b25..00000000 --- a/app/models/dal/domain/genomics/InstrumentObservationTable.scala +++ /dev/null @@ -1,34 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{InstrumentObservation, ObservationConfidence} -import slick.ast.BaseTypedType - -import java.time.LocalDateTime - -class InstrumentObservationTable(tag: Tag) extends Table[InstrumentObservation](tag, "instrument_observation") { - - implicit private val confidenceMapper: BaseTypedType[ObservationConfidence] = - MappedColumnType.base[ObservationConfidence, String](_.dbValue, ObservationConfidence.fromString) - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def atUri = column[String]("at_uri", O.Unique) - def atCid = column[Option[String]]("at_cid") - def instrumentId = column[String]("instrument_id") - def labName = column[String]("lab_name") - def biosampleRef = column[String]("biosample_ref") - def sequenceRunRef = column[Option[String]]("sequence_run_ref") - def platform = column[Option[String]]("platform") - def instrumentModel = column[Option[String]]("instrument_model") - def flowcellId = column[Option[String]]("flowcell_id") - def runDate = column[Option[LocalDateTime]]("run_date") - def confidence = column[ObservationConfidence]("confidence") - def createdAt = 
column[LocalDateTime]("created_at") - def updatedAt = column[Option[LocalDateTime]]("updated_at") - - def * = ( - id.?, atUri, atCid, instrumentId, labName, biosampleRef, - sequenceRunRef, platform, instrumentModel, flowcellId, - runDate, confidence, createdAt, updatedAt - ).mapTo[InstrumentObservation] -} diff --git a/app/models/dal/domain/genomics/PopulationBreakdownTable.scala b/app/models/dal/domain/genomics/PopulationBreakdownTable.scala deleted file mode 100644 index 5fe54743..00000000 --- a/app/models/dal/domain/genomics/PopulationBreakdownTable.scala +++ /dev/null @@ -1,57 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{PcaCoordinatesJsonb, PopulationBreakdown} - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Slick table definition for population_breakdown table. - * Stores ancestry analysis breakdowns using PCA projection onto reference populations. - */ -class PopulationBreakdownTable(tag: Tag) extends Table[PopulationBreakdown](tag, "population_breakdown") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def atUri = column[Option[String]]("at_uri") - def atCid = column[Option[String]]("at_cid") - def sampleGuid = column[UUID]("sample_guid") - def analysisMethod = column[String]("analysis_method") - def panelType = column[Option[String]]("panel_type") - def referencePopulations = column[Option[String]]("reference_populations") - def snpsAnalyzed = column[Option[Int]]("snps_analyzed") - def snpsWithGenotype = column[Option[Int]]("snps_with_genotype") - def snpsMissing = column[Option[Int]]("snps_missing") - def confidenceLevel = column[Option[Double]]("confidence_level") - def pcaCoordinates = column[Option[PcaCoordinatesJsonb]]("pca_coordinates") - def analysisDate = column[Option[LocalDateTime]]("analysis_date") - def pipelineVersion = column[Option[String]]("pipeline_version") - def referenceVersion = column[Option[String]]("reference_version") - def 
deleted = column[Boolean]("deleted") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - def * = ( - id.?, - atUri, - atCid, - sampleGuid, - analysisMethod, - panelType, - referencePopulations, - snpsAnalyzed, - snpsWithGenotype, - snpsMissing, - confidenceLevel, - pcaCoordinates, - analysisDate, - pipelineVersion, - referenceVersion, - deleted, - createdAt, - updatedAt - ).mapTo[PopulationBreakdown] - - // Unique index on at_uri - def atUriIdx = index("idx_population_breakdown_at_uri", atUri, unique = true) - def sampleGuidIdx = index("idx_population_breakdown_sample_guid", sampleGuid) -} diff --git a/app/models/dal/domain/genomics/PopulationComponentTable.scala b/app/models/dal/domain/genomics/PopulationComponentTable.scala deleted file mode 100644 index b7371549..00000000 --- a/app/models/dal/domain/genomics/PopulationComponentTable.scala +++ /dev/null @@ -1,41 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.PopulationComponent - -/** - * Slick table definition for population_component table. - * Stores individual population components in an ancestry breakdown (~33 reference populations). 
- */ -class PopulationComponentTable(tag: Tag) extends Table[PopulationComponent](tag, "population_component") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def populationBreakdownId = column[Int]("population_breakdown_id") - def populationCode = column[String]("population_code") - def populationName = column[Option[String]]("population_name") - def superPopulation = column[Option[String]]("super_population") - def percentage = column[Double]("percentage") - def confidenceLower = column[Option[Double]]("confidence_lower") - def confidenceUpper = column[Option[Double]]("confidence_upper") - def rank = column[Option[Int]]("rank") - - def * = ( - id.?, - populationBreakdownId, - populationCode, - populationName, - superPopulation, - percentage, - confidenceLower, - confidenceUpper, - rank - ).mapTo[PopulationComponent] - - // Foreign key to population_breakdown - def populationBreakdownFk = foreignKey( - "fk_population_component_breakdown", - populationBreakdownId, - TableQuery[PopulationBreakdownTable] - )(_.id, onDelete = ForeignKeyAction.Cascade) - - def breakdownIdx = index("idx_population_component_breakdown", populationBreakdownId) -} diff --git a/app/models/dal/domain/genomics/PopulationsTable.scala b/app/models/dal/domain/genomics/PopulationsTable.scala deleted file mode 100644 index db9c10b0..00000000 --- a/app/models/dal/domain/genomics/PopulationsTable.scala +++ /dev/null @@ -1,32 +0,0 @@ -package models.dal.domain.genomics - -import models.domain.genomics.Population -import models.dal.MyPostgresProfile.api.* - -/** - * Represents the `PopulationsTable` database table definition. - * - * This class defines the schema for the `population` table, mapping database columns - * to the corresponding attributes of the `Population` case class. The table is used - * to store information about various population groups or demographics. - * - * @constructor Initializes a new instance of the `PopulationsTable` class. 
- * @param tag A Slick `Tag` object used for referencing the table within a database schema. - * - * Columns: - * - `populationId`: An optional unique identifier for the population (primary key, auto-incremented). - * - `populationName`: The unique name of the population, serving as a primary identifier (unique constraint). - * - * Primary Key: - * - `populationId`: The primary key for the table, automatically incremented. - * - * Mapping: - * - Defines a mapping to the `Population` case class via the `*` projection. - */ -class PopulationsTable(tag: Tag) extends Table[Population](tag, "population") { - def populationId = column[Int]("population_id", O.PrimaryKey, O.AutoInc) - - def populationName = column[String]("population_name", O.Unique) - - def * = (populationId.?, populationName).mapTo[Population] -} diff --git a/app/models/dal/domain/genomics/SequenceFilesTable.scala b/app/models/dal/domain/genomics/SequenceFilesTable.scala deleted file mode 100644 index 8d24f1f1..00000000 --- a/app/models/dal/domain/genomics/SequenceFilesTable.scala +++ /dev/null @@ -1,57 +0,0 @@ -package models.dal.domain.genomics - -import models.domain.genomics.{SequenceFile, SequenceFileAtpLocationJsonb, SequenceFileChecksumJsonb, SequenceFileHttpLocationJsonb} -import models.dal.MyPostgresProfile // Import the object itself -import models.dal.MyPostgresProfile.api.* // Import the api contents - -import java.time.LocalDateTime - -/** - * Represents the table mapping for storing sequence file records in the database. - * - * The `SequenceFilesTable` class extends the Slick `Table` class and specifies the schema for the - * `sequence_file` table. Each column defined here correlates to a field in the `SequenceFile` case class, - * which models a sequence file along with its associated metadata. - * - * Columns: - * - `id`: The primary key, automatically incremented for each new row. - * - `libraryId`: A foreign key linking this sequence file to a specific library. 
- * - `fileName`: The name of the file. - * - `fileSizeBytes`: The size of the file in bytes. - * - `fileFormat`: The format of the file (e.g., FASTQ, BAM, etc.). - * - `checksums`: JSONB column storing a list of file checksums. - * - `httpLocations`: JSONB column storing a list of HTTP locations. - * - `atpLocation`: Optional JSONB column storing an AT Protocol location. - * - `aligner`: The name of the aligner tool used in the sequence file processing, if applicable. - * - `targetReference`: The reference genome or target used for alignment. - * - `createdAt`: A timestamp of when this sequence file entry was created. - * - `updatedAt`: An optional timestamp of when the file entry was last updated. - * - * The `*` projection defines the mapping between the table columns and the `SequenceFile` case class. - */ -class SequenceFilesTable(tag: Tag) extends MyPostgresProfile.api.Table[SequenceFile](tag, "sequence_file") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def libraryId = column[Int]("library_id") - - def fileName = column[String]("file_name") - - def fileSizeBytes = column[Long]("file_size_bytes") - - def fileFormat = column[String]("file_format") - - // New JSONB columns - def checksums = column[List[SequenceFileChecksumJsonb]]("checksums") - def httpLocations = column[List[SequenceFileHttpLocationJsonb]]("http_locations") - def atpLocation = column[Option[SequenceFileAtpLocationJsonb]]("atp_location") - - def aligner = column[String]("aligner") - - def targetReference = column[String]("target_reference") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[Option[LocalDateTime]]("updated_at") - - def * = (id.?, libraryId, fileName, fileSizeBytes, fileFormat, checksums, httpLocations, atpLocation, aligner, targetReference, createdAt, updatedAt).mapTo[SequenceFile] -} diff --git a/app/models/dal/domain/genomics/SequenceLibrariesTable.scala b/app/models/dal/domain/genomics/SequenceLibrariesTable.scala deleted file 
mode 100644 index 8c4583b2..00000000 --- a/app/models/dal/domain/genomics/SequenceLibrariesTable.scala +++ /dev/null @@ -1,66 +0,0 @@ -package models.dal.domain.genomics - -import models.domain.genomics.SequenceLibrary -import models.dal.MyPostgresProfile.api.* - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Represents the `sequence_library` table in the database, providing mappings and schema definitions for sequencing library records. - * - * A sequencing library is a collection of sequence data generated during a sequencing experiment, along with its associated metadata. - * This table stores information such as the laboratory that generated the data, the sequencing instrument used, the type of test, - * and various sequencing parameters (e.g., read count, read length, paired-end status). - * - * Table columns: - * - `id`: A unique identifier for the sequencing library (primary key, auto-incremented). - * - `sample_guid`: A UUID representing the sample associated with this sequencing library. - * - `lab`: The name of the laboratory responsible for creating or processing the sequencing data. - * - `test_type`: The type of sequencing test performed (e.g., WGS, RNA-Seq). - * - `run_date`: The timestamp when the sequencing run occurred. - * - `instrument`: The name or model of the sequencing instrument used for generating the data. - * - `reads`: The total number of sequencing reads generated in this library. - * - `read_length`: The length of each read in base pairs. - * - `paired_end`: A boolean indicating whether the sequencing was paired-end (true) or single-end (false). - * - `insert_size`: Median insert size for paired-end sequencing (null if not applicable). - * - `created_at`: The timestamp when the record was created. - * - `updated_at`: An optional timestamp indicating when the record was last updated. - * - * The data in this table is mapped to the `SequenceLibrary` case class. 
The case class encapsulates the sequencing library's - * metadata, making it easier to access and manipulate within the application. - * - * Relationships: - * This table may be linked to other tables (e.g., samples or sequencing experiments) via the `sample_guid` column. - */ -class SequenceLibrariesTable(tag: Tag) extends Table[SequenceLibrary](tag, "sequence_library") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def sampleGuid = column[UUID]("sample_guid") - - def lab = column[String]("lab") - - def testTypeId = column[Int]("test_type_id") - - def runDate = column[LocalDateTime]("run_date") - - def instrument = column[String]("instrument") - - def reads = column[Int]("reads") - - def readLength = column[Int]("read_length") - - def pairedEnd = column[Boolean]("paired_end") - - def insertSize = column[Option[Int]]("insert_size") - - def atUri = column[Option[String]]("at_uri") - - def atCid = column[Option[String]]("at_cid") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[Option[LocalDateTime]]("updated_at") - - def * = (id.?, sampleGuid, lab, testTypeId, runDate, instrument, reads, readLength, pairedEnd, insertSize, atUri, atCid, createdAt, updatedAt).mapTo[SequenceLibrary] -} diff --git a/app/models/dal/domain/genomics/SequenceLibrarySkechetsTable.scala b/app/models/dal/domain/genomics/SequenceLibrarySkechetsTable.scala deleted file mode 100644 index 3f63d40f..00000000 --- a/app/models/dal/domain/genomics/SequenceLibrarySkechetsTable.scala +++ /dev/null @@ -1,90 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{MinHashSketch, SequenceLibrarySketch} - -import java.nio.ByteBuffer -import java.time.LocalDateTime - - -class SequenceLibrarySketchesTable(tag: Tag) extends Table[SequenceLibrarySketch](tag, "sequence_library_sketch") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def sequenceLibraryId = 
column[Int]("sequence_library_id") - - def autosomalKmerHashes = column[Array[Byte]]("autosomal_kmers", O.SqlType("bytea")) - - def autosomalHash = column[String]("autosomal_hash") - - def yChromosomeKmerHashes = column[Option[Array[Byte]]]("y_chromosome_kmers", O.SqlType("bytea")) - - def yChromosomeHash = column[Option[String]]("y_chromosome_hash") - - def mtDnaKmerHashes = column[Option[Array[Byte]]]("mt_dna_kmers", O.SqlType("bytea")) - - def mtDnaHash = column[Option[String]]("mt_dna_hash") - - def createdAt = column[LocalDateTime]("created_at") - - // Use proper constructor syntax for case class - private def toMinHashSketch(bytes: Array[Byte], hash: String): MinHashSketch = - new MinHashSketch(MinHashSketch.bytesToLongArray(bytes), hash) - - private def toOptionalMinHashSketch(bytesAndHash: (Option[Array[Byte]], Option[String])): Option[MinHashSketch] = - for { - bytes <- bytesAndHash._1 - hash <- bytesAndHash._2 - } yield toMinHashSketch(bytes, hash) - - def * = ( - id.?, - sequenceLibraryId, - (autosomalKmerHashes, autosomalHash) <> ( { - case (bytes: Array[Byte], hash: String) => toMinHashSketch(bytes, hash) - }, { (sketch: MinHashSketch) => - Some((MinHashSketch.longArrayToBytes(sketch.kmerHashes), sketch.finalHash)) - } - ), - (yChromosomeKmerHashes, yChromosomeHash) <> ( { (tuple: (Option[Array[Byte]], Option[String])) => toOptionalMinHashSketch(tuple) }, { (optSketch: Option[MinHashSketch]) => - Some(( - optSketch.map(sketch => MinHashSketch.longArrayToBytes(sketch.kmerHashes)), - optSketch.map(_.finalHash) - )) - } - ), - (mtDnaKmerHashes, mtDnaHash) <> ( { (tuple: (Option[Array[Byte]], Option[String])) => toOptionalMinHashSketch(tuple) }, { (optSketch: Option[MinHashSketch]) => - Some(( - optSketch.map(sketch => MinHashSketch.longArrayToBytes(sketch.kmerHashes)), - optSketch.map(_.finalHash) - )) - } - ), - createdAt - ) <> ((SequenceLibrarySketch.apply _).tupled, SequenceLibrarySketch.unapply) - - def sequenceLibrary = foreignKey( - 
"fk_sequence_library_sketch_library", - sequenceLibraryId, - TableQuery[SequenceLibrariesTable])(_.id, onDelete = ForeignKeyAction.Cascade) -} - - -object MinHashSketch { - def longArrayToBytes(arr: Array[Long]): Array[Byte] = { - val bb = ByteBuffer.allocate(arr.length * 8) - arr.foreach(bb.putLong) - bb.array() - } - - def bytesToLongArray(bytes: Array[Byte]): Array[Long] = { - val bb = ByteBuffer.wrap(bytes) - val result = new Array[Long](bytes.length / 8) - for (i <- result.indices) { - result(i) = bb.getLong() - } - result - } - - -} diff --git a/app/models/dal/domain/genomics/SequencerInstrumentsTable.scala b/app/models/dal/domain/genomics/SequencerInstrumentsTable.scala deleted file mode 100644 index c1d749e1..00000000 --- a/app/models/dal/domain/genomics/SequencerInstrumentsTable.scala +++ /dev/null @@ -1,50 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.SequencerInstrument -import slick.lifted.{ProvenShape, Tag} - -import java.time.LocalDateTime - -class SequencerInstrumentsTable(tag: Tag) extends MyPostgresProfile.api.Table[SequencerInstrument](tag, "sequencer_instrument") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def instrumentId = column[String]("instrument_id") - - def labId = column[Int]("lab_id") - - def manufacturer = column[Option[String]]("manufacturer") - - def model = column[Option[String]]("model") - - def source = column[Option[String]]("source") - - def observationCount = column[Int]("observation_count") - - def confidenceScore = column[Double]("confidence_score") - - def lastObservedAt = column[Option[LocalDateTime]]("last_observed_at") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[Option[LocalDateTime]]("updated_at") - - override def * : ProvenShape[SequencerInstrument] = ( - id.?, - instrumentId, - labId, - manufacturer, - model, - source, - observationCount, - 
confidenceScore, - lastObservedAt, - createdAt, - updatedAt - ).mapTo[SequencerInstrument] - - // Unique index on instrument_id - def instrumentIdIdx = index("sequencer_instrument_instrument_id_uindex", instrumentId, unique = true) -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/SequencingLabsTable.scala b/app/models/dal/domain/genomics/SequencingLabsTable.scala deleted file mode 100644 index 1bad17c7..00000000 --- a/app/models/dal/domain/genomics/SequencingLabsTable.scala +++ /dev/null @@ -1,38 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.SequencingLab -import slick.lifted.{ProvenShape, Tag} - -import java.time.LocalDateTime - -class SequencingLabsTable(tag: Tag) extends MyPostgresProfile.api.Table[SequencingLab](tag, "sequencing_lab") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def name = column[String]("name") - - def isD2c = column[Boolean]("is_d2c") - - def websiteUrl = column[Option[String]]("website_url") - - def descriptionMarkdown = column[Option[String]]("description_markdown") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[Option[LocalDateTime]]("updated_at") - - override def * : ProvenShape[SequencingLab] = ( - id.?, - name, - isD2c, - websiteUrl, - descriptionMarkdown, - createdAt, - updatedAt - ).mapTo[SequencingLab] - - // Unique constraint on name - def nameIdx = index("sequencing_lab_name_key", name, unique = true) -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/SpecimenDonorsTable.scala b/app/models/dal/domain/genomics/SpecimenDonorsTable.scala deleted file mode 100644 index 3f674b9c..00000000 --- a/app/models/dal/domain/genomics/SpecimenDonorsTable.scala +++ /dev/null @@ -1,66 +0,0 @@ -package models.dal.domain.genomics - -import com.vividsolutions.jts.geom.Point -import models.dal.MyPostgresProfile.api.* -import 
models.domain.genomics.{BiologicalSex, BiosampleType, SpecimenDonor} - -/** - * Represents the database table definition for storing specimen donor records. - * - * @constructor Initializes a new instance of the `SpecimenDonorsTable` class. - * @param tag A Slick `Tag` object used to scope and reference the table within a database schema. - * - * This table is linked to `SpecimenDonor` entity and is used to store information - * about donors, including their unique identifier and the biobank they originate from. - * - * Columns: - * - `id`: Auto-incremented primary key, uniquely identifying each specimen donor. - * - `donorIdentifier`: A unique string identifier for the donor, used for cross-referencing. - * - `originBiobank`: The biobank's name or identifier where the donor originates. - * - * Primary key: - * - `id`: Serves as the primary key for the table. - * - * Mapping: - * - Defines a mapping to the `SpecimenDonor` case class. - */ -class SpecimenDonorsTable(tag: Tag) extends Table[SpecimenDonor](tag, "specimen_donor") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def donorIdentifier = column[String]("donor_identifier") - - def originBiobank = column[String]("origin_biobank") - - def donorType = column[BiosampleType]("donor_type") - - def sex = column[Option[BiologicalSex]]("sex") - - def geocoord = column[Option[Point]]("geocoord") - - def pgpParticipantId = column[Option[String]]("pgp_participant_id") - - def atUri = column[Option[String]]("at_uri") - - def dateRangeStart = column[Option[Int]]("date_range_start") - - def dateRangeEnd = column[Option[Int]]("date_range_end") - - def yDnaReconciliationRef = column[Option[String]]("y_dna_reconciliation_ref") - - def mtDnaReconciliationRef = column[Option[String]]("mt_dna_reconciliation_ref") - - def * = ( - id.?, - donorIdentifier, - originBiobank, - donorType, - sex, - geocoord, - pgpParticipantId, - atUri, - dateRangeStart, - dateRangeEnd, - yDnaReconciliationRef, - mtDnaReconciliationRef - 
).mapTo[SpecimenDonor] -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/StrMutationRate.scala b/app/models/dal/domain/genomics/StrMutationRate.scala deleted file mode 100644 index b8d920a4..00000000 --- a/app/models/dal/domain/genomics/StrMutationRate.scala +++ /dev/null @@ -1,145 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import play.api.libs.json.{Json, OFormat} - -import java.time.Instant - -/** - * Per-marker STR mutation rates for ASR and age estimation. - * - * Sources include Ballantyne 2010, Willems 2016, and other published studies. - * These rates are critical for accurate branch age estimation using the - * stepwise mutation model. - * - * @param id Auto-generated primary key - * @param markerName STR marker name (e.g., DYS456, DYS389I) - * @param panelNames Panels containing this marker (PowerPlex, YHRD, BigY, etc.) - * @param mutationRate Mutations per generation - * @param mutationRateLower 95% CI lower bound - * @param mutationRateUpper 95% CI upper bound - * @param omegaPlus Probability of expansion (default 0.5) - * @param omegaMinus Probability of contraction (default 0.5) - * @param multiStepRate Combined rate for multi-step mutations (omega_2 + omega_3 + ...) - * @param source Publication source (e.g., "Ballantyne 2010") - * @param createdAt When the rate was recorded - */ -case class StrMutationRate( - id: Option[Int] = None, - markerName: String, - panelNames: Option[List[String]] = None, - mutationRate: BigDecimal, - mutationRateLower: Option[BigDecimal] = None, - mutationRateUpper: Option[BigDecimal] = None, - omegaPlus: Option[BigDecimal] = Some(BigDecimal("0.5")), - omegaMinus: Option[BigDecimal] = Some(BigDecimal("0.5")), - multiStepRate: Option[BigDecimal] = None, - source: Option[String] = None, - createdAt: Instant = Instant.now() -) { - - /** - * Check if mutation is biased toward expansion. 
- */ - def isExpansionBiased: Boolean = - omegaPlus.getOrElse(BigDecimal("0.5")) > BigDecimal("0.5") - - /** - * Check if mutation is biased toward contraction. - */ - def isContractionBiased: Boolean = - omegaMinus.getOrElse(BigDecimal("0.5")) > BigDecimal("0.5") - - /** - * Get the symmetry of mutation direction (1.0 = perfectly symmetric). - * Values < 1.0 indicate directional bias. - */ - def directionalSymmetry: BigDecimal = { - val plus = omegaPlus.getOrElse(BigDecimal("0.5")) - val minus = omegaMinus.getOrElse(BigDecimal("0.5")) - if (plus >= minus) minus / plus else plus / minus - } -} - -object StrMutationRate { - implicit val format: OFormat[StrMutationRate] = Json.format[StrMutationRate] - - /** - * Create a rate entry with symmetric mutation probability. - */ - def symmetric( - markerName: String, - rate: BigDecimal, - source: String - ): StrMutationRate = StrMutationRate( - markerName = markerName, - mutationRate = rate, - source = Some(source) - ) - - /** - * Create a rate entry with directional bias. - */ - def withBias( - markerName: String, - rate: BigDecimal, - omegaPlus: BigDecimal, - omegaMinus: BigDecimal, - source: String - ): StrMutationRate = StrMutationRate( - markerName = markerName, - mutationRate = rate, - omegaPlus = Some(omegaPlus), - omegaMinus = Some(omegaMinus), - source = Some(source) - ) -} - -/** - * Slick table definition for str_mutation_rate. 
- */ -class StrMutationRateTable(tag: Tag) - extends Table[StrMutationRate](tag, Some("public"), "str_mutation_rate") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def markerName = column[String]("marker_name") - - def panelNames = column[Option[List[String]]]("panel_names") - - def mutationRate = column[BigDecimal]("mutation_rate") - - def mutationRateLower = column[Option[BigDecimal]]("mutation_rate_lower") - - def mutationRateUpper = column[Option[BigDecimal]]("mutation_rate_upper") - - def omegaPlus = column[Option[BigDecimal]]("omega_plus") - - def omegaMinus = column[Option[BigDecimal]]("omega_minus") - - def multiStepRate = column[Option[BigDecimal]]("multi_step_rate") - - def source = column[Option[String]]("source") - - def createdAt = column[Instant]("created_at") - - def * = ( - id.?, - markerName, - panelNames, - mutationRate, - mutationRateLower, - mutationRateUpper, - omegaPlus, - omegaMinus, - multiStepRate, - source, - createdAt - ).mapTo[StrMutationRate] - - def uniqueMarkerName = index( - "idx_str_mutation_rate_marker_unique", - markerName, - unique = true - ) -} diff --git a/app/models/dal/domain/genomics/SuperPopulationSummaryTable.scala b/app/models/dal/domain/genomics/SuperPopulationSummaryTable.scala deleted file mode 100644 index afd7dfe4..00000000 --- a/app/models/dal/domain/genomics/SuperPopulationSummaryTable.scala +++ /dev/null @@ -1,33 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{SuperPopulationListJsonb, SuperPopulationSummary} - -/** - * Slick table definition for super_population_summary table. - * Stores aggregated ancestry at continental level (9 super-populations). 
- */ -class SuperPopulationSummaryTable(tag: Tag) extends Table[SuperPopulationSummary](tag, "super_population_summary") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def populationBreakdownId = column[Int]("population_breakdown_id") - def superPopulation = column[String]("super_population") - def percentage = column[Double]("percentage") - def populations = column[Option[SuperPopulationListJsonb]]("populations") - - def * = ( - id.?, - populationBreakdownId, - superPopulation, - percentage, - populations - ).mapTo[SuperPopulationSummary] - - // Foreign key to population_breakdown - def populationBreakdownFk = foreignKey( - "fk_super_population_breakdown", - populationBreakdownId, - TableQuery[PopulationBreakdownTable] - )(_.id, onDelete = ForeignKeyAction.Cascade) - - def breakdownIdx = index("idx_super_population_breakdown", populationBreakdownId) -} diff --git a/app/models/dal/domain/genomics/TestTypeTable.scala b/app/models/dal/domain/genomics/TestTypeTable.scala deleted file mode 100644 index f4a6e096..00000000 --- a/app/models/dal/domain/genomics/TestTypeTable.scala +++ /dev/null @@ -1,44 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{DataGenerationMethod, TargetType, TestType, TestTypeRow} // Import new enums and TestTypeRow -import slick.lifted.ProvenShape -import java.time.LocalDate // For releaseDate, deprecatedAt - -/** - * Represents the Slick table definition for storing different types of genetic tests or sequencing methodologies. - * This table is used to define a taxonomy of test types that can be referenced throughout the system. - * - * @param tag A Slick Tag object used for binding the table to the database schema. 
- */ -class TestTypeTable(tag: Tag) extends Table[TestTypeRow](tag, Some("public"), "test_type_definition") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def code = column[String]("code", O.Unique) // Changed from name: TestType to code: String - def displayName = column[String]("display_name") - def category = column[DataGenerationMethod]("category") // Added - def vendor = column[Option[String]]("vendor") // Added - def targetType = column[TargetType]("target_type") // Added - def expectedMinDepth = column[Option[Double]]("expected_min_depth") - def expectedTargetDepth = column[Option[Double]]("expected_target_depth") - def expectedMarkerCount = column[Option[Int]]("expected_marker_count") - def supportsHaplogroupY = column[Boolean]("supports_haplogroup_y") - def supportsHaplogroupMt = column[Boolean]("supports_haplogroup_mt") - def supportsAutosomalIbd = column[Boolean]("supports_autosomal_ibd") - def supportsAncestry = column[Boolean]("supports_ancestry") - def typicalFileFormats = column[List[String]]("typical_file_formats") // Use List[String] for TEXT[], Slick-Pg handles List - def version = column[Option[String]]("version") - def releaseDate = column[Option[LocalDate]]("release_date") - def deprecatedAt = column[Option[LocalDate]]("deprecated_at") - def successorTestTypeId = column[Option[Int]]("successor_test_type_id") - def description = column[Option[String]]("description") - def documentationUrl = column[Option[String]]("documentation_url") - - // Projection for the case class - def * : ProvenShape[TestTypeRow] = ( - id.?, code, displayName, category, vendor, targetType, - expectedMinDepth, expectedTargetDepth, expectedMarkerCount, - supportsHaplogroupY, supportsHaplogroupMt, supportsAutosomalIbd, supportsAncestry, - typicalFileFormats, version, releaseDate, deprecatedAt, successorTestTypeId, - description, documentationUrl - ) <> ((TestTypeRow.apply _).tupled, TestTypeRow.unapply) -} \ No newline at end of file diff --git 
a/app/models/dal/domain/genomics/TestTypeTargetRegionTable.scala b/app/models/dal/domain/genomics/TestTypeTargetRegionTable.scala deleted file mode 100644 index 46f03189..00000000 --- a/app/models/dal/domain/genomics/TestTypeTargetRegionTable.scala +++ /dev/null @@ -1,32 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.TestTypeTargetRegion -import slick.lifted.{ProvenShape, Tag} - -class TestTypeTargetRegionTable(tag: Tag) - extends MyPostgresProfile.api.Table[TestTypeTargetRegion](tag, "test_type_target_region") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def testTypeId = column[Int]("test_type_id") - def contigName = column[String]("contig_name") - def startPosition = column[Option[Int]]("start_position") - def endPosition = column[Option[Int]]("end_position") - def regionName = column[String]("region_name") - def regionType = column[String]("region_type") - def expectedCoveragePct = column[Option[Double]]("expected_coverage_pct") - def expectedMinDepth = column[Option[Double]]("expected_min_depth") - - override def * : ProvenShape[TestTypeTargetRegion] = ( - id.?, - testTypeId, - contigName, - startPosition, - endPosition, - regionName, - regionType, - expectedCoveragePct, - expectedMinDepth - ).mapTo[TestTypeTargetRegion] -} diff --git a/app/models/dal/domain/genomics/ValidationServicesTable.scala b/app/models/dal/domain/genomics/ValidationServicesTable.scala deleted file mode 100644 index 600e1d03..00000000 --- a/app/models/dal/domain/genomics/ValidationServicesTable.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.ValidationService - -import java.util.UUID - -class ValidationServicesTable(tag: Tag) extends Table[ValidationService](tag, "validation_service") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def guid = 
column[UUID]("guid", O.Unique) // UUID, with unique constraint - - def name = column[String]("name", O.Unique) // String, with unique constraint - - def description = column[Option[String]]("description") - - def trustLevel = column[Option[String]]("trust_level") - - def * = ( - id.?, - guid, - name, - description, - trustLevel - ).mapTo[ValidationService] -} \ No newline at end of file diff --git a/app/models/dal/domain/genomics/VariantV2Table.scala b/app/models/dal/domain/genomics/VariantV2Table.scala deleted file mode 100644 index bc275224..00000000 --- a/app/models/dal/domain/genomics/VariantV2Table.scala +++ /dev/null @@ -1,89 +0,0 @@ -package models.dal.domain.genomics - -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.haplogroups.HaplogroupsTable -import models.domain.genomics.{MutationType, NamingStatus, VariantV2} -import play.api.libs.json.JsValue -import slick.ast.BaseTypedType -import slick.jdbc.JdbcType - -import java.time.Instant - -/** - * Slick table definition for the `variant_v2` table. - * - * This table stores consolidated variants with JSONB columns for coordinates - * (supporting multiple reference genomes) and aliases (supporting multiple - * naming sources). 
- * - * Schema: - * - One row per logical variant (not per reference genome) - * - JSONB `coordinates` contains position + alleles per assembly - * - JSONB `aliases` contains all known names grouped by source - * - `defining_haplogroup_id` distinguishes parallel mutations - */ -class VariantV2Table(tag: Tag) extends Table[VariantV2](tag, Some("public"), "variant_v2") { - - // MappedColumnType for MutationType enum - implicit val mutationTypeMapper: JdbcType[MutationType] with BaseTypedType[MutationType] = - MappedColumnType.base[MutationType, String]( - _.dbValue, - MutationType.fromStringOrDefault(_) - ) - - // MappedColumnType for NamingStatus enum - implicit val namingStatusMapper: JdbcType[NamingStatus] with BaseTypedType[NamingStatus] = - MappedColumnType.base[NamingStatus, String]( - _.dbValue, - NamingStatus.fromStringOrDefault(_) - ) - - def variantId = column[Int]("variant_id", O.PrimaryKey, O.AutoInc) - - def canonicalName = column[Option[String]]("canonical_name") - - def mutationType = column[MutationType]("mutation_type") - - def namingStatus = column[NamingStatus]("naming_status") - - def aliases = column[JsValue]("aliases") - - def coordinates = column[JsValue]("coordinates") - - def definingHaplogroupId = column[Option[Int]]("defining_haplogroup_id") - - def evidence = column[JsValue]("evidence") - - def primers = column[JsValue]("primers") - - def notes = column[Option[String]]("notes") - - def annotations = column[JsValue]("annotations") - - def createdAt = column[Instant]("created_at") - - def updatedAt = column[Instant]("updated_at") - - def * = ( - variantId.?, - canonicalName, - mutationType, - namingStatus, - aliases, - coordinates, - definingHaplogroupId, - evidence, - primers, - notes, - annotations, - createdAt, - updatedAt - ).mapTo[VariantV2] - - // Foreign key to haplogroup for parallel mutation disambiguation - def definingHaplogroupFK = foreignKey( - "variant_v2_defining_haplogroup_fk", - definingHaplogroupId, - 
TableQuery[HaplogroupsTable] - )(_.haplogroupId.?, onDelete = ForeignKeyAction.SetNull) -} diff --git a/app/models/dal/domain/haplogroups/GenealogicalAnchorTable.scala b/app/models/dal/domain/haplogroups/GenealogicalAnchorTable.scala deleted file mode 100644 index 290fe278..00000000 --- a/app/models/dal/domain/haplogroups/GenealogicalAnchorTable.scala +++ /dev/null @@ -1,37 +0,0 @@ -package models.dal.domain.haplogroups - -import models.dal.MyPostgresProfile.api.* -import models.domain.haplogroups.{AnchorType, GenealogicalAnchor} - -import java.time.LocalDateTime - -class GenealogicalAnchorTable(tag: Tag) extends Table[GenealogicalAnchor](tag, Some("tree"), "genealogical_anchor") { - - implicit val anchorTypeMapper: BaseColumnType[AnchorType] = - MappedColumnType.base[AnchorType, String](_.dbValue, AnchorType.fromString) - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def haplogroupId = column[Int]("haplogroup_id") - def anchorType = column[AnchorType]("anchor_type") - def dateCe = column[Int]("date_ce") - def dateUncertaintyYears = column[Option[Int]]("date_uncertainty_years") - def confidence = column[Option[BigDecimal]]("confidence") - def description = column[Option[String]]("description") - def source = column[Option[String]]("source") - def carbonDateBp = column[Option[Int]]("carbon_date_bp") - def carbonDateSigma = column[Option[Int]]("carbon_date_sigma") - def createdBy = column[Option[String]]("created_by") - def createdAt = column[LocalDateTime]("created_at") - - def * = ( - id.?, haplogroupId, anchorType, dateCe, dateUncertaintyYears, - confidence, description, source, carbonDateBp, carbonDateSigma, - createdBy, createdAt - ).mapTo[GenealogicalAnchor] - - def haplogroupFK = foreignKey( - "genealogical_anchor_haplogroup_fk", - haplogroupId, - TableQuery[HaplogroupsTable] - )(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) -} diff --git a/app/models/dal/domain/haplogroups/HaplogroupAncestralStrTable.scala 
b/app/models/dal/domain/haplogroups/HaplogroupAncestralStrTable.scala deleted file mode 100644 index eefece2f..00000000 --- a/app/models/dal/domain/haplogroups/HaplogroupAncestralStrTable.scala +++ /dev/null @@ -1,40 +0,0 @@ -package models.dal.domain.haplogroups - -import models.dal.MyPostgresProfile.api.* -import models.domain.haplogroups.{HaplogroupAncestralStr, MotifMethod} - -import java.time.LocalDateTime - -class HaplogroupAncestralStrTable(tag: Tag) extends Table[HaplogroupAncestralStr](tag, Some("tree"), "haplogroup_ancestral_str") { - - implicit val motifMethodMapper: BaseColumnType[MotifMethod] = - MappedColumnType.base[MotifMethod, String](_.dbValue, MotifMethod.fromString) - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def haplogroupId = column[Int]("haplogroup_id") - def markerName = column[String]("marker_name") - def ancestralValue = column[Option[Int]]("ancestral_value") - def ancestralValueAlt = column[Option[List[Int]]]("ancestral_value_alt") - def confidence = column[Option[BigDecimal]]("confidence") - def supportingSamples = column[Option[Int]]("supporting_samples") - def variance = column[Option[BigDecimal]]("variance") - def computedAt = column[LocalDateTime]("computed_at") - def method = column[MotifMethod]("method") - - def * = ( - id.?, haplogroupId, markerName, ancestralValue, ancestralValueAlt, - confidence, supportingSamples, variance, computedAt, method - ).mapTo[HaplogroupAncestralStr] - - def haplogroupFK = foreignKey( - "hg_ancestral_str_haplogroup_fk", - haplogroupId, - TableQuery[HaplogroupsTable] - )(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) - - def uniqueHaplogroupMarker = index( - "idx_hg_ancestral_str_unique", - (haplogroupId, markerName), - unique = true - ) -} diff --git a/app/models/dal/domain/haplogroups/HaplogroupRelationshipsTable.scala b/app/models/dal/domain/haplogroups/HaplogroupRelationshipsTable.scala deleted file mode 100644 index 2f401b33..00000000 --- 
a/app/models/dal/domain/haplogroups/HaplogroupRelationshipsTable.scala +++ /dev/null @@ -1,51 +0,0 @@ -package models.dal.domain.haplogroups - -import models.domain.haplogroups.HaplogroupRelationship -import models.dal.MyPostgresProfile.api.* - -import java.time.LocalDateTime - -/** - * Represents the table definition for storing relationships between haplogroups in the database. - * This table maps a child haplogroup to its parent haplogroup, maintaining lineage and hierarchy, - * along with metadata about validity and source information. - * - * The table includes columns for a unique identifier, child and parent haplogroup IDs, revision - * details, validity timeframes, and source of the relationship definition. - * - * Database schema details: - * - Table name: "haplogroup_relationship" - * - Primary key: "haplogroup_relationship_id" - * - Foreign keys: - * - "child_haplogroup_fk" references HaplogroupsTable on "child_haplogroup_id" - * - "parent_haplogroup_fk" references HaplogroupsTable on "parent_haplogroup_id" - * - Indexes: - * - "unique_child_revision" ensures uniqueness of child haplogroup ID with the associated revision ID. - * - * Note: - * - Relationships are associated with revision IDs, allowing tracking of updates or historical changes in the data. - * - Validity is defined by "valid_from" and optionally "valid_until" columns, indicating the effective timespan of the relationship. 
- */ -class HaplogroupRelationshipsTable(tag: Tag) extends Table[HaplogroupRelationship](tag, Some("tree"), "haplogroup_relationship") { - def haplogroupRelationshipId = column[Int]("haplogroup_relationship_id", O.PrimaryKey, O.AutoInc) - - def childHaplogroupId = column[Int]("child_haplogroup_id") - - def parentHaplogroupId = column[Int]("parent_haplogroup_id") - - def revisionId = column[Int]("revision_id") - - def validFrom = column[LocalDateTime]("valid_from") - - def validUntil = column[Option[LocalDateTime]]("valid_until") - - def source = column[String]("source") - - def * = (haplogroupRelationshipId.?, childHaplogroupId, parentHaplogroupId, revisionId, validFrom, validUntil, source).mapTo[HaplogroupRelationship] - - def childHaplogroupFK = foreignKey("child_haplogroup_fk", childHaplogroupId, TableQuery[HaplogroupsTable])(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) - - def parentHaplogroupFK = foreignKey("parent_haplogroup_fk", parentHaplogroupId, TableQuery[HaplogroupsTable])(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) - - def uniqueChildRevision = index("unique_child_revision", (childHaplogroupId, revisionId), unique = true) -} diff --git a/app/models/dal/domain/haplogroups/HaplogroupVariantMetadataTable.scala b/app/models/dal/domain/haplogroups/HaplogroupVariantMetadataTable.scala deleted file mode 100644 index 4ba823e5..00000000 --- a/app/models/dal/domain/haplogroups/HaplogroupVariantMetadataTable.scala +++ /dev/null @@ -1,71 +0,0 @@ -package models.dal.domain.haplogroups - -import models.dal.MyPostgresProfile.api.* -import models.domain.haplogroups.HaplogroupVariantMetadata - -import java.time.LocalDateTime - -/** - * Represents the table definition for the "haplogroup_variant_metadata" database table, which stores metadata - * about changes or revisions associated with haplogroup variants. 
- * - * This class extends Slick's `Table` and provides column mappings, primary keys, and foreign key constraints - * for the underlying database table. - * - * Table columns: - * - `haplogroup_variant_id`: The unique identifier for the associated haplogroup variant. - * - `revision_id`: An integer indicating the revision number for the haplogroup variant. - * - `author`: The name or identifier of the person or entity responsible for the revision. - * - `timestamp`: A `LocalDateTime` value indicating when the revision occurred. - * - `comment`: A textual comment or description associated with the revision. - * - `change_type`: A string describing the type of change, e.g., 'update', 'create', or 'delete'. - * - `previous_revision_id`: An optional identifier indicating the previous revision in the sequence, if any. - * - * Primary key: - * - A composite primary key based on `haplogroup_variant_id` and `revision_id`. - * - * Foreign key constraints: - * - `haplogroup_variant_id` references the primary key of the `haplogroup_variant` table, - * with update actions restricted and delete actions cascaded. - * - * Mapping: - * - Maps the columns to the corresponding fields of the `HaplogroupVariantMetadata` case class. - * - * The table is used to track and manage revisions or updates made to haplogroup variants, enabling a history - * of changes to be stored for audit and reference purposes. 
- */ -class HaplogroupVariantMetadataTable(tag: Tag) extends Table[HaplogroupVariantMetadata](tag, Some("tree"), "haplogroup_variant_metadata") { - def haplogroup_variant_id = column[Int]("haplogroup_variant_id") - - def revision_id = column[Int]("revision_id") - - def author = column[String]("author") - - def timestamp = column[LocalDateTime]("timestamp") - - def comment = column[String]("comment") - - def change_type = column[String]("change_type") - - def previous_revision_id = column[Option[Int]]("previous_revision_id") - - def pk = primaryKey("pk_haplogroup_variant_metadata", (haplogroup_variant_id, revision_id)) - - def fk = foreignKey( - "fk_haplogroup_variant_metadata_variant", - haplogroup_variant_id, - TableQuery[HaplogroupVariantsTable])(_.haplogroupVariantId, - onUpdate = ForeignKeyAction.Restrict, - onDelete = ForeignKeyAction.Cascade - ) - - def * = ( - haplogroup_variant_id, - revision_id, - author, - timestamp, - comment, - change_type, - previous_revision_id - ).mapTo[HaplogroupVariantMetadata] -} \ No newline at end of file diff --git a/app/models/dal/domain/haplogroups/HaplogroupVariantsTable.scala b/app/models/dal/domain/haplogroups/HaplogroupVariantsTable.scala deleted file mode 100644 index 9d8f9c53..00000000 --- a/app/models/dal/domain/haplogroups/HaplogroupVariantsTable.scala +++ /dev/null @@ -1,48 +0,0 @@ -package models.dal.domain.haplogroups - -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.genomics.VariantV2Table -import models.domain.haplogroups.{Haplogroup, HaplogroupVariant} - -/** - * Represents the mapping for the `haplogroup_variant` table in the database. This table defines - * an associative relationship between haplogroups and genetic variants, allowing each haplogroup - * to be linked to specific variants. - * - * @constructor Initializes the Slick table mapping for `haplogroup_variant`. - * @param tag A Slick `Tag` object containing meta-information used internally for query construction. 
- * - * Table columns: - * - `haplogroupVariantId`: Auto-incrementing primary key for the association (integer). - * - `haplogroupId`: Foreign key referencing the `haplogroup_id` in the `haplogroups` table. - * - `variantId`: Foreign key referencing the `variant_id` in the `variant` table. - * - * Relationships: - * - `haplogroupFK`: Defines a foreign key constraint on the `haplogroupId` column, referencing - * the `haplogroup_id` column in the `HaplogroupsTable`. Automatically cascades deletes. - * - `variantFK`: Defines a foreign key constraint on the `variantId` column, referencing the - * `variant_id` column in the `VariantsTable`. Automatically cascades deletes. - * - * Indexes: - * - `uniqueHaplogroupVariant`: Ensures unique associations between haplogroups and variants, - * preventing duplicate entries for the same pair of `haplogroupId` and `variantId`. - * - * Slick mapping: - * - The `*` projection maps table rows to the `HaplogroupVariant` case class. - */ -class HaplogroupVariantsTable(tag: Tag) extends Table[HaplogroupVariant](tag, Some("tree"), "haplogroup_variant") { - def haplogroupVariantId = column[Int]("haplogroup_variant_id", O.PrimaryKey, O.AutoInc) - - def haplogroupId = column[Int]("haplogroup_id") - - def variantId = column[Int]("variant_id") - - def * = (haplogroupVariantId.?, haplogroupId, variantId).mapTo[HaplogroupVariant] - - def haplogroupFK = foreignKey("haplogroup_fk", haplogroupId, TableQuery[HaplogroupsTable])(_.haplogroupId, onDelete = ForeignKeyAction.Cascade) - - // Foreign key to variant_v2 table - def variantFK = foreignKey("variant_fk", variantId, TableQuery[VariantV2Table])(_.variantId, onDelete = ForeignKeyAction.Cascade) - - def uniqueHaplogroupVariant = index("unique_haplogroup_variant", (haplogroupId, variantId), unique = true) -} diff --git a/app/models/dal/domain/haplogroups/HaplogroupsTable.scala b/app/models/dal/domain/haplogroups/HaplogroupsTable.scala deleted file mode 100644 index ab8e018e..00000000 --- 
a/app/models/dal/domain/haplogroups/HaplogroupsTable.scala +++ /dev/null @@ -1,81 +0,0 @@ -package models.dal.domain.haplogroups - -import models.HaplogroupType -import models.dal.MyPostgresProfile.api.* -import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance} -import slick.ast.TypedType -import slick.lifted.{MappedProjection, ProvenShape} - -import java.time.LocalDateTime - -/** - * Represents the Slick mapping for the `haplogroup` table in the database. Each row in this table corresponds - * to a `Haplogroup` entity. The table captures information about genetic haplogroups, which are groups of populations - * that share a common ancestor through either paternal (Y-DNA) or maternal (mtDNA) lineage. - * - * @constructor Creates a new instance of the HaplogroupsTable. - * @param tag A Slick `Tag` object that contains meta-information used internally by Slick for query construction. - * - * Table columns: - * - `haplogroupId`: Auto-incrementing primary key for the haplogroup (integer). - * - `name`: The name of the haplogroup (string). - * - `lineage`: Optional lineage description of the haplogroup (string). - * - `description`: Optional textual description of the haplogroup (string). - * - `haplogroupType`: The type of haplogroup (e.g., Y-DNA or mtDNA) stored as a string. - * - `revisionId`: An integer indicating the revision or version of the haplogroup data. - * - `source`: The data source or origin of the haplogroup information (string). - * - `confidenceLevel`: A string representing the confidence level of the haplogroup assignment. - * - `validFrom`: The timestamp indicating when this haplogroup record becomes valid. - * - `validUntil`: Optional timestamp indicating when this haplogroup record becomes invalid. - * - * Relationship: - * This table contains all relevant metadata and structural information for managing haplogroups within an application or database. 
- * - * Slick mapping: - * The `*` projection maps table rows to the `Haplogroup` case class. - */ -class HaplogroupsTable(tag: Tag) extends Table[Haplogroup](tag, Some("tree"), "haplogroup") { - - def haplogroupId = column[Int]("haplogroup_id", O.PrimaryKey, O.AutoInc) - - def name = column[String]("name") - - def lineage = column[Option[String]]("lineage") - - def description = column[Option[String]]("description") - - def haplogroupType = column[HaplogroupType]("haplogroup_type") - - def revisionId = column[Int]("revision_id") - - def source = column[String]("source") - - def confidenceLevel = column[String]("confidence_level") - - def validFrom = column[LocalDateTime]("valid_from") - - def validUntil = column[Option[LocalDateTime]]("valid_until") - - // Branch age estimate columns - def formedYbp = column[Option[Int]]("formed_ybp") - - def formedYbpLower = column[Option[Int]]("formed_ybp_lower") - - def formedYbpUpper = column[Option[Int]]("formed_ybp_upper") - - def tmrcaYbp = column[Option[Int]]("tmrca_ybp") - - def tmrcaYbpLower = column[Option[Int]]("tmrca_ybp_lower") - - def tmrcaYbpUpper = column[Option[Int]]("tmrca_ybp_upper") - - def ageEstimateSource = column[Option[String]]("age_estimate_source") - - // Multi-source provenance tracking (JSONB) - def provenance = column[Option[HaplogroupProvenance]]("provenance") - - def * = ( - haplogroupId.?, name, lineage, description, haplogroupType, revisionId, source, confidenceLevel, validFrom, validUntil, - formedYbp, formedYbpLower, formedYbpUpper, tmrcaYbp, tmrcaYbpLower, tmrcaYbpUpper, ageEstimateSource, provenance - ).mapTo[Haplogroup] -} diff --git a/app/models/dal/domain/haplogroups/RelationshipRevisionMetadataTable.scala b/app/models/dal/domain/haplogroups/RelationshipRevisionMetadataTable.scala deleted file mode 100644 index 844d8769..00000000 --- a/app/models/dal/domain/haplogroups/RelationshipRevisionMetadataTable.scala +++ /dev/null @@ -1,67 +0,0 @@ -package models.dal.domain.haplogroups - -import 
models.domain.haplogroups.RelationshipRevisionMetadata -import models.dal.MyPostgresProfile.api.* - -import java.time.LocalDateTime - -/** - * Represents the table definition for storing and managing metadata about - * revisions made to haplogroup relationships. Each entry captures details - * about a specific revision, including the author, timestamp, and a description - * of the changes made. It also includes references to the affected haplogroup - * relationship and optionally to the previous revision. - * - * Table schema details: - * - Table name: `relationship_revision_metadata` - * - Primary key: (`haplogroup_relationship_id`, `revision_id`) - * - Foreign keys: - * - `haplogroup_relationship_id` references `HaplogroupRelationshipsTable(haplogroupRelationshipId)` - * - * Columns: - * - `haplogroup_relationship_id`: The ID of the haplogroup relationship being revised. - * - `revision_id`: A unique identifier for the specific revision. - * - `author`: The name or identifier of the individual or entity that authored the revision. - * - `timestamp`: The time at which the revision was created. - * - `comment`: A descriptive comment explaining the details or purpose of the revision. - * - `change_type`: A string indicating the type of change made (e.g., "update", "create", "delete"). - * - `previous_revision_id`: An optional reference to the ID of the previous revision, if applicable. - * - * This table is intended to provide traceability and context for changes made over time to - * the haplogroup relationship data, supporting historical comparisons and auditing requirements. 
- */ -class RelationshipRevisionMetadataTable(tag: Tag) extends Table[RelationshipRevisionMetadata](tag, Some("tree"), "relationship_revision_metadata") { - def haplogroup_relationship_id = column[Int]("haplogroup_relationship_id") - - def revisionId = column[Int]("revision_id") - - def author = column[String]("author") - - def timestamp = column[LocalDateTime]("timestamp") - - def comment = column[String]("comment") - - def changeType = column[String]("change_type") - - def previousRevisionId = column[Option[Int]]("previous_revision_id") - - def pk = primaryKey("pk_relationship_revision_metadata", (haplogroup_relationship_id, revisionId)) - - def relationshipFk = foreignKey( - "fk_relationship", - haplogroup_relationship_id, - TableQuery[HaplogroupRelationshipsTable])(_.haplogroupRelationshipId) - - def * = ( - haplogroup_relationship_id, - revisionId, - author, - timestamp, - comment, - changeType, - previousRevisionId - ).mapTo[RelationshipRevisionMetadata] -} - -val relationshipRevisionMetadata = TableQuery[RelationshipRevisionMetadataTable] - diff --git a/app/models/dal/domain/haplogroups/TreeVersioningTables.scala b/app/models/dal/domain/haplogroups/TreeVersioningTables.scala deleted file mode 100644 index 6825204d..00000000 --- a/app/models/dal/domain/haplogroups/TreeVersioningTables.scala +++ /dev/null @@ -1,227 +0,0 @@ -package models.dal.domain.haplogroups - -import models.HaplogroupType -import models.dal.MyPostgresProfile.api.* -import models.domain.haplogroups.{ChangeSetStatus, ChangeStatus, TreeChangeType} -import slick.lifted.ProvenShape - -import java.time.LocalDateTime - -/** - * Slick table definitions for the Tree Versioning System. - * - * Supports Production/WIP tree versioning for bulk merge operations. 
- */ - -// ============================================================================ -// Row Case Classes -// ============================================================================ - -case class ChangeSetRow( - id: Option[Int], - haplogroupType: HaplogroupType, - name: String, - description: Option[String], - sourceName: String, - createdAt: LocalDateTime, - createdBy: String, - finalizedAt: Option[LocalDateTime], - appliedAt: Option[LocalDateTime], - appliedBy: Option[String], - discardedAt: Option[LocalDateTime], - discardedBy: Option[String], - discardReason: Option[String], - status: String, - nodesProcessed: Int, - nodesCreated: Int, - nodesUpdated: Int, - nodesUnchanged: Int, - variantsAdded: Int, - relationshipsCreated: Int, - relationshipsUpdated: Int, - splitOperations: Int, - ambiguityCount: Int, - ambiguityReportPath: Option[String], - metadata: Option[String] -) - -case class TreeChangeRow( - id: Option[Int], - changeSetId: Int, - changeType: String, - haplogroupId: Option[Int], - variantId: Option[Int], - oldParentId: Option[Int], - newParentId: Option[Int], - haplogroupData: Option[String], - oldData: Option[String], - createdHaplogroupId: Option[Int], - sequenceNum: Int, - status: String, - reviewedAt: Option[LocalDateTime], - reviewedBy: Option[String], - reviewNotes: Option[String], - createdAt: LocalDateTime, - appliedAt: Option[LocalDateTime], - ambiguityType: Option[String], - ambiguityConfidence: Option[Double] -) - -case class ChangeSetCommentRow( - id: Option[Int], - changeSetId: Int, - treeChangeId: Option[Int], - author: String, - content: String, - createdAt: LocalDateTime, - updatedAt: Option[LocalDateTime] -) - -// ============================================================================ -// Table Definitions -// ============================================================================ - -class ChangeSetsTable(tag: Tag) extends Table[ChangeSetRow](tag, Some("tree"), "change_set") { - - def id = column[Int]("id", 
O.PrimaryKey, O.AutoInc) - def haplogroupType = column[HaplogroupType]("haplogroup_type") - def name = column[String]("name") - def description = column[Option[String]]("description") - def sourceName = column[String]("source_name") - def createdAt = column[LocalDateTime]("created_at") - def createdBy = column[String]("created_by") - def finalizedAt = column[Option[LocalDateTime]]("finalized_at") - def appliedAt = column[Option[LocalDateTime]]("applied_at") - def appliedBy = column[Option[String]]("applied_by") - def discardedAt = column[Option[LocalDateTime]]("discarded_at") - def discardedBy = column[Option[String]]("discarded_by") - def discardReason = column[Option[String]]("discard_reason") - def status = column[String]("status") - def nodesProcessed = column[Int]("nodes_processed") - def nodesCreated = column[Int]("nodes_created") - def nodesUpdated = column[Int]("nodes_updated") - def nodesUnchanged = column[Int]("nodes_unchanged") - def variantsAdded = column[Int]("variants_added") - def relationshipsCreated = column[Int]("relationships_created") - def relationshipsUpdated = column[Int]("relationships_updated") - def splitOperations = column[Int]("split_operations") - def ambiguityCount = column[Int]("ambiguity_count") - def ambiguityReportPath = column[Option[String]]("ambiguity_report_path") - def metadata = column[Option[String]]("metadata") - - // Split into nested tuples to work around 22-column limit - private type CoreFields = (Option[Int], HaplogroupType, String, Option[String], String, - LocalDateTime, String, Option[LocalDateTime], Option[LocalDateTime], Option[String], - Option[LocalDateTime], Option[String], Option[String], String) - private type StatsFields = (Int, Int, Int, Int, Int, Int, Int, Int, Int, Option[String], Option[String]) - - def * : ProvenShape[ChangeSetRow] = ( - (id.?, haplogroupType, name, description, sourceName, createdAt, createdBy, - finalizedAt, appliedAt, appliedBy, discardedAt, discardedBy, discardReason, status), - 
(nodesProcessed, nodesCreated, nodesUpdated, nodesUnchanged, variantsAdded, - relationshipsCreated, relationshipsUpdated, splitOperations, ambiguityCount, - ambiguityReportPath, metadata) - ).<>( - { case (core: CoreFields, stats: StatsFields) => - ChangeSetRow( - core._1, core._2, core._3, core._4, core._5, core._6, core._7, - core._8, core._9, core._10, core._11, core._12, core._13, core._14, - stats._1, stats._2, stats._3, stats._4, stats._5, stats._6, stats._7, - stats._8, stats._9, stats._10, stats._11 - ) - }, - { (row: ChangeSetRow) => - Some(( - (row.id, row.haplogroupType, row.name, row.description, row.sourceName, row.createdAt, - row.createdBy, row.finalizedAt, row.appliedAt, row.appliedBy, row.discardedAt, - row.discardedBy, row.discardReason, row.status), - (row.nodesProcessed, row.nodesCreated, row.nodesUpdated, row.nodesUnchanged, row.variantsAdded, - row.relationshipsCreated, row.relationshipsUpdated, row.splitOperations, row.ambiguityCount, - row.ambiguityReportPath, row.metadata) - )) - } - ) - - def nameIdx = index("idx_change_set_name", (haplogroupType, name), unique = true) - def statusIdx = index("idx_change_set_status", status) - def typeIdx = index("idx_change_set_type", haplogroupType) -} - -class TreeChangesTable(tag: Tag) extends Table[TreeChangeRow](tag, Some("tree"), "tree_change") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def changeSetId = column[Int]("change_set_id") - def changeType = column[String]("change_type") - def haplogroupId = column[Option[Int]]("haplogroup_id") - def variantId = column[Option[Int]]("variant_id") - def oldParentId = column[Option[Int]]("old_parent_id") - def newParentId = column[Option[Int]]("new_parent_id") - def haplogroupData = column[Option[String]]("haplogroup_data") - def oldData = column[Option[String]]("old_data") - def createdHaplogroupId = column[Option[Int]]("created_haplogroup_id") - def sequenceNum = column[Int]("sequence_num") - def status = column[String]("status") - def 
reviewedAt = column[Option[LocalDateTime]]("reviewed_at") - def reviewedBy = column[Option[String]]("reviewed_by") - def reviewNotes = column[Option[String]]("review_notes") - def createdAt = column[LocalDateTime]("created_at") - def appliedAt = column[Option[LocalDateTime]]("applied_at") - def ambiguityType = column[Option[String]]("ambiguity_type") - def ambiguityConfidence = column[Option[Double]]("ambiguity_confidence") - - def * : ProvenShape[TreeChangeRow] = ( - id.?, - changeSetId, - changeType, - haplogroupId, - variantId, - oldParentId, - newParentId, - haplogroupData, - oldData, - createdHaplogroupId, - sequenceNum, - status, - reviewedAt, - reviewedBy, - reviewNotes, - createdAt, - appliedAt, - ambiguityType, - ambiguityConfidence - ).mapTo[TreeChangeRow] - - def changeSetIdx = index("idx_tree_change_set", changeSetId) - def haplogroupIdx = index("idx_tree_change_hg", haplogroupId) - def statusIdx = index("idx_tree_change_status", status) - def seqIdx = index("idx_tree_change_seq", (changeSetId, sequenceNum)) - - def changeSetFk = foreignKey("fk_tree_change_set", changeSetId, TableQuery[ChangeSetsTable])(_.id, onDelete = ForeignKeyAction.Cascade) -} - -class ChangeSetCommentsTable(tag: Tag) extends Table[ChangeSetCommentRow](tag, Some("tree"), "change_set_comment") { - - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def changeSetId = column[Int]("change_set_id") - def treeChangeId = column[Option[Int]]("tree_change_id") - def author = column[String]("author") - def content = column[String]("content") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[Option[LocalDateTime]]("updated_at") - - def * : ProvenShape[ChangeSetCommentRow] = ( - id.?, - changeSetId, - treeChangeId, - author, - content, - createdAt, - updatedAt - ).mapTo[ChangeSetCommentRow] - - def changeSetIdx = index("idx_change_set_comment_set", changeSetId) - def treeChangeIdx = index("idx_change_set_comment_change", treeChangeId) - - def changeSetFk = 
foreignKey("fk_comment_change_set", changeSetId, TableQuery[ChangeSetsTable])(_.id, onDelete = ForeignKeyAction.Cascade) - def treeChangeFk = foreignKey("fk_comment_tree_change", treeChangeId, TableQuery[TreeChangesTable])(_.id.?, onDelete = ForeignKeyAction.Cascade) -} diff --git a/app/models/dal/domain/haplogroups/WipTreeTables.scala b/app/models/dal/domain/haplogroups/WipTreeTables.scala deleted file mode 100644 index f103a86e..00000000 --- a/app/models/dal/domain/haplogroups/WipTreeTables.scala +++ /dev/null @@ -1,273 +0,0 @@ -package models.dal.domain.haplogroups - -import models.HaplogroupType -import models.dal.MyPostgresProfile.api.* -import models.domain.haplogroups.HaplogroupProvenance -import slick.lifted.ProvenShape - -import java.time.LocalDateTime - -/** - * Slick table definitions for WIP (Work In Progress) tree staging tables. - * - * These tables hold staged tree changes during merge operations before - * they are applied to production. Each table is scoped by change_set_id - * for easy cleanup on discard. 
- */ - -// ============================================================================ -// Row Case Classes -// ============================================================================ - -case class WipHaplogroupRow( - id: Option[Int], - changeSetId: Int, - placeholderId: Int, - name: String, - lineage: Option[String], - description: Option[String], - haplogroupType: HaplogroupType, - source: String, - confidenceLevel: String, - formedYbp: Option[Int], - formedYbpLower: Option[Int], - formedYbpUpper: Option[Int], - tmrcaYbp: Option[Int], - tmrcaYbpLower: Option[Int], - tmrcaYbpUpper: Option[Int], - ageEstimateSource: Option[String], - provenance: Option[HaplogroupProvenance], // JSONB via custom column mapper - createdAt: LocalDateTime -) - -case class WipRelationshipRow( - id: Option[Int], - changeSetId: Int, - childHaplogroupId: Option[Int], - childPlaceholderId: Option[Int], - parentHaplogroupId: Option[Int], - parentPlaceholderId: Option[Int], - source: String, - createdAt: LocalDateTime -) - -case class WipHaplogroupVariantRow( - id: Option[Int], - changeSetId: Int, - haplogroupId: Option[Int], - haplogroupPlaceholderId: Option[Int], - variantId: Int, - source: Option[String], - createdAt: LocalDateTime -) - -case class WipReparentRow( - id: Option[Int], - changeSetId: Int, - haplogroupId: Int, - oldParentId: Option[Int], - newParentId: Option[Int], - newParentPlaceholderId: Option[Int], - source: String, - createdAt: LocalDateTime -) - -/** - * Resolution types for curator conflict corrections. 
- */ -object ResolutionType extends Enumeration { - type ResolutionType = Value - val Reparent, EditVariants, MergeExisting, Defer = Value - - def fromString(s: String): ResolutionType = s.toUpperCase match { - case "REPARENT" => Reparent - case "EDIT_VARIANTS" => EditVariants - case "MERGE_EXISTING" => MergeExisting - case "DEFER" => Defer - case _ => throw new IllegalArgumentException(s"Unknown resolution type: $s") - } - - def toDbString(rt: ResolutionType): String = rt match { - case Reparent => "REPARENT" - case EditVariants => "EDIT_VARIANTS" - case MergeExisting => "MERGE_EXISTING" - case Defer => "DEFER" - } -} - -object ResolutionStatus extends Enumeration { - type ResolutionStatus = Value - val Pending, Applied, Cancelled = Value - - def fromString(s: String): ResolutionStatus = s.toUpperCase match { - case "PENDING" => Pending - case "APPLIED" => Applied - case "CANCELLED" => Cancelled - case _ => throw new IllegalArgumentException(s"Unknown resolution status: $s") - } - - def toDbString(rs: ResolutionStatus): String = rs match { - case Pending => "PENDING" - case Applied => "APPLIED" - case Cancelled => "CANCELLED" - } -} - -object DeferPriority extends Enumeration { - type DeferPriority = Value - val Low, Normal, High, Critical = Value - - def fromString(s: String): DeferPriority = s.toUpperCase match { - case "LOW" => Low - case "NORMAL" => Normal - case "HIGH" => High - case "CRITICAL" => Critical - case _ => Normal - } - - def toDbString(dp: DeferPriority): String = dp match { - case Low => "LOW" - case Normal => "NORMAL" - case High => "HIGH" - case Critical => "CRITICAL" - } -} - -case class WipResolutionRow( - id: Option[Int], - changeSetId: Int, - wipHaplogroupId: Option[Int], - wipReparentId: Option[Int], - resolutionType: String, - // REPARENT fields - newParentId: Option[Int], - newParentPlaceholderId: Option[Int], - // MERGE_EXISTING fields - mergeTargetId: Option[Int], - // EDIT_VARIANTS fields (stored as JSON arrays) - variantsToAdd: 
Option[String], // JSON array of variant IDs - variantsToRemove: Option[String], // JSON array of variant IDs - // DEFER fields - deferReason: Option[String], - deferPriority: String, - // Curator tracking - curatorId: String, - curatorNotes: Option[String], - // Status - status: String, - createdAt: LocalDateTime, - appliedAt: Option[LocalDateTime] -) - -// ============================================================================ -// Table Definitions -// ============================================================================ - -class WipHaplogroupTable(tag: Tag) extends Table[WipHaplogroupRow](tag, Some("tree"), "wip_haplogroup") { - def id = column[Int]("wip_haplogroup_id", O.PrimaryKey, O.AutoInc) - def changeSetId = column[Int]("change_set_id") - def placeholderId = column[Int]("placeholder_id") - def name = column[String]("name") - def lineage = column[Option[String]]("lineage") - def description = column[Option[String]]("description") - def haplogroupType = column[HaplogroupType]("haplogroup_type") - def source = column[String]("source") - def confidenceLevel = column[String]("confidence_level") - def formedYbp = column[Option[Int]]("formed_ybp") - def formedYbpLower = column[Option[Int]]("formed_ybp_lower") - def formedYbpUpper = column[Option[Int]]("formed_ybp_upper") - def tmrcaYbp = column[Option[Int]]("tmrca_ybp") - def tmrcaYbpLower = column[Option[Int]]("tmrca_ybp_lower") - def tmrcaYbpUpper = column[Option[Int]]("tmrca_ybp_upper") - def ageEstimateSource = column[Option[String]]("age_estimate_source") - def provenance = column[Option[HaplogroupProvenance]]("provenance") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[WipHaplogroupRow] = ( - id.?, changeSetId, placeholderId, name, lineage, description, haplogroupType, - source, confidenceLevel, formedYbp, formedYbpLower, formedYbpUpper, - tmrcaYbp, tmrcaYbpLower, tmrcaYbpUpper, ageEstimateSource, provenance, createdAt - ).mapTo[WipHaplogroupRow] - - def 
changeSetFk = foreignKey("wip_haplogroup_change_set_fk", changeSetId, TableQuery[ChangeSetsTable])(_.id) -} - -class WipRelationshipTable(tag: Tag) extends Table[WipRelationshipRow](tag, Some("tree"), "wip_haplogroup_relationship") { - def id = column[Int]("wip_relationship_id", O.PrimaryKey, O.AutoInc) - def changeSetId = column[Int]("change_set_id") - def childHaplogroupId = column[Option[Int]]("child_haplogroup_id") - def childPlaceholderId = column[Option[Int]]("child_placeholder_id") - def parentHaplogroupId = column[Option[Int]]("parent_haplogroup_id") - def parentPlaceholderId = column[Option[Int]]("parent_placeholder_id") - def source = column[String]("source") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[WipRelationshipRow] = ( - id.?, changeSetId, childHaplogroupId, childPlaceholderId, - parentHaplogroupId, parentPlaceholderId, source, createdAt - ).mapTo[WipRelationshipRow] - - def changeSetFk = foreignKey("wip_relationship_change_set_fk", changeSetId, TableQuery[ChangeSetsTable])(_.id) -} - -class WipHaplogroupVariantTable(tag: Tag) extends Table[WipHaplogroupVariantRow](tag, Some("tree"), "wip_haplogroup_variant") { - def id = column[Int]("wip_haplogroup_variant_id", O.PrimaryKey, O.AutoInc) - def changeSetId = column[Int]("change_set_id") - def haplogroupId = column[Option[Int]]("haplogroup_id") - def haplogroupPlaceholderId = column[Option[Int]]("haplogroup_placeholder_id") - def variantId = column[Int]("variant_id") - def source = column[Option[String]]("source") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[WipHaplogroupVariantRow] = ( - id.?, changeSetId, haplogroupId, haplogroupPlaceholderId, variantId, source, createdAt - ).mapTo[WipHaplogroupVariantRow] - - def changeSetFk = foreignKey("wip_variant_change_set_fk", changeSetId, TableQuery[ChangeSetsTable])(_.id) -} - -class WipReparentTable(tag: Tag) extends Table[WipReparentRow](tag, Some("tree"), "wip_reparent") { - def id = 
column[Int]("wip_reparent_id", O.PrimaryKey, O.AutoInc) - def changeSetId = column[Int]("change_set_id") - def haplogroupId = column[Int]("haplogroup_id") - def oldParentId = column[Option[Int]]("old_parent_id") - def newParentId = column[Option[Int]]("new_parent_id") - def newParentPlaceholderId = column[Option[Int]]("new_parent_placeholder_id") - def source = column[String]("source") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[WipReparentRow] = ( - id.?, changeSetId, haplogroupId, oldParentId, newParentId, newParentPlaceholderId, source, createdAt - ).mapTo[WipReparentRow] - - def changeSetFk = foreignKey("wip_reparent_change_set_fk", changeSetId, TableQuery[ChangeSetsTable])(_.id) -} - -class WipResolutionTable(tag: Tag) extends Table[WipResolutionRow](tag, Some("tree"), "wip_resolution") { - def id = column[Int]("resolution_id", O.PrimaryKey, O.AutoInc) - def changeSetId = column[Int]("change_set_id") - def wipHaplogroupId = column[Option[Int]]("wip_haplogroup_id") - def wipReparentId = column[Option[Int]]("wip_reparent_id") - def resolutionType = column[String]("resolution_type") - def newParentId = column[Option[Int]]("new_parent_id") - def newParentPlaceholderId = column[Option[Int]]("new_parent_placeholder_id") - def mergeTargetId = column[Option[Int]]("merge_target_id") - def variantsToAdd = column[Option[String]]("variants_to_add") - def variantsToRemove = column[Option[String]]("variants_to_remove") - def deferReason = column[Option[String]]("defer_reason") - def deferPriority = column[String]("defer_priority") - def curatorId = column[String]("curator_id") - def curatorNotes = column[Option[String]]("curator_notes") - def status = column[String]("status") - def createdAt = column[LocalDateTime]("created_at") - def appliedAt = column[Option[LocalDateTime]]("applied_at") - - def * : ProvenShape[WipResolutionRow] = ( - id.?, changeSetId, wipHaplogroupId, wipReparentId, resolutionType, - newParentId, newParentPlaceholderId, 
mergeTargetId, - variantsToAdd, variantsToRemove, deferReason, deferPriority, - curatorId, curatorNotes, status, createdAt, appliedAt - ).mapTo[WipResolutionRow] - - def changeSetFk = foreignKey("wip_resolution_change_set_fk", changeSetId, TableQuery[ChangeSetsTable])(_.id) -} diff --git a/app/models/dal/domain/ibd/IbdDiscoveryIndicesTable.scala b/app/models/dal/domain/ibd/IbdDiscoveryIndicesTable.scala deleted file mode 100644 index 57939e10..00000000 --- a/app/models/dal/domain/ibd/IbdDiscoveryIndicesTable.scala +++ /dev/null @@ -1,54 +0,0 @@ -package models.dal.domain.ibd - -import models.dal.MyPostgresProfile.api.* -import models.domain.ibd.IbdDiscoveryIndex - -import java.time.ZonedDateTime -import java.util.UUID - -class IbdDiscoveryIndicesTable(tag: Tag) extends Table[IbdDiscoveryIndex](tag, "ibd_discovery_index") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def sampleGuid1 = column[UUID]("sample_guid_1") - - def sampleGuid2 = column[UUID]("sample_guid_2") - - def pangenomeGraphId = column[Int]("pangenome_graph_id") - - def matchRegionType = column[String]("match_region_type") - - def totalSharedCmApprox = column[Option[Double]]("total_shared_cm_approx") - - def numSharedSegmentsApprox = column[Option[Int]]("num_shared_segments_approx") - - def isPubliclyDiscoverable = column[Boolean]("is_publicly_discoverable") - - def consensusStatus = column[String]("consensus_status") - - def lastConsensusUpdate = column[ZonedDateTime]("last_consensus_update") - - def validationServiceGuid = column[Option[UUID]]("validation_service_guid") - - def validationTimestamp = column[Option[ZonedDateTime]]("validation_timestamp") - - def indexedByService = column[Option[String]]("indexed_by_service") - - def indexedDate = column[ZonedDateTime]("indexed_date") - - def * = ( - id.?, - sampleGuid1, - sampleGuid2, - pangenomeGraphId, - matchRegionType, - totalSharedCmApprox, - numSharedSegmentsApprox, - isPubliclyDiscoverable, - consensusStatus, - lastConsensusUpdate, 
- validationServiceGuid, - validationTimestamp, - indexedByService, - indexedDate - ).mapTo[IbdDiscoveryIndex] -} \ No newline at end of file diff --git a/app/models/dal/domain/ibd/IbdPdsAttestationsTable.scala b/app/models/dal/domain/ibd/IbdPdsAttestationsTable.scala deleted file mode 100644 index a60f97ef..00000000 --- a/app/models/dal/domain/ibd/IbdPdsAttestationsTable.scala +++ /dev/null @@ -1,42 +0,0 @@ -package models.dal.domain.ibd - -import models.dal.MyPostgresProfile.api.* -import models.domain.ibd.IbdPdsAttestation - -import java.time.ZonedDateTime -import java.util.UUID - -class IbdPdsAttestationsTable(tag: Tag) extends Table[IbdPdsAttestation](tag, "ibd_pds_attestation") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def ibdDiscoveryIndexId = column[Long]("ibd_discovery_index_id") - - def attestingPdsGuid = column[UUID]("attesting_pds_guid") - - def attestingSampleGuid = column[UUID]("attesting_sample_guid") - - def attestationTimestamp = column[ZonedDateTime]("attestation_timestamp") - - def attestationSignature = column[String]("attestation_signature") - - def matchSummaryHash = column[String]("match_summary_hash") - - def attestationType = column[String]("attestation_type") // CHECK constraint handled by DB - - def attestationNotes = column[Option[String]]("attestation_notes") - - // Define the composite unique constraint - def uniqueAttestation = index("idx_unique_pds_attestation", (ibdDiscoveryIndexId, attestingPdsGuid, attestationType), unique = true) - - def * = ( - id.?, - ibdDiscoveryIndexId, - attestingPdsGuid, - attestingSampleGuid, - attestationTimestamp, - attestationSignature, - matchSummaryHash, - attestationType, - attestationNotes - ).mapTo[IbdPdsAttestation] -} \ No newline at end of file diff --git a/app/models/dal/domain/ibd/MatchConsentTrackingTable.scala b/app/models/dal/domain/ibd/MatchConsentTrackingTable.scala deleted file mode 100644 index 8a457a51..00000000 --- 
a/app/models/dal/domain/ibd/MatchConsentTrackingTable.scala +++ /dev/null @@ -1,34 +0,0 @@ -package models.dal.domain.ibd - -import models.dal.MyPostgresProfile.api.* -import models.domain.ibd.MatchConsentTracking -import play.api.libs.json.JsValue - -import java.time.ZonedDateTime -import java.util.UUID - -class MatchConsentTrackingTable(tag: Tag) extends Table[MatchConsentTracking](tag, "match_consent_tracking") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - def atUri = column[String]("at_uri") - def consentingDid = column[String]("consenting_did") - def sampleGuid = column[UUID]("sample_guid") - def consentLevel = column[String]("consent_level") - def allowedMatchTypes = column[Option[JsValue]]("allowed_match_types") - def shareContactInfo = column[Boolean]("share_contact_info") - def consentedAt = column[ZonedDateTime]("consented_at") - def expiresAt = column[Option[ZonedDateTime]]("expires_at") - def revokedAt = column[Option[ZonedDateTime]]("revoked_at") - - def * = ( - id.?, - atUri, - consentingDid, - sampleGuid, - consentLevel, - allowedMatchTypes, - shareContactInfo, - consentedAt, - expiresAt, - revokedAt - ).mapTo[MatchConsentTracking] -} diff --git a/app/models/dal/domain/ibd/MatchRequestTrackingTable.scala b/app/models/dal/domain/ibd/MatchRequestTrackingTable.scala deleted file mode 100644 index 0075b510..00000000 --- a/app/models/dal/domain/ibd/MatchRequestTrackingTable.scala +++ /dev/null @@ -1,42 +0,0 @@ -package models.dal.domain.ibd - -import models.dal.MyPostgresProfile.api.* -import models.domain.ibd.MatchRequestTracking -import play.api.libs.json.JsValue - -import java.time.ZonedDateTime -import java.util.UUID - -class MatchRequestTrackingTable(tag: Tag) extends Table[MatchRequestTracking](tag, "match_request_tracking") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - def atUri = column[String]("at_uri") - def requesterDid = column[String]("requester_did") - def targetDid = column[Option[String]]("target_did") - def 
fromSampleGuid = column[UUID]("from_sample_guid") - def toSampleGuid = column[UUID]("to_sample_guid") - def requestType = column[String]("request_type") - def status = column[String]("status") - def discoveryReason = column[Option[JsValue]]("discovery_reason") - def message = column[Option[String]]("message") - def createdAt = column[ZonedDateTime]("created_at") - def updatedAt = column[ZonedDateTime]("updated_at") - def expiresAt = column[Option[ZonedDateTime]]("expires_at") - def completedAt = column[Option[ZonedDateTime]]("completed_at") - - def * = ( - id.?, - atUri, - requesterDid, - targetDid, - fromSampleGuid, - toSampleGuid, - requestType, - status, - discoveryReason, - message, - createdAt, - updatedAt, - expiresAt, - completedAt - ).mapTo[MatchRequestTracking] -} diff --git a/app/models/dal/domain/ibd/MatchSuggestionsTable.scala b/app/models/dal/domain/ibd/MatchSuggestionsTable.scala deleted file mode 100644 index 6e8e877a..00000000 --- a/app/models/dal/domain/ibd/MatchSuggestionsTable.scala +++ /dev/null @@ -1,32 +0,0 @@ -package models.dal.domain.ibd - -import models.dal.MyPostgresProfile.api.* -import models.domain.ibd.MatchSuggestion -import play.api.libs.json.JsValue - -import java.time.ZonedDateTime -import java.util.UUID - -class MatchSuggestionsTable(tag: Tag) extends Table[MatchSuggestion](tag, "match_suggestion") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - def targetSampleGuid = column[UUID]("target_sample_guid") - def suggestedSampleGuid = column[UUID]("suggested_sample_guid") - def suggestionType = column[String]("suggestion_type") - def score = column[Double]("score") - def metadata = column[Option[JsValue]]("metadata") - def status = column[String]("status") - def createdAt = column[ZonedDateTime]("created_at") - def expiresAt = column[Option[ZonedDateTime]]("expires_at") - - def * = ( - id.?, - targetSampleGuid, - suggestedSampleGuid, - suggestionType, - score, - metadata, - status, - createdAt, - expiresAt - 
).mapTo[MatchSuggestion] -} diff --git a/app/models/dal/domain/ibd/PopulationBreakdownCacheTable.scala b/app/models/dal/domain/ibd/PopulationBreakdownCacheTable.scala deleted file mode 100644 index adf01805..00000000 --- a/app/models/dal/domain/ibd/PopulationBreakdownCacheTable.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.dal.domain.ibd - -import models.dal.MyPostgresProfile.api.* -import models.domain.ibd.PopulationBreakdownCache -import play.api.libs.json.JsValue - -import java.time.ZonedDateTime -import java.util.UUID - -class PopulationBreakdownCacheTable(tag: Tag) extends Table[PopulationBreakdownCache](tag, "population_breakdown_cache") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - def sampleGuid = column[UUID]("sample_guid") - def breakdown = column[JsValue]("breakdown") - def breakdownHash = column[String]("breakdown_hash") - def cachedAt = column[ZonedDateTime]("cached_at") - def sourceAtUri = column[Option[String]]("source_at_uri") - - def * = ( - id.?, - sampleGuid, - breakdown, - breakdownHash, - cachedAt, - sourceAtUri - ).mapTo[PopulationBreakdownCache] -} diff --git a/app/models/dal/domain/ibd/PopulationOverlapScoresTable.scala b/app/models/dal/domain/ibd/PopulationOverlapScoresTable.scala deleted file mode 100644 index 93cb25b2..00000000 --- a/app/models/dal/domain/ibd/PopulationOverlapScoresTable.scala +++ /dev/null @@ -1,23 +0,0 @@ -package models.dal.domain.ibd - -import models.dal.MyPostgresProfile.api.* -import models.domain.ibd.PopulationOverlapScore - -import java.time.ZonedDateTime -import java.util.UUID - -class PopulationOverlapScoresTable(tag: Tag) extends Table[PopulationOverlapScore](tag, "population_overlap_score") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - def sampleGuid1 = column[UUID]("sample_guid_1") - def sampleGuid2 = column[UUID]("sample_guid_2") - def overlapScore = column[Double]("overlap_score") - def computedAt = column[ZonedDateTime]("computed_at") - - def * = ( - id.?, - sampleGuid1, - 
sampleGuid2, - overlapScore, - computedAt - ).mapTo[PopulationOverlapScore] -} diff --git a/app/models/dal/domain/pangenome/CanonicalPangenomeVariantsTable.scala b/app/models/dal/domain/pangenome/CanonicalPangenomeVariantsTable.scala deleted file mode 100644 index 23eb0e10..00000000 --- a/app/models/dal/domain/pangenome/CanonicalPangenomeVariantsTable.scala +++ /dev/null @@ -1,55 +0,0 @@ -package models.dal.domain.pangenome - -import models.dal.MyPostgresProfile.api.* -import models.domain.pangenome.CanonicalPangenomeVariant - -import java.time.ZonedDateTime - -class CanonicalPangenomeVariantsTable(tag: Tag) extends Table[CanonicalPangenomeVariant](tag, "canonical_pangenome_variant") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def pangenomeGraphId = column[Long]("pangenome_graph_id") - - def variantType = column[String]("variant_type") - - def variantNodes = column[List[Int]]("variant_nodes") - - def variantEdges = column[List[Int]]("variant_edges") - - def referencePathId = column[Option[Long]]("reference_path_id") - - def referenceStartPosition = column[Option[Int]]("reference_start_position") - - def referenceEndPosition = column[Option[Int]]("reference_end_position") - - def referenceAlleleSequence = column[Option[String]]("reference_allele_sequence") - - def alternateAlleleSequence = column[Option[String]]("alternate_allele_sequence") - - def canonicalHash = column[String]("canonical_hash") - - def description = column[Option[String]]("description") - - def creationDate = column[ZonedDateTime]("creation_date") - - def * = ( - id.?, - pangenomeGraphId, - variantType, - variantNodes, - variantEdges, - referencePathId, - referenceStartPosition, - referenceEndPosition, - referenceAlleleSequence, - alternateAlleleSequence, - canonicalHash, - description, - creationDate - ).mapTo[CanonicalPangenomeVariant] - - def graphFk = foreignKey("fk_variant_graph", pangenomeGraphId, TableQuery[PangenomeGraphsTable])(_.id) - - def pathFk = 
foreignKey("fk_variant_path", referencePathId, TableQuery[PangenomePathsTable])(_.id.?) -} - diff --git a/app/models/dal/domain/pangenome/PangenomeAlignmentCoverageTable.scala b/app/models/dal/domain/pangenome/PangenomeAlignmentCoverageTable.scala deleted file mode 100644 index 23946d66..00000000 --- a/app/models/dal/domain/pangenome/PangenomeAlignmentCoverageTable.scala +++ /dev/null @@ -1,51 +0,0 @@ -package models.dal.domain.pangenome - -import models.dal.MyPostgresProfile.api.* -import models.domain.pangenome.PangenomeAlignmentCoverage -import slick.lifted.ProvenShape - -class PangenomeAlignmentCoverageTable(tag: Tag) extends Table[PangenomeAlignmentCoverage](tag, "pangenome_alignment_coverage") { - def alignmentMetadataId = column[Long]("alignment_metadata_id", O.PrimaryKey) - - def meanDepth = column[Option[Double]]("mean_depth") - - def medianDepth = column[Option[Double]]("median_depth") - - def percentCoverageAt1x = column[Option[Double]]("percent_coverage_at_1x") - - def percentCoverageAt5x = column[Option[Double]]("percent_coverage_at_5x") - - def percentCoverageAt10x = column[Option[Double]]("percent_coverage_at_10x") - - def percentCoverageAt20x = column[Option[Double]]("percent_coverage_at_20x") - - def percentCoverageAt30x = column[Option[Double]]("percent_coverage_at_30x") - - def basesNoCoverage = column[Option[Long]]("bases_no_coverage") - - def basesLowQualityMapping = column[Option[Long]]("bases_low_quality_mapping") - - def basesCallable = column[Option[Long]]("bases_callable") - - def meanMappingQuality = column[Option[Double]]("mean_mapping_quality") - - def * = ( - alignmentMetadataId, - meanDepth, - medianDepth, - percentCoverageAt1x, - percentCoverageAt5x, - percentCoverageAt10x, - percentCoverageAt20x, - percentCoverageAt30x, - basesNoCoverage, - basesLowQualityMapping, - basesCallable, - meanMappingQuality - ).mapTo[PangenomeAlignmentCoverage] - - def metadataFk = foreignKey( - "fk_alignment_coverage_metadata", - alignmentMetadataId, - 
TableQuery[PangenomeAlignmentMetadataTable])(_.id, onDelete = ForeignKeyAction.Cascade) -} \ No newline at end of file diff --git a/app/models/dal/domain/pangenome/PangenomeAlignmentMetadataTable.scala b/app/models/dal/domain/pangenome/PangenomeAlignmentMetadataTable.scala deleted file mode 100644 index 3b83f28b..00000000 --- a/app/models/dal/domain/pangenome/PangenomeAlignmentMetadataTable.scala +++ /dev/null @@ -1,59 +0,0 @@ -package models.dal.domain.pangenome - -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.genomics.SequenceFilesTable -import models.domain.pangenome.PangenomeAlignmentMetadata -import play.api.libs.json.JsValue -import slick.lifted.ProvenShape - -import java.time.ZonedDateTime - -class PangenomeAlignmentMetadataTable(tag: Tag) extends Table[PangenomeAlignmentMetadata](tag, "pangenome_alignment_metadata") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def sequenceFileId = column[Long]("sequence_file_id") - - def pangenomeGraphId = column[Int]("pangenome_graph_id") - - def metricLevel = column[String]("metric_level") - - def pangenomePathId = column[Option[Int]]("pangenome_path_id") - - def pangenomeNodeId = column[Option[Int]]("pangenome_node_id") - - def regionStartNodeId = column[Option[Int]]("region_start_node_id") - - def regionEndNodeId = column[Option[Int]]("region_end_node_id") - - def regionName = column[Option[String]]("region_name") - - def regionLengthBp = column[Option[Long]]("region_length_bp") - - def metricsDate = column[ZonedDateTime]("metrics_date") - - def analysisTool = column[String]("analysis_tool") - - def analysisToolVersion = column[Option[String]]("analysis_tool_version") - - def notes = column[Option[String]]("notes") - - def metadata = column[Option[JsValue]]("metadata") - - def * = ( - id.?, - sequenceFileId, - pangenomeGraphId, - metricLevel, - pangenomePathId, - pangenomeNodeId, - regionStartNodeId, - regionEndNodeId, - regionName, - regionLengthBp, - metricsDate, - analysisTool, - 
analysisToolVersion, - notes, - metadata - ).mapTo[PangenomeAlignmentMetadata] -} diff --git a/app/models/dal/domain/pangenome/PangenomeGraphsTable.scala b/app/models/dal/domain/pangenome/PangenomeGraphsTable.scala deleted file mode 100644 index d39d80c6..00000000 --- a/app/models/dal/domain/pangenome/PangenomeGraphsTable.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.dal.domain.pangenome - -import models.dal.MyPostgresProfile.api.* -import models.domain.pangenome.PangenomeGraph - -import java.time.ZonedDateTime - -class PangenomeGraphsTable(tag: Tag) extends Table[PangenomeGraph](tag, "pangenome_graph") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def graphName = column[String]("graph_name") - - def sourceGfaFile = column[Option[String]]("source_gfa_file") - - def description = column[Option[String]]("description") - - def creationDate = column[ZonedDateTime]("creation_date") - - def * = ( - id.?, - graphName, - sourceGfaFile, - description, - creationDate - ).mapTo[PangenomeGraph] -} \ No newline at end of file diff --git a/app/models/dal/domain/pangenome/PangenomeNodesTable.scala b/app/models/dal/domain/pangenome/PangenomeNodesTable.scala deleted file mode 100644 index 5b49422d..00000000 --- a/app/models/dal/domain/pangenome/PangenomeNodesTable.scala +++ /dev/null @@ -1,23 +0,0 @@ -package models.dal.domain.pangenome - -import models.dal.MyPostgresProfile.api.* -import models.domain.pangenome.PangenomeNode - -class PangenomeNodesTable(tag: Tag) extends Table[PangenomeNode](tag, "pangenome_node") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def graphId = column[Long]("graph_id") - - def nodeName = column[String]("node_name") - - def sequenceLength = column[Option[Long]]("sequence_length") - - def * = ( - id.?, - graphId, - nodeName, - sequenceLength - ).mapTo[PangenomeNode] - - def graphFk = foreignKey("fk_node_graph", graphId, TableQuery[PangenomeGraphsTable])(_.id) -} diff --git 
a/app/models/dal/domain/pangenome/PangenomePathsTable.scala b/app/models/dal/domain/pangenome/PangenomePathsTable.scala deleted file mode 100644 index 3eb8c78e..00000000 --- a/app/models/dal/domain/pangenome/PangenomePathsTable.scala +++ /dev/null @@ -1,30 +0,0 @@ -package models.dal.domain.pangenome - -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.pangenome.PangenomePath - -class PangenomePathsTable(tag: Tag) extends Table[PangenomePath](tag, "pangenome_path") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def graphId = column[Long]("graph_id") - - def pathName = column[String]("path_name") - - def isReference = column[Boolean]("is_reference") - - def lengthBp = column[Option[Long]]("length_bp") - - def description = column[Option[String]]("description") - - def * = ( - id.?, - graphId, - pathName, - isReference, - lengthBp, - description - ).mapTo[PangenomePath] - - def graphFk = foreignKey("fk_path_graph", graphId, TableQuery[PangenomeGraphsTable])(_.id) -} diff --git a/app/models/dal/domain/pangenome/PangenomeVariantLinksTable.scala b/app/models/dal/domain/pangenome/PangenomeVariantLinksTable.scala deleted file mode 100644 index 3c176224..00000000 --- a/app/models/dal/domain/pangenome/PangenomeVariantLinksTable.scala +++ /dev/null @@ -1,35 +0,0 @@ -package models.dal.domain.pangenome - -import models.dal.MyPostgresProfile.api.* -import models.domain.pangenome.PangenomeVariantLink - -import java.time.ZonedDateTime - -class PangenomeVariantLinksTable(tag: Tag) extends Table[PangenomeVariantLink](tag, "pangenome_variant_link") { - def id = column[Long]("pangenome_variant_link_id", O.PrimaryKey, O.AutoInc) - - def variantId = column[Int]("variant_id") - - def canonicalPangenomeVariantId = column[Int]("canonical_pangenome_variant_id") - - def pangenomeGraphId = column[Int]("pangenome_graph_id") - - def description = column[Option[String]]("description") - - def mappingSource = 
column[String]("mapping_source") - - def mappingDate = column[ZonedDateTime]("mapping_date") - - // Define the composite unique constraint - def uniqueLink = index("idx_unique_pangenome_variant_link", (variantId, canonicalPangenomeVariantId), unique = true) - - def * = ( - id.?, - variantId, - canonicalPangenomeVariantId, - pangenomeGraphId, - description, - mappingSource, - mappingDate - ).mapTo[PangenomeVariantLink] -} \ No newline at end of file diff --git a/app/models/dal/domain/pangenome/ReportedVariantPangenomesTable.scala b/app/models/dal/domain/pangenome/ReportedVariantPangenomesTable.scala deleted file mode 100644 index 89888c5c..00000000 --- a/app/models/dal/domain/pangenome/ReportedVariantPangenomesTable.scala +++ /dev/null @@ -1,80 +0,0 @@ -package models.dal.domain.pangenome - -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.pangenome.ReportedVariantPangenome -import play.api.libs.json.JsValue - -import java.time.ZonedDateTime -import java.util.UUID - -class ReportedVariantPangenomesTable(tag: Tag) extends Table[ReportedVariantPangenome](tag, "reported_variant_pangenome") { - def id = column[Long]("id", O.PrimaryKey, O.AutoInc) - - def sampleGuid = column[UUID]("sample_guid") - - def graphId = column[Int]("graph_id") - - def variantType = column[String]("variant_type") - - def referencePathId = column[Option[Int]]("reference_path_id") - - def referenceStartPosition = column[Option[Int]]("reference_start_position") - - def referenceEndPosition = column[Option[Int]]("reference_end_position") - - def variantNodes = column[List[Int]]("variant_nodes")(MyPostgresProfile.api.intListTypeMapper) - - def variantEdges = column[List[Int]]("variant_edges")(MyPostgresProfile.api.intListTypeMapper) - - def alternateAlleleSequence = column[Option[String]]("alternate_allele_sequence") - - def referenceAlleleSequence = column[Option[String]]("reference_allele_sequence") - - def referenceRepeatCount = 
column[Option[Int]]("reference_repeat_count") - - def alternateRepeatCount = column[Option[Int]]("alternate_repeat_count") - - def alleleFraction = column[Option[Double]]("allele_fraction") - - def depth = column[Option[Int]]("depth") - - def reportedDate = column[ZonedDateTime]("reported_date") - - def provenance = column[String]("provenance") - - def confidenceScore = column[Double]("confidence_score") - - def notes = column[Option[String]]("notes") - - def status = column[String]("status") - - def zygosity = column[Option[String]]("zygosity") // CHECK constraint handled by DB - - def haplotypeInformation = column[Option[JsValue]]("haplotype_information") // JSONB - - def * = ( - id.?, - sampleGuid, - graphId, - variantType, - referencePathId, - referenceStartPosition, - referenceEndPosition, - variantNodes, - variantEdges, - alternateAlleleSequence, - referenceAlleleSequence, - referenceRepeatCount, - alternateRepeatCount, - alleleFraction, - depth, - reportedDate, - provenance, - confidenceScore, - notes, - status, - zygosity, - haplotypeInformation - ).mapTo[ReportedVariantPangenome] -} \ No newline at end of file diff --git a/app/models/dal/domain/pds/PdsFleetConfigTable.scala b/app/models/dal/domain/pds/PdsFleetConfigTable.scala deleted file mode 100644 index 24f17287..00000000 --- a/app/models/dal/domain/pds/PdsFleetConfigTable.scala +++ /dev/null @@ -1,19 +0,0 @@ -package models.dal.domain.pds - -import models.dal.MyPostgresProfile.api.* -import models.domain.pds.PdsFleetConfig - -import java.time.LocalDateTime - -class PdsFleetConfigTable(tag: Tag) extends Table[PdsFleetConfig](tag, "pds_fleet_config") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def configKey = column[String]("config_key") - def configValue = column[String]("config_value") - def description = column[Option[String]]("description") - def updatedBy = column[Option[String]]("updated_by") - def updatedAt = column[LocalDateTime]("updated_at") - - def * = (id.?, configKey, 
configValue, description, updatedBy, updatedAt).mapTo[PdsFleetConfig] - - def uniqueKey = index("idx_pds_fleet_config_key_unique", configKey, unique = true) -} diff --git a/app/models/dal/domain/pds/PdsHeartbeatLogTable.scala b/app/models/dal/domain/pds/PdsHeartbeatLogTable.scala deleted file mode 100644 index 51488b44..00000000 --- a/app/models/dal/domain/pds/PdsHeartbeatLogTable.scala +++ /dev/null @@ -1,24 +0,0 @@ -package models.dal.domain.pds - -import models.dal.MyPostgresProfile.api.* -import models.domain.pds.PdsHeartbeatLog -import play.api.libs.json.JsValue - -import java.time.LocalDateTime - -class PdsHeartbeatLogTable(tag: Tag) extends Table[PdsHeartbeatLog](tag, "pds_heartbeat_log") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def pdsNodeId = column[Int]("pds_node_id") - def status = column[String]("status") - def softwareVersion = column[Option[String]]("software_version") - def loadMetrics = column[Option[JsValue]]("load_metrics") - def processingQueueSize = column[Option[Int]]("processing_queue_size") - def errorMessage = column[Option[String]]("error_message") - def recordedAt = column[LocalDateTime]("recorded_at") - - def * = ( - id.?, pdsNodeId, status, softwareVersion, loadMetrics, processingQueueSize, errorMessage, recordedAt - ).mapTo[PdsHeartbeatLog] - - def nodeFk = foreignKey("pds_heartbeat_log_node_fk", pdsNodeId, TableQuery[PdsNodeTable])(_.id) -} diff --git a/app/models/dal/domain/pds/PdsNodeTable.scala b/app/models/dal/domain/pds/PdsNodeTable.scala deleted file mode 100644 index af4e7a12..00000000 --- a/app/models/dal/domain/pds/PdsNodeTable.scala +++ /dev/null @@ -1,32 +0,0 @@ -package models.dal.domain.pds - -import models.dal.MyPostgresProfile.api.* -import models.domain.pds.PdsNode -import play.api.libs.json.JsValue - -import java.time.LocalDateTime - -class PdsNodeTable(tag: Tag) extends Table[PdsNode](tag, "pds_node") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def did = column[String]("did") - def 
pdsUrl = column[String]("pds_url") - def handle = column[Option[String]]("handle") - def nodeName = column[Option[String]]("node_name") - def softwareVersion = column[Option[String]]("software_version") - def status = column[String]("status") - def capabilities = column[JsValue]("capabilities") - def lastHeartbeat = column[Option[LocalDateTime]]("last_heartbeat") - def lastCommitCid = column[Option[String]]("last_commit_cid") - def lastCommitRev = column[Option[String]]("last_commit_rev") - def ipAddress = column[Option[String]]("ip_address") - def osInfo = column[Option[String]]("os_info") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - def * = ( - id.?, did, pdsUrl, handle, nodeName, softwareVersion, status, capabilities, - lastHeartbeat, lastCommitCid, lastCommitRev, ipAddress, osInfo, createdAt, updatedAt - ).mapTo[PdsNode] - - def uniqueDid = index("idx_pds_node_did_unique", did, unique = true) -} diff --git a/app/models/dal/domain/pds/PdsSubmissionTable.scala b/app/models/dal/domain/pds/PdsSubmissionTable.scala deleted file mode 100644 index 76d5be1b..00000000 --- a/app/models/dal/domain/pds/PdsSubmissionTable.scala +++ /dev/null @@ -1,36 +0,0 @@ -package models.dal.domain.pds - -import models.dal.MyPostgresProfile.api.* -import models.domain.pds.PdsSubmission -import play.api.libs.json.JsValue - -import java.time.LocalDateTime -import java.util.UUID - -class PdsSubmissionTable(tag: Tag) extends Table[PdsSubmission](tag, "pds_submission") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def pdsNodeId = column[Int]("pds_node_id") - def submissionType = column[String]("submission_type") - def biosampleId = column[Option[Int]]("biosample_id") - def biosampleGuid = column[Option[UUID]]("biosample_guid") - def proposedValue = column[String]("proposed_value") - def confidenceScore = column[Option[Double]]("confidence_score") - def algorithmVersion = column[Option[String]]("algorithm_version") 
- def softwareVersion = column[Option[String]]("software_version") - def payload = column[Option[JsValue]]("payload") - def status = column[String]("status") - def reviewedBy = column[Option[String]]("reviewed_by") - def reviewedAt = column[Option[LocalDateTime]]("reviewed_at") - def reviewNotes = column[Option[String]]("review_notes") - def atUri = column[Option[String]]("at_uri") - def atCid = column[Option[String]]("at_cid") - def createdAt = column[LocalDateTime]("created_at") - - def * = ( - id.?, pdsNodeId, submissionType, biosampleId, biosampleGuid, proposedValue, - confidenceScore, algorithmVersion, softwareVersion, payload, status, - reviewedBy, reviewedAt, reviewNotes, atUri, atCid, createdAt - ).mapTo[PdsSubmission] - - def nodeFk = foreignKey("pds_submission_node_fk", pdsNodeId, TableQuery[PdsNodeTable])(_.id) -} diff --git a/app/models/dal/domain/publications/GenomicStudiesTable.scala b/app/models/dal/domain/publications/GenomicStudiesTable.scala deleted file mode 100644 index e0125fb3..00000000 --- a/app/models/dal/domain/publications/GenomicStudiesTable.scala +++ /dev/null @@ -1,88 +0,0 @@ -package models.dal.domain.publications - -import models.dal.MyPostgresProfile.api.* -import models.domain.publications.{GenomicStudy, StudySource} - -/** - * Represents the database table definition for genomic studies. - * - * This table stores metadata related to genomic studies, including accession numbers, titles, - * study details, submission and update dates, as well as source-specific attributes such as - * project IDs, molecular information, and taxonomy identifiers. - * - * Columns: - * - `id`: An auto-incrementing primary key that uniquely identifies each genomic study. - * - `accession`: A unique accession number for the genomic study, providing a reference for databases. - * - `title`: The title of the genomic study, offering a summary or description. - * - `centerName`: The name of the research center or institution responsible for the study. 
- * - `studyName`: A specific or additional name assigned to the genomic study. - * - `details`: A textual description containing detailed information about the study. - * - `source`: The source of the genomic study data, represented using the `StudySource` enumeration. - * - `submissionDate`: An optional field capturing the date when the study was submitted. - * - `lastUpdate`: An optional field storing the date of the most recent update for the study information. - * - `bioProjectId`: An optional field containing the BioProject ID associated with the study. - * - `molecule`: An optional field describing the type of molecule studied in the project. - * - `topology`: An optional field containing information about the molecular topology. - * - `taxonomyId`: An optional field referencing the taxonomy ID associated with the study. - * - `version`: An optional field containing the version or release information for the study. - * - * Primary key: - * - The `id` column serves as the primary key, which is auto-incremented. - * - * Mapping: - * - Defines a mapping to the `GenomicStudy` case class, which models the domain representation - * of the genomic study entity. - * - * Table Name: - * - The physical table name in the database is `genomic_studies`. - * - * This table can be used in conjunction with other table definitions such as `PublicationEnaStudiesTable` - * to establish relationships between genomic studies and publications. 
- */ -class GenomicStudiesTable(tag: Tag) extends Table[GenomicStudy](tag, "genomic_studies") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def accession = column[String]("accession", O.Unique) - - def title = column[String]("title") - - def centerName = column[String]("center_name") - - def studyName = column[String]("study_name") - - def details = column[String]("details") - - def source = column[StudySource]("source") - - def submissionDate = column[Option[java.time.LocalDate]]("submission_date") - - def lastUpdate = column[Option[java.time.LocalDate]]("last_update") - - def bioProjectId = column[Option[String]]("bio_project_id") - - def molecule = column[Option[String]]("molecule") - - def topology = column[Option[String]]("topology") - - def taxonomyId = column[Option[Int]]("taxonomy_id") - - def version = column[Option[String]]("version") - - - def * = ( - id.?, - accession, - title, - centerName, - studyName, - details, - source, - submissionDate, - lastUpdate, - bioProjectId, - molecule, - topology, - taxonomyId, - version - ).mapTo[GenomicStudy] -} diff --git a/app/models/dal/domain/publications/PublicationBiosamplesTable.scala b/app/models/dal/domain/publications/PublicationBiosamplesTable.scala deleted file mode 100644 index 2026c3a9..00000000 --- a/app/models/dal/domain/publications/PublicationBiosamplesTable.scala +++ /dev/null @@ -1,34 +0,0 @@ -package models.dal.domain.publications - -import models.domain.publications.PublicationBiosample -import models.dal.MyPostgresProfile.api.* - -/** - * Represents the database table definition for associating publications with biosamples. - * - * This table facilitates a many-to-many relationship between publications and biosamples, where each - * association is identified by a composite primary key consisting of a `publicationId` and a `biosampleId`. 
- * - * @constructor Initializes a new instance of the `PublicationBiosamplesTable` class, mapping columns - * to the attributes of the `PublicationBiosample` case class. - * @param tag A Slick `Tag` object used to scope and reference the table within a database schema. - * - * Columns: - * - `publicationId`: Integer column representing the unique identifier of the associated publication. - * - `biosampleId`: Integer column representing the unique identifier of the associated biosample. - * - * Primary key: - * - Composite primary key composed of the `publicationId` and `biosampleId` columns. - * - * Mapping: - * - Defines a mapping to the `PublicationBiosample` case class. - */ -class PublicationBiosamplesTable(tag: Tag) extends Table[PublicationBiosample](tag, "publication_biosample") { - def publicationId = column[Int]("publication_id") - - def biosampleId = column[Int]("biosample_id") - - def * = (publicationId, biosampleId).mapTo[PublicationBiosample] - - def pkey = primaryKey("publication_biosample_pkey", (publicationId, biosampleId)) -} diff --git a/app/models/dal/domain/publications/PublicationCandidatesTable.scala b/app/models/dal/domain/publications/PublicationCandidatesTable.scala deleted file mode 100644 index 465a38ae..00000000 --- a/app/models/dal/domain/publications/PublicationCandidatesTable.scala +++ /dev/null @@ -1,41 +0,0 @@ -package models.dal.domain.publications - -import models.domain.publications.PublicationCandidate -import models.dal.MyPostgresProfile.api.* -import java.time.{LocalDate, LocalDateTime} -import java.util.UUID -import play.api.libs.json.JsValue - -class PublicationCandidatesTable(tag: Tag) extends Table[PublicationCandidate](tag, "publication_candidates") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def openAlexId = column[String]("openalex_id", O.Unique) - def doi = column[Option[String]]("doi") - def title = column[String]("title") - def abstractSummary = column[Option[String]]("abstract") - def publicationDate 
= column[Option[LocalDate]]("publication_date") - def journalName = column[Option[String]]("journal_name") - def relevanceScore = column[Option[Double]]("relevance_score") - def discoveryDate = column[LocalDateTime]("discovery_date") - def status = column[String]("status") - def reviewedBy = column[Option[UUID]]("reviewed_by") - def reviewedAt = column[Option[LocalDateTime]]("reviewed_at") - def rejectionReason = column[Option[String]]("rejection_reason") - def rawMetadata = column[Option[JsValue]]("raw_metadata") - - def * = ( - id.?, - openAlexId, - doi, - title, - abstractSummary, - publicationDate, - journalName, - relevanceScore, - discoveryDate, - status, - reviewedBy, - reviewedAt, - rejectionReason, - rawMetadata - ).mapTo[PublicationCandidate] -} diff --git a/app/models/dal/domain/publications/PublicationCitizenBiosamplesTable.scala b/app/models/dal/domain/publications/PublicationCitizenBiosamplesTable.scala deleted file mode 100644 index 6f91be3e..00000000 --- a/app/models/dal/domain/publications/PublicationCitizenBiosamplesTable.scala +++ /dev/null @@ -1,14 +0,0 @@ -package models.dal.domain.publications - -import models.domain.publications.PublicationCitizenBiosample -import models.dal.MyPostgresProfile.api.* - -class PublicationCitizenBiosamplesTable(tag: Tag) extends Table[PublicationCitizenBiosample](tag, "publication_citizen_biosample") { - def publicationId = column[Int]("publication_id") - - def citizenBiosampleId = column[Int]("citizen_biosample_id") - - def * = (publicationId, citizenBiosampleId).mapTo[PublicationCitizenBiosample] - - def pkey = primaryKey("publication_citizen_biosample_pkey", (publicationId, citizenBiosampleId)) -} diff --git a/app/models/dal/domain/publications/PublicationEnaStudiesTable.scala b/app/models/dal/domain/publications/PublicationEnaStudiesTable.scala deleted file mode 100644 index 3da5cf94..00000000 --- a/app/models/dal/domain/publications/PublicationEnaStudiesTable.scala +++ /dev/null @@ -1,38 +0,0 @@ -package 
models.dal.domain.publications - -import models.domain.publications.PublicationGenomicStudy -import models.dal.MyPostgresProfile.api.* - -/** - * Represents the database table definition for storing the relationship between publications - * and ENA (European Nucleotide Archive) studies. - * - * This table establishes a many-to-many relationship between publications and ENA studies, - * enabling the association of publications with the corresponding study metadata in ENA. - * - * @constructor Creates an instance of `PublicationEnaStudiesTable`. - * @param tag A Slick `Tag` object used to scope and reference the table within a database schema. - * - * Table name: - * - The physical name of the table in the database is `publication_ena_study`. - * - * Columns: - * - `publicationId`: Integer column representing the unique identifier of a publication. - * - `enaStudyId`: Integer column representing the unique identifier of an ENA study. - * - * Primary key: - * - A composite primary key is defined using both `publicationId` and `enaStudyId`. - * - * Mapping: - * - Defines a mapping to the `PublicationEnaStudy` case class, which represents the relationship - * between a publication and an ENA study in the application domain. 
- */ -class PublicationEnaStudiesTable(tag: Tag) extends Table[PublicationGenomicStudy](tag, "publication_ena_study") { - def publicationId = column[Int]("publication_id") - - def genomicStudyId = column[Int]("genomic_study_id") - - def * = (publicationId, genomicStudyId).mapTo[PublicationGenomicStudy] - - def pkey = primaryKey("publication_ena_study_genomic_study_id_fkey", (publicationId, genomicStudyId)) -} diff --git a/app/models/dal/domain/publications/PublicationSearchConfigsTable.scala b/app/models/dal/domain/publications/PublicationSearchConfigsTable.scala deleted file mode 100644 index 1ebb6c70..00000000 --- a/app/models/dal/domain/publications/PublicationSearchConfigsTable.scala +++ /dev/null @@ -1,28 +0,0 @@ -package models.dal.domain.publications - -import models.domain.publications.PublicationSearchConfig -import models.dal.MyPostgresProfile.api.* -import java.time.LocalDateTime -import play.api.libs.json.JsValue - -class PublicationSearchConfigsTable(tag: Tag) extends Table[PublicationSearchConfig](tag, "publication_search_configs") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def name = column[String]("name") - def searchQuery = column[String]("search_query") - def concepts = column[Option[JsValue]]("concepts") - def journals = column[Option[JsValue]]("journals") - def enabled = column[Boolean]("enabled") - def lastRun = column[Option[LocalDateTime]]("last_run") - def createdAt = column[LocalDateTime]("created_at") - - def * = ( - id.?, - name, - searchQuery, - concepts, - journals, - enabled, - lastRun, - createdAt - ).mapTo[PublicationSearchConfig] -} diff --git a/app/models/dal/domain/publications/PublicationSearchRunsTable.scala b/app/models/dal/domain/publications/PublicationSearchRunsTable.scala deleted file mode 100644 index ad9a74cb..00000000 --- a/app/models/dal/domain/publications/PublicationSearchRunsTable.scala +++ /dev/null @@ -1,27 +0,0 @@ -package models.dal.domain.publications - -import 
models.domain.publications.PublicationSearchRun -import models.dal.MyPostgresProfile.api.* -import java.time.LocalDateTime - -class PublicationSearchRunsTable(tag: Tag) extends Table[PublicationSearchRun](tag, "publication_search_runs") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - def configId = column[Int]("config_id") - def runAt = column[LocalDateTime]("run_at") - def candidatesFound = column[Int]("candidates_found") - def newCandidates = column[Int]("new_candidates") - def queryUsed = column[Option[String]]("query_used") - def durationMs = column[Option[Int]]("duration_ms") - - def * = ( - id.?, - configId, - runAt, - candidatesFound, - newCandidates, - queryUsed, - durationMs - ).mapTo[PublicationSearchRun] - - def config = foreignKey("fk_run_config", configId, TableQuery[PublicationSearchConfigsTable])(_.id) -} diff --git a/app/models/dal/domain/publications/PublicationsTable.scala b/app/models/dal/domain/publications/PublicationsTable.scala deleted file mode 100644 index 1f635749..00000000 --- a/app/models/dal/domain/publications/PublicationsTable.scala +++ /dev/null @@ -1,64 +0,0 @@ -package models.dal.domain.publications - -import models.domain.publications.Publication -import models.dal.MyPostgresProfile.api.* - -import java.time.LocalDate - -class PublicationsTable(tag: Tag) extends Table[Publication](tag, "publication") { - def id = column[Int]("id", O.PrimaryKey, O.AutoInc) - - def openAlexId = column[Option[String]]("open_alex_id", O.Unique) - - def pubmedId = column[Option[String]]("pubmed_id", O.Unique) - - def doi = column[Option[String]]("doi", O.Unique) - - def title = column[String]("title") - - def authors = column[Option[String]]("authors") - - def abstractSummary = column[Option[String]]("abstract_summary") - - def journal = column[Option[String]]("journal") - - def publicationDate = column[Option[LocalDate]]("publication_date") - - def url = column[Option[String]]("url") - - def citationNormalizedPercentile = 
column[Option[Float]]("citation_normalized_percentile") - - def citedByCount = column[Option[Int]]("cited_by_count") - - def openAccessStatus = column[Option[String]]("open_access_status") - - def openAccessUrl = column[Option[String]]("open_access_url") - - def primaryTopic = column[Option[String]]("primary_topic") // NEW column - - def publicationType = column[Option[String]]("publication_type") - - def publisher = column[Option[String]]("publisher") - - - // Update the * projection to include all new columns and remove old ones - def * = ( - id.?, - openAlexId, - pubmedId, - doi, - title, - authors, - abstractSummary, - journal, - publicationDate, - url, - citationNormalizedPercentile, - citedByCount, - openAccessStatus, - openAccessUrl, - primaryTopic, // Updated here - publicationType, - publisher - ).mapTo[Publication] -} diff --git a/app/models/dal/domain/social/ConversationParticipantsTable.scala b/app/models/dal/domain/social/ConversationParticipantsTable.scala deleted file mode 100644 index 8c1b9a60..00000000 --- a/app/models/dal/domain/social/ConversationParticipantsTable.scala +++ /dev/null @@ -1,23 +0,0 @@ -package models.dal.domain.social - -import models.domain.social.ConversationParticipant -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -class ConversationParticipantsTable(tag: Tag) extends Table[ConversationParticipant](tag, Some("social"), "conversation_participants") { - - def conversationId = column[UUID]("conversation_id") - def userDid = column[String]("user_did") - def role = column[String]("role") - def lastReadAt = column[Option[LocalDateTime]]("last_read_at") - def joinedAt = column[LocalDateTime]("joined_at") - - def * : ProvenShape[ConversationParticipant] = (conversationId, userDid, role, lastReadAt, joinedAt).mapTo[ConversationParticipant] - - def pk = primaryKey("pk_conversation_participants", (conversationId, userDid)) - - def conversationFk = 
foreignKey("fk_conversation_participants_conversation_id", conversationId, TableQuery[ConversationsTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} diff --git a/app/models/dal/domain/social/ConversationsTable.scala b/app/models/dal/domain/social/ConversationsTable.scala deleted file mode 100644 index b5b17568..00000000 --- a/app/models/dal/domain/social/ConversationsTable.scala +++ /dev/null @@ -1,18 +0,0 @@ -package models.dal.domain.social - -import models.domain.social.Conversation -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -class ConversationsTable(tag: Tag) extends Table[Conversation](tag, Some("social"), "conversations") { - - def id = column[UUID]("id", O.PrimaryKey) - def `type` = column[String]("type") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - def * : ProvenShape[Conversation] = (id, `type`, createdAt, updatedAt).mapTo[Conversation] -} diff --git a/app/models/dal/domain/social/FeedPostsTable.scala b/app/models/dal/domain/social/FeedPostsTable.scala deleted file mode 100644 index 2f826e4f..00000000 --- a/app/models/dal/domain/social/FeedPostsTable.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.dal.domain.social - -import models.domain.social.FeedPost -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -class FeedPostsTable(tag: Tag) extends Table[FeedPost](tag, Some("social"), "feed_posts") { - - def id = column[UUID]("id", O.PrimaryKey) - def authorDid = column[String]("author_did") - def content = column[String]("content") - def parentPostId = column[Option[UUID]]("parent_post_id") - def rootPostId = column[Option[UUID]]("root_post_id") - def topic = column[Option[String]]("topic") - def authorReputationScore = column[Int]("author_reputation_score") - def createdAt = 
column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - - def * : ProvenShape[FeedPost] = (id, authorDid, content, parentPostId, rootPostId, topic, authorReputationScore, createdAt, updatedAt).mapTo[FeedPost] - - def parentPostFk = foreignKey("fk_feed_posts_parent_id", parentPostId, TableQuery[FeedPostsTable])(_.id.?, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) - def rootPostFk = foreignKey("fk_feed_posts_root_id", rootPostId, TableQuery[FeedPostsTable])(_.id.?, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} diff --git a/app/models/dal/domain/social/MessagesTable.scala b/app/models/dal/domain/social/MessagesTable.scala deleted file mode 100644 index 4ae86157..00000000 --- a/app/models/dal/domain/social/MessagesTable.scala +++ /dev/null @@ -1,23 +0,0 @@ -package models.dal.domain.social - -import models.domain.social.Message -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -class MessagesTable(tag: Tag) extends Table[Message](tag, Some("social"), "messages") { - - def id = column[UUID]("id", O.PrimaryKey) - def conversationId = column[UUID]("conversation_id") - def senderDid = column[String]("sender_did") - def content = column[String]("content") - def contentType = column[String]("content_type") - def createdAt = column[LocalDateTime]("created_at") - def isEdited = column[Boolean]("is_edited") - - def * : ProvenShape[Message] = (id, conversationId, senderDid, content, contentType, createdAt, isEdited).mapTo[Message] - - def conversationFk = foreignKey("fk_messages_conversation_id", conversationId, TableQuery[ConversationsTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} diff --git a/app/models/dal/domain/social/ReputationEventTypesTable.scala b/app/models/dal/domain/social/ReputationEventTypesTable.scala deleted file mode 100644 index 
c6d94c4d..00000000 --- a/app/models/dal/domain/social/ReputationEventTypesTable.scala +++ /dev/null @@ -1,38 +0,0 @@ -package models.dal.domain.social - -import models.domain.social.ReputationEventType -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -class ReputationEventTypesTable(tag: Tag) extends Table[ReputationEventType](tag, "reputation_event_types") { - def id = column[UUID]("id", O.PrimaryKey) - - def name = column[String]("name", O.Unique) - - def description = column[Option[String]]("description") - - def defaultPointsChange = column[Int]("default_points_change") - - def isPositive = column[Boolean]("is_positive") - - def isSystemGenerated = column[Boolean]("is_system_generated") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[LocalDateTime]("updated_at") - - // Projection for the case class - def * : ProvenShape[ReputationEventType] = ( - id.?, - name, - description, - defaultPointsChange, - isPositive, - isSystemGenerated, - createdAt, - updatedAt - ).mapTo[ReputationEventType] -} \ No newline at end of file diff --git a/app/models/dal/domain/social/ReputationEventsTable.scala b/app/models/dal/domain/social/ReputationEventsTable.scala deleted file mode 100644 index a366112d..00000000 --- a/app/models/dal/domain/social/ReputationEventsTable.scala +++ /dev/null @@ -1,50 +0,0 @@ -package models.dal.domain.social - -import models.dal.domain.user.UsersTable -import models.domain.social.{ReputationEvent, ReputationEventType} -import models.domain.user.User -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -class ReputationEventsTable(tag: Tag) extends Table[ReputationEvent](tag, "reputation_events") { - def id = column[UUID]("id", O.PrimaryKey) - - def userId = column[UUID]("user_id") - - def eventTypeId = column[UUID]("event_type_id") - - def 
actualPointsChange = column[Int]("actual_points_change") - - def sourceUserId = column[Option[UUID]]("source_user_id") - - def relatedEntityType = column[Option[String]]("related_entity_type") - - def relatedEntityId = column[Option[UUID]]("related_entity_id") - - def notes = column[Option[String]]("notes") - - def createdAt = column[LocalDateTime]("created_at") - - // Projection for the case class - def * : ProvenShape[ReputationEvent] = ( - id.?, - userId, - eventTypeId, - actualPointsChange, - sourceUserId, - relatedEntityType, - relatedEntityId, - notes, - createdAt - ).mapTo[ReputationEvent] - - def userFk = foreignKey("fk_reputation_events_user_id", userId, TableQuery[UsersTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) - - def eventTypeFk = foreignKey("fk_reputation_events_event_type_id", eventTypeId, TableQuery[ReputationEventTypesTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Restrict) - - def sourceUserFk = foreignKey("fk_reputation_events_source_user_id", sourceUserId, TableQuery[UsersTable])(_.id.?, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.SetNull) - -} \ No newline at end of file diff --git a/app/models/dal/domain/social/UserBlocksTable.scala b/app/models/dal/domain/social/UserBlocksTable.scala deleted file mode 100644 index a1142a82..00000000 --- a/app/models/dal/domain/social/UserBlocksTable.scala +++ /dev/null @@ -1,19 +0,0 @@ -package models.dal.domain.social - -import models.domain.social.UserBlock -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.LocalDateTime - -class UserBlocksTable(tag: Tag) extends Table[UserBlock](tag, Some("social"), "user_blocks") { - - def blockerDid = column[String]("blocker_did") - def blockedDid = column[String]("blocked_did") - def reason = column[Option[String]]("reason") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[UserBlock] = (blockerDid, 
blockedDid, reason, createdAt).mapTo[UserBlock] - - def pk = primaryKey("pk_user_blocks", (blockerDid, blockedDid)) -} diff --git a/app/models/dal/domain/social/UserReputationScoresTable.scala b/app/models/dal/domain/social/UserReputationScoresTable.scala deleted file mode 100644 index 945f2a90..00000000 --- a/app/models/dal/domain/social/UserReputationScoresTable.scala +++ /dev/null @@ -1,25 +0,0 @@ -package models.dal.domain.social - -import models.dal.domain.user.UsersTable -import models.domain.social.UserReputationScore -import models.domain.user.User -import models.dal.MyPostgresProfile.api.* -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -class UserReputationScoresTable(tag: Tag) extends Table[UserReputationScore](tag, Some("social"), "user_reputation_scores") { - - def userId = column[UUID]("user_id", O.PrimaryKey) - def score = column[Long]("score") - def lastCalculatedAt = column[LocalDateTime]("last_calculated_at") - - def * : ProvenShape[UserReputationScore] = ( - userId, - score, - lastCalculatedAt - ).mapTo[UserReputationScore] - - def userFk = foreignKey("fk_user_reputation_scores_user_id", userId, TableQuery[UsersTable])(_.id, onUpdate = ForeignKeyAction.Restrict, onDelete = ForeignKeyAction.Cascade) -} \ No newline at end of file diff --git a/app/models/dal/domain/user/UsersTable.scala b/app/models/dal/domain/user/UsersTable.scala deleted file mode 100644 index 2d8361a8..00000000 --- a/app/models/dal/domain/user/UsersTable.scala +++ /dev/null @@ -1,36 +0,0 @@ -package models.dal.domain.user - -import models.dal.MyPostgresProfile.api.* -import models.domain.user.User - -import java.time.LocalDateTime -import java.util.UUID - -class UsersTable(tag: Tag) extends Table[User](tag, "users") { - def id = column[UUID]("id", O.PrimaryKey) // O.AutoInc is not used for UUID defaults - - def email = column[Option[String]]("email", O.Unique) - - def did = column[String]("did", O.Unique) - - def handle = 
column[Option[String]]("handle", O.Unique) - - def displayName = column[Option[String]]("display_name") - - def createdAt = column[LocalDateTime]("created_at") - - def updatedAt = column[LocalDateTime]("updated_at") - - def isActive = column[Boolean]("is_active") - - def * = ( - id.?, - email, - did, - handle, - displayName, - createdAt, - updatedAt, - isActive - ).mapTo[User] -} \ No newline at end of file diff --git a/app/models/dal/support/ContactMessagesTable.scala b/app/models/dal/support/ContactMessagesTable.scala deleted file mode 100644 index fc696a05..00000000 --- a/app/models/dal/support/ContactMessagesTable.scala +++ /dev/null @@ -1,49 +0,0 @@ -package models.dal.support - -import models.dal.MyPostgresProfile.api.* -import models.domain.support.{ContactMessage, MessageStatus} -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -/** - * DAL table for support.contact_messages - */ -class ContactMessagesTable(tag: Tag) extends Table[ContactMessage](tag, Some("support"), "contact_messages") { - - // Custom mapper for MessageStatus enum - implicit val messageStatusMapper: BaseColumnType[MessageStatus] = - MappedColumnType.base[MessageStatus, String]( - status => status.value, - str => MessageStatus.fromString(str).getOrElse(MessageStatus.New) - ) - - def id = column[UUID]("id", O.PrimaryKey) - def userId = column[Option[UUID]]("user_id") - def senderName = column[Option[String]]("sender_name") - def senderEmail = column[Option[String]]("sender_email") - def subject = column[String]("subject") - def message = column[String]("message") - def status = column[MessageStatus]("status") - def ipAddressHash = column[Option[String]]("ip_address_hash") - def userAgent = column[Option[String]]("user_agent") - def createdAt = column[LocalDateTime]("created_at") - def updatedAt = column[LocalDateTime]("updated_at") - def userLastViewedAt = column[Option[LocalDateTime]]("user_last_viewed_at") - - def * : ProvenShape[ContactMessage] = 
( - id.?, - userId, - senderName, - senderEmail, - subject, - message, - status, - ipAddressHash, - userAgent, - createdAt, - updatedAt, - userLastViewedAt - ).mapTo[ContactMessage] -} diff --git a/app/models/dal/support/MessageRepliesTable.scala b/app/models/dal/support/MessageRepliesTable.scala deleted file mode 100644 index 395e2710..00000000 --- a/app/models/dal/support/MessageRepliesTable.scala +++ /dev/null @@ -1,34 +0,0 @@ -package models.dal.support - -import models.dal.MyPostgresProfile.api.* -import models.domain.support.MessageReply -import slick.lifted.ProvenShape - -import java.time.LocalDateTime -import java.util.UUID - -/** - * DAL table for support.message_replies - */ -class MessageRepliesTable(tag: Tag) extends Table[MessageReply](tag, Some("support"), "message_replies") { - - def id = column[UUID]("id", O.PrimaryKey) - def messageId = column[UUID]("message_id") - def adminUserId = column[UUID]("admin_user_id") - def replyText = column[String]("reply_text") - def emailSent = column[Boolean]("email_sent") - def emailSentAt = column[Option[LocalDateTime]]("email_sent_at") - def createdAt = column[LocalDateTime]("created_at") - - def * : ProvenShape[MessageReply] = ( - id.?, - messageId, - adminUserId, - replyText, - emailSent, - emailSentAt, - createdAt - ).mapTo[MessageReply] - - def messageFk = foreignKey("fk_message_replies_message_id", messageId, TableQuery[ContactMessagesTable])(_.id) -} diff --git a/app/models/domain/GroupProject.scala b/app/models/domain/GroupProject.scala deleted file mode 100644 index 3a4b024a..00000000 --- a/app/models/domain/GroupProject.scala +++ /dev/null @@ -1,188 +0,0 @@ -package models.domain - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime -import java.util.UUID - -case class GroupProject( - id: Option[Int] = None, - projectGuid: UUID = UUID.randomUUID(), - projectName: String, - projectType: String, - targetHaplogroup: Option[String] = None, - targetLineage: Option[String] = None, - 
description: Option[String] = None, - backgroundInfo: Option[String] = None, - joinPolicy: String = "APPROVAL_REQUIRED", - haplogroupRequirement: Option[String] = None, - memberListVisibility: String = "MEMBERS_ONLY", - strPolicy: String = "DISTANCE_ONLY", - snpPolicy: String = "TERMINAL_ONLY", - publicTreeView: Boolean = false, - successionPolicy: Option[String] = Some("CO_ADMIN_INHERITS"), - ownerDid: String, - atUri: Option[String] = None, - atCid: Option[String] = None, - deleted: Boolean = false, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) - -object GroupProject { - implicit val format: OFormat[GroupProject] = Json.format[GroupProject] - - val ValidProjectTypes: Set[String] = Set("HAPLOGROUP", "SURNAME", "GEOGRAPHIC", "ETHNIC", "RESEARCH", "CUSTOM") - val ValidJoinPolicies: Set[String] = Set("OPEN", "APPROVAL_REQUIRED", "INVITE_ONLY", "HAPLOGROUP_VERIFIED") - val ValidLineages: Set[String] = Set("Y_DNA", "MT_DNA", "BOTH") -} - -case class GroupProjectMember( - id: Option[Int] = None, - groupProjectId: Int, - citizenDid: String, - biosampleAtUri: Option[String] = None, - role: String = "MEMBER", - status: String = "PENDING_APPROVAL", - displayName: Option[String] = None, - kitId: Option[String] = None, - visibility: MemberVisibility = MemberVisibility(), - subgroupIds: List[String] = List.empty, - contributionLevel: Option[String] = Some("OBSERVER"), - joinedAt: Option[LocalDateTime] = None, - atUri: Option[String] = None, - atCid: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) - -object GroupProjectMember { - implicit val format: OFormat[GroupProjectMember] = Json.format[GroupProjectMember] - - val ValidRoles: Set[String] = Set("ADMIN", "CO_ADMIN", "MODERATOR", "CURATOR", "MEMBER") - val ValidStatuses: Set[String] = Set("PENDING_APPROVAL", "ACTIVE", "SUSPENDED", "LEFT", "REMOVED") -} - -case class MemberVisibility( - 
showInMemberList: Boolean = true, - showInTree: Boolean = true, - shareTerminalHaplogroup: Boolean = true, - shareFullLineagePath: Boolean = false, - sharePrivateVariants: Boolean = false, - ancestorVisibility: String = "NONE", - strVisibility: String = "NONE", - allowDirectContact: Boolean = false, - showDisplayName: Boolean = true - ) - -object MemberVisibility { - implicit val format: OFormat[MemberVisibility] = Json.format[MemberVisibility] - - val ValidAncestorVisibility: Set[String] = Set("NONE", "CENTURY_ONLY", "REGION_ONLY", "COUNTRY_ONLY", "SURNAME_ONLY", "FULL") - val ValidStrVisibility: Set[String] = Set("NONE", "DISTANCE_CALCULATION_ONLY", "MODAL_COMPARISON_ONLY", "FULL_TO_MEMBERS", "FULL_PUBLIC") - - private val ancestorRank: Map[String, Int] = Map( - "NONE" -> 0, "CENTURY_ONLY" -> 1, "REGION_ONLY" -> 2, - "COUNTRY_ONLY" -> 3, "SURNAME_ONLY" -> 4, "FULL" -> 5 - ) - private val strRank: Map[String, Int] = Map( - "NONE" -> 0, "DISTANCE_CALCULATION_ONLY" -> 1, "MODAL_COMPARISON_ONLY" -> 2, - "FULL_TO_MEMBERS" -> 3, "FULL_PUBLIC" -> 4 - ) - - def moreRestrictiveAncestor(a: String, b: String): String = - if (ancestorRank.getOrElse(a, 0) <= ancestorRank.getOrElse(b, 0)) a else b - - def moreRestrictiveStr(a: String, b: String): String = - if (strRank.getOrElse(a, 0) <= strRank.getOrElse(b, 0)) a else b -} - -case class EffectiveVisibility( - showInMemberList: Boolean, - showInTree: Boolean, - shareTerminalHaplogroup: Boolean, - shareFullLineagePath: Boolean, - sharePrivateVariants: Boolean, - ancestorVisibility: String, - strVisibility: String, - allowDirectContact: Boolean, - showDisplayName: Boolean - ) - -object EffectiveVisibility { - implicit val format: OFormat[EffectiveVisibility] = Json.format[EffectiveVisibility] - - def compute(project: GroupProject, member: MemberVisibility): EffectiveVisibility = { - val projectSnpAllowsFullPath = project.snpPolicy == "FULL_PATH" || project.snpPolicy == "WITH_PRIVATE_VARIANTS" - val projectSnpAllowsPrivate = 
project.snpPolicy == "WITH_PRIVATE_VARIANTS" - - val projectStrLevel = project.strPolicy match { - case "HIDDEN" => "NONE" - case "DISTANCE_ONLY" => "DISTANCE_CALCULATION_ONLY" - case "MODAL_COMPARISON" => "MODAL_COMPARISON_ONLY" - case "MEMBERS_ONLY_RAW" => "FULL_TO_MEMBERS" - case "PUBLIC_RAW" => "FULL_PUBLIC" - case _ => "NONE" - } - - val projectAncestorLevel = "FULL" // project doesn't restrict ancestor granularity directly; member controls - - EffectiveVisibility( - showInMemberList = member.showInMemberList && project.memberListVisibility != "HIDDEN", - showInTree = member.showInTree && project.publicTreeView || member.showInTree, - shareTerminalHaplogroup = member.shareTerminalHaplogroup && project.snpPolicy != "HIDDEN", - shareFullLineagePath = member.shareFullLineagePath && projectSnpAllowsFullPath, - sharePrivateVariants = member.sharePrivateVariants && projectSnpAllowsPrivate, - ancestorVisibility = MemberVisibility.moreRestrictiveAncestor(member.ancestorVisibility, projectAncestorLevel), - strVisibility = MemberVisibility.moreRestrictiveStr(member.strVisibility, projectStrLevel), - allowDirectContact = member.allowDirectContact, - showDisplayName = member.showDisplayName - ) - } -} - -case class AncestorData( - name: Option[String] = None, - surname: Option[String] = None, - birthYear: Option[Int] = None, - birthCentury: Option[String] = None, - birthDecade: Option[String] = None, - birthCountry: Option[String] = None, - birthRegion: Option[String] = None, - birthPlace: Option[String] = None, - additionalInfo: Option[String] = None - ) - -object AncestorData { - implicit val format: OFormat[AncestorData] = Json.format[AncestorData] - - def filter(data: AncestorData, level: String): AncestorData = level match { - case "NONE" => AncestorData() - case "CENTURY_ONLY" => AncestorData(birthCentury = data.birthCentury) - case "REGION_ONLY" => AncestorData(birthCountry = data.birthCountry, birthRegion = data.birthRegion) - case "COUNTRY_ONLY" => 
AncestorData(birthCountry = data.birthCountry) - case "SURNAME_ONLY" => AncestorData(surname = data.surname, birthCentury = data.birthCentury) - case "FULL" => data - case _ => AncestorData() - } -} - -case class FilteredMemberView( - memberId: Int, - kitId: Option[String], - displayName: Option[String], - role: String, - contributionLevel: Option[String], - terminalHaplogroup: Option[String], - lineagePath: Option[Seq[String]], - privateVariantCount: Option[Int], - ancestor: AncestorData, - strVisibility: String, - allowDirectContact: Boolean, - subgroupIds: List[String], - joinedAt: Option[LocalDateTime] - ) - -object FilteredMemberView { - implicit val format: OFormat[FilteredMemberView] = Json.format[FilteredMemberView] -} diff --git a/app/models/domain/Project.scala b/app/models/domain/Project.scala deleted file mode 100644 index c3f2f34f..00000000 --- a/app/models/domain/Project.scala +++ /dev/null @@ -1,17 +0,0 @@ -package models.domain - -import java.time.LocalDateTime -import java.util.UUID - -case class Project( - id: Option[Int] = None, - projectGuid: UUID, - name: String, - description: Option[String] = None, - ownerDid: String, - createdAt: LocalDateTime, - updatedAt: LocalDateTime, - deleted: Boolean = false, - atUri: Option[String] = None, - atCid: Option[String] = None - ) diff --git a/app/models/domain/auth/CookieConsent.scala b/app/models/domain/auth/CookieConsent.scala deleted file mode 100644 index 5aa7cc8d..00000000 --- a/app/models/domain/auth/CookieConsent.scala +++ /dev/null @@ -1,33 +0,0 @@ -package models.domain.auth - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Tracks user acceptance of cookie policy for GDPR compliance. 
- * - * @param id UUID Primary Key - * @param userId Optional - linked user if logged in - * @param sessionId Optional - session identifier for anonymous users - * @param ipAddressHash SHA-256 hash of IP for anonymous consent verification - * @param consentGiven Whether consent was given - * @param consentTimestamp When consent was recorded - * @param policyVersion Version of cookie policy accepted - * @param userAgent Browser user agent string - * @param createdAt Record creation timestamp - */ -case class CookieConsent( - id: Option[UUID], - userId: Option[UUID], - sessionId: Option[String], - ipAddressHash: Option[String], - consentGiven: Boolean, - consentTimestamp: LocalDateTime, - policyVersion: String, - userAgent: Option[String], - createdAt: LocalDateTime -) - -object CookieConsent { - val CurrentPolicyVersion = "1.0" -} diff --git a/app/models/domain/billing/PatronSubscription.scala b/app/models/domain/billing/PatronSubscription.scala deleted file mode 100644 index 2b80e76e..00000000 --- a/app/models/domain/billing/PatronSubscription.scala +++ /dev/null @@ -1,70 +0,0 @@ -package models.domain.billing - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime -import java.util.UUID - -case class PatronSubscription( - id: Option[Int] = None, - userId: UUID, - patronTier: String, - status: String = "ACTIVE", - paymentProvider: String, - providerSubscriptionId: Option[String] = None, - providerCustomerId: Option[String] = None, - amountCents: Int, - currency: String = "USD", - billingInterval: String, - currentPeriodStart: Option[LocalDateTime] = None, - currentPeriodEnd: Option[LocalDateTime] = None, - cancelledAt: Option[LocalDateTime] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) - -object PatronSubscription { - implicit val format: OFormat[PatronSubscription] = Json.format[PatronSubscription] - - val ValidTiers: Set[String] = Set("SUPPORTER", "CONTRIBUTOR", "SUSTAINER", 
"FOUNDING_PATRON") - val ValidStatuses: Set[String] = Set("ACTIVE", "CANCELLED", "PAST_DUE", "EXPIRED") - val ValidProviders: Set[String] = Set("STRIPE", "PAYPAL") - val ValidIntervals: Set[String] = Set("MONTHLY", "YEARLY") -} - -object PatronTier { - val Supporter = "SUPPORTER" - val Contributor = "CONTRIBUTOR" - val Sustainer = "SUSTAINER" - val FoundingPatron = "FOUNDING_PATRON" - - def amountCents(tier: String, interval: String): Int = (tier, interval) match { - case (Supporter, "MONTHLY") => 200 - case (Supporter, "YEARLY") => 2000 - case (Contributor, "MONTHLY") => 500 - case (Contributor, "YEARLY") => 5000 - case (Sustainer, "MONTHLY") => 1000 - case (Sustainer, "YEARLY") => 10000 - case (FoundingPatron, "MONTHLY") => 5000 - case (FoundingPatron, "YEARLY") => 50000 - case _ => throw new IllegalArgumentException(s"Unknown tier/interval: $tier/$interval") - } - - def displayName(tier: String): String = tier match { - case Supporter => "Supporter" - case Contributor => "Contributor" - case Sustainer => "Sustainer" - case FoundingPatron => "Founding Patron" - case other => other - } -} - -case class PatronSummary( - activePatrons: Int, - tierCounts: Map[String, Int], - monthlyRevenueCents: Int - ) - -object PatronSummary { - implicit val format: OFormat[PatronSummary] = Json.format[PatronSummary] -} diff --git a/app/models/domain/curator/AuditLogEntry.scala b/app/models/domain/curator/AuditLogEntry.scala deleted file mode 100644 index 725f3d31..00000000 --- a/app/models/domain/curator/AuditLogEntry.scala +++ /dev/null @@ -1,65 +0,0 @@ -package models.domain.curator - -import play.api.libs.json.JsValue - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Represents an audit log entry for curator actions on haplogroups and variants. 
- * - * @param id UUID Primary Key - * @param userId The user who performed the action - * @param entityType The type of entity: "haplogroup" or "variant" - * @param entityId The ID of the affected entity - * @param action The action performed: "create", "update", or "delete" - * @param oldValue JSON representation of the entity before the change (for updates/deletes) - * @param newValue JSON representation of the entity after the change (for creates/updates) - * @param comment Optional comment explaining the change - * @param createdAt When the action was performed - */ -case class AuditLogEntry( - id: Option[UUID] = None, - userId: UUID, - entityType: String, - entityId: Int, - action: String, - oldValue: Option[JsValue], - newValue: Option[JsValue], - comment: Option[String], - createdAt: LocalDateTime = LocalDateTime.now() -) - -/** - * Audit action types. - */ -enum AuditAction(val value: String) { - case Create extends AuditAction("create") - case Update extends AuditAction("update") - case Delete extends AuditAction("delete") -} - -object AuditAction { - def fromString(s: String): Option[AuditAction] = s.toLowerCase match { - case "create" => Some(Create) - case "update" => Some(Update) - case "delete" => Some(Delete) - case _ => None - } -} - -/** - * Entity types that can be audited. 
- */ -enum AuditEntityType(val value: String) { - case Haplogroup extends AuditEntityType("haplogroup") - case Variant extends AuditEntityType("variant") -} - -object AuditEntityType { - def fromString(s: String): Option[AuditEntityType] = s.toLowerCase match { - case "haplogroup" => Some(Haplogroup) - case "variant" => Some(Variant) - case _ => None - } -} diff --git a/app/models/domain/curator/CuratorNotification.scala b/app/models/domain/curator/CuratorNotification.scala deleted file mode 100644 index dd1dadc6..00000000 --- a/app/models/domain/curator/CuratorNotification.scala +++ /dev/null @@ -1,66 +0,0 @@ -package models.domain.curator - -import play.api.libs.json.{Format, Json, OFormat, Reads, Writes} -import java.time.LocalDateTime - -/** - * Type of curator notification. - */ -enum NotificationType: - case ChangeSetReady // Change set is ready for review - case ChangeSetApplied // Change set was applied to production - case ChangeSetDiscarded // Change set was discarded - case AmbiguityAlert // High-priority ambiguities detected - case ReviewReminder // Reminder to review pending changes - -object NotificationType { - def fromString(s: String): NotificationType = s.toUpperCase match { - case "CHANGE_SET_READY" => NotificationType.ChangeSetReady - case "CHANGE_SET_APPLIED" => NotificationType.ChangeSetApplied - case "CHANGE_SET_DISCARDED" => NotificationType.ChangeSetDiscarded - case "AMBIGUITY_ALERT" => NotificationType.AmbiguityAlert - case "REVIEW_REMINDER" => NotificationType.ReviewReminder - case other => throw new IllegalArgumentException(s"Unknown NotificationType: $other") - } - - def toDbString(nt: NotificationType): String = nt match { - case NotificationType.ChangeSetReady => "CHANGE_SET_READY" - case NotificationType.ChangeSetApplied => "CHANGE_SET_APPLIED" - case NotificationType.ChangeSetDiscarded => "CHANGE_SET_DISCARDED" - case NotificationType.AmbiguityAlert => "AMBIGUITY_ALERT" - case NotificationType.ReviewReminder => "REVIEW_REMINDER" - } 
- - implicit val reads: Reads[NotificationType] = Reads.StringReads.map(fromString) - implicit val writes: Writes[NotificationType] = Writes.StringWrites.contramap(toDbString) - implicit val format: Format[NotificationType] = Format(reads, writes) -} - -/** - * A notification for curators about tree versioning events. - * - * @param id Unique identifier - * @param notificationType Type of notification - * @param title Short title - * @param message Detailed message - * @param changeSetId Related change set (if applicable) - * @param createdAt When notification was created - * @param readAt When notification was read (null if unread) - * @param link Optional link to related resource - */ -case class CuratorNotification( - id: Option[Int], - notificationType: NotificationType, - title: String, - message: String, - changeSetId: Option[Int] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - readAt: Option[LocalDateTime] = None, - link: Option[String] = None -) { - def isRead: Boolean = readAt.isDefined -} - -object CuratorNotification { - implicit val format: OFormat[CuratorNotification] = Json.format[CuratorNotification] -} diff --git a/app/models/domain/discovery/DiscoveryEnums.scala b/app/models/domain/discovery/DiscoveryEnums.scala deleted file mode 100644 index 48910e86..00000000 --- a/app/models/domain/discovery/DiscoveryEnums.scala +++ /dev/null @@ -1,172 +0,0 @@ -package models.domain.discovery - -import play.api.libs.json.* - -enum BiosampleSourceType { - case Citizen, External - - override def toString: String = this match { - case Citizen => "CITIZEN" - case External => "EXTERNAL" - } -} - -object BiosampleSourceType { - def fromString(str: String): Option[BiosampleSourceType] = str.toUpperCase match { - case "CITIZEN" => Some(Citizen) - case "EXTERNAL" => Some(External) - case _ => None - } - - implicit val format: Format[BiosampleSourceType] = Format( - Reads.StringReads.flatMap { str => - fromString(str) match { - case Some(v) => Reads.pure(v) - 
case None => Reads.failed(s"Invalid BiosampleSourceType: $str") - } - }, - Writes.StringWrites.contramap(_.toString) - ) -} - -enum PrivateVariantStatus { - case Active, Promoted, Invalidated - - override def toString: String = this match { - case Active => "ACTIVE" - case Promoted => "PROMOTED" - case Invalidated => "INVALIDATED" - } -} - -object PrivateVariantStatus { - def fromString(str: String): Option[PrivateVariantStatus] = str.toUpperCase match { - case "ACTIVE" => Some(Active) - case "PROMOTED" => Some(Promoted) - case "INVALIDATED" => Some(Invalidated) - case _ => None - } - - implicit val format: Format[PrivateVariantStatus] = Format( - Reads.StringReads.flatMap { str => - fromString(str) match { - case Some(v) => Reads.pure(v) - case None => Reads.failed(s"Invalid PrivateVariantStatus: $str") - } - }, - Writes.StringWrites.contramap(_.toString) - ) -} - -enum ProposedBranchStatus { - case Pending, ReadyForReview, UnderReview, Accepted, Promoted, Rejected, Split - - override def toString: String = this match { - case Pending => "PENDING" - case ReadyForReview => "READY_FOR_REVIEW" - case UnderReview => "UNDER_REVIEW" - case Accepted => "ACCEPTED" - case Promoted => "PROMOTED" - case Rejected => "REJECTED" - case Split => "SPLIT" - } -} - -object ProposedBranchStatus { - def fromString(str: String): Option[ProposedBranchStatus] = str.toUpperCase match { - case "PENDING" => Some(Pending) - case "READY_FOR_REVIEW" => Some(ReadyForReview) - case "UNDER_REVIEW" => Some(UnderReview) - case "ACCEPTED" => Some(Accepted) - case "PROMOTED" => Some(Promoted) - case "REJECTED" => Some(Rejected) - case "SPLIT" => Some(Split) - case _ => None - } - - implicit val format: Format[ProposedBranchStatus] = Format( - Reads.StringReads.flatMap { str => - fromString(str) match { - case Some(v) => Reads.pure(v) - case None => Reads.failed(s"Invalid ProposedBranchStatus: $str") - } - }, - Writes.StringWrites.contramap(_.toString) - ) -} - -enum CuratorActionType { - case 
Review, Accept, Reject, Modify, Split, Merge, Create, Delete, Reassign, NameVariant - - override def toString: String = this match { - case Review => "REVIEW" - case Accept => "ACCEPT" - case Reject => "REJECT" - case Modify => "MODIFY" - case Split => "SPLIT" - case Merge => "MERGE" - case Create => "CREATE" - case Delete => "DELETE" - case Reassign => "REASSIGN" - case NameVariant => "NAME_VARIANT" - } -} - -object CuratorActionType { - def fromString(str: String): Option[CuratorActionType] = str.toUpperCase match { - case "REVIEW" => Some(Review) - case "ACCEPT" => Some(Accept) - case "REJECT" => Some(Reject) - case "MODIFY" => Some(Modify) - case "SPLIT" => Some(Split) - case "MERGE" => Some(Merge) - case "CREATE" => Some(Create) - case "DELETE" => Some(Delete) - case "REASSIGN" => Some(Reassign) - case "NAME_VARIANT" => Some(NameVariant) - case _ => None - } - - implicit val format: Format[CuratorActionType] = Format( - Reads.StringReads.flatMap { str => - fromString(str) match { - case Some(v) => Reads.pure(v) - case None => Reads.failed(s"Invalid CuratorActionType: $str") - } - }, - Writes.StringWrites.contramap(_.toString) - ) -} - -enum CuratorTargetType { - case ProposedBranch, Haplogroup, HaplogroupRelationship, Variant, Biosample - - override def toString: String = this match { - case ProposedBranch => "PROPOSED_BRANCH" - case Haplogroup => "HAPLOGROUP" - case HaplogroupRelationship => "HAPLOGROUP_RELATIONSHIP" - case Variant => "VARIANT" - case Biosample => "BIOSAMPLE" - } -} - -object CuratorTargetType { - def fromString(str: String): Option[CuratorTargetType] = str.toUpperCase match { - case "PROPOSED_BRANCH" => Some(ProposedBranch) - case "HAPLOGROUP" => Some(Haplogroup) - case "HAPLOGROUP_RELATIONSHIP" => Some(HaplogroupRelationship) - case "VARIANT" => Some(Variant) - case "BIOSAMPLE" => Some(Biosample) - case _ => None - } - - implicit val format: Format[CuratorTargetType] = Format( - Reads.StringReads.flatMap { str => - fromString(str) match { - 
case Some(v) => Reads.pure(v) - case None => Reads.failed(s"Invalid CuratorTargetType: $str") - } - }, - Writes.StringWrites.contramap(_.toString) - ) -} diff --git a/app/models/domain/discovery/DiscoveryModels.scala b/app/models/domain/discovery/DiscoveryModels.scala deleted file mode 100644 index cad974ff..00000000 --- a/app/models/domain/discovery/DiscoveryModels.scala +++ /dev/null @@ -1,102 +0,0 @@ -package models.domain.discovery - -import models.HaplogroupType -import play.api.libs.json.{JsValue, Json, OFormat} - -import java.time.LocalDateTime -import java.util.UUID - -case class BiosamplePrivateVariant( - id: Option[Int] = None, - sampleType: BiosampleSourceType, - sampleId: Int, - sampleGuid: UUID, - variantId: Int, - haplogroupType: HaplogroupType, - terminalHaplogroupId: Int, - discoveredAt: LocalDateTime = LocalDateTime.now(), - status: PrivateVariantStatus = PrivateVariantStatus.Active -) - -object BiosamplePrivateVariant { - implicit val format: OFormat[BiosamplePrivateVariant] = Json.format -} - -case class ProposedBranch( - id: Option[Int] = None, - parentHaplogroupId: Int, - proposedName: Option[String] = None, - haplogroupType: HaplogroupType, - status: ProposedBranchStatus = ProposedBranchStatus.Pending, - consensusCount: Int = 0, - confidenceScore: Double = 0.0, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now(), - reviewedAt: Option[LocalDateTime] = None, - reviewedBy: Option[String] = None, - notes: Option[String] = None, - promotedHaplogroupId: Option[Int] = None -) - -object ProposedBranch { - implicit val format: OFormat[ProposedBranch] = Json.format -} - -case class ProposedBranchVariant( - id: Option[Int] = None, - proposedBranchId: Int, - variantId: Int, - isDefining: Boolean = true, - evidenceCount: Int = 1, - firstObservedAt: LocalDateTime = LocalDateTime.now(), - lastObservedAt: LocalDateTime = LocalDateTime.now() -) - -object ProposedBranchVariant { - implicit val format: 
OFormat[ProposedBranchVariant] = Json.format -} - -case class ProposedBranchEvidence( - id: Option[Int] = None, - proposedBranchId: Int, - sampleType: BiosampleSourceType, - sampleId: Int, - sampleGuid: UUID, - addedAt: LocalDateTime = LocalDateTime.now(), - variantMatchCount: Int = 0, - variantMismatchCount: Int = 0 -) - -object ProposedBranchEvidence { - implicit val format: OFormat[ProposedBranchEvidence] = Json.format -} - -case class CuratorAction( - id: Option[Int] = None, - curatorId: String, - actionType: CuratorActionType, - targetType: CuratorTargetType, - targetId: Int, - previousState: Option[JsValue] = None, - newState: Option[JsValue] = None, - reason: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now() -) - -object CuratorAction { - implicit val format: OFormat[CuratorAction] = Json.format -} - -case class DiscoveryConfig( - id: Option[Int] = None, - haplogroupType: HaplogroupType, - configKey: String, - configValue: String, - description: Option[String] = None, - updatedAt: LocalDateTime = LocalDateTime.now(), - updatedBy: Option[String] = None -) - -object DiscoveryConfig { - implicit val format: OFormat[DiscoveryConfig] = Json.format -} diff --git a/app/models/domain/genomics/AlignmentMetadata.scala b/app/models/domain/genomics/AlignmentMetadata.scala deleted file mode 100644 index 850c8924..00000000 --- a/app/models/domain/genomics/AlignmentMetadata.scala +++ /dev/null @@ -1,107 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{JsValue, Json, OFormat} - -import java.time.LocalDateTime - -/** - * Represents the scope level at which alignment metrics are calculated. 
- */ -enum MetricLevel { - case CONTIG_OVERALL // Metrics for entire contig - case REGION // Metrics for specific coordinate ranges - case GLOBAL -} - -object MetricLevel { - - import play.api.libs.json.* - - // Use Format instead of OFormat since we're dealing with enums (simple values, not objects) - implicit val format: Format[MetricLevel] = new Format[MetricLevel] { - def reads(json: JsValue): JsResult[MetricLevel] = json match { - case JsString(s) => - try { - JsSuccess(MetricLevel.valueOf(s)) - } catch { - case _: IllegalArgumentException => JsError(s"Unknown MetricLevel: $s") - } - case _ => JsError("String value expected") - } - - def writes(level: MetricLevel): JsValue = JsString(level.toString) - } -} - -/** - * Represents metadata about alignment statistics for a sequence file aligned to a linear reference. - * - * @param id Unique identifier for the metadata record - * @param sequenceFileId Foreign key to the sequence file - * @param genbankContigId Foreign key to the GenBank contig (linear reference) - * @param metricLevel Scope of the metrics (overall contig or specific region) - * @param regionName Optional name for the region (e.g., "Chromosome X", "Gene ABC") - * @param regionStartPos Start position for regional metrics (1-based, inclusive) - * @param regionEndPos End position for regional metrics (1-based, inclusive) - * @param regionLengthBp Length of the region in base pairs - * @param metricsDate Timestamp when metrics were calculated - * @param analysisTool Tool used to generate metrics (e.g., "samtools", "mosdepth") - * @param analysisToolVersion Version of the analysis tool - * @param notes Optional additional notes - * @param metadata Optional JSON metadata for tool-specific information - */ -case class AlignmentMetadata( - id: Option[Long] = None, - sequenceFileId: Long, - genbankContigId: Int, - metricLevel: MetricLevel, - regionName: Option[String] = None, - regionStartPos: Option[Long] = None, - regionEndPos: Option[Long] = None, - 
regionLengthBp: Option[Long] = None, - referenceBuild: Option[String] = None, - variantCaller: Option[String] = None, - genomeTerritory: Option[Long] = None, - meanCoverage: Option[Double] = None, - medianCoverage: Option[Double] = None, - sdCoverage: Option[Double] = None, - pctExcDupe: Option[Double] = None, - pctExcMapq: Option[Double] = None, - pct10x: Option[Double] = None, - pct20x: Option[Double] = None, - pct30x: Option[Double] = None, - hetSnpSensitivity: Option[Double] = None, - metricsDate: LocalDateTime = LocalDateTime.now(), - analysisTool: String, - analysisToolVersion: Option[String] = None, - notes: Option[String] = None, - metadata: Option[JsValue] = None, - coverage: Option[JsValue] = None - ) { - - def embeddedCoverage: Option[EmbeddedCoverage] = coverage.flatMap(_.asOpt[EmbeddedCoverage]) - - def withCoverage(ec: EmbeddedCoverage): AlignmentMetadata = copy(coverage = Some(Json.toJson(ec))) -} - -object AlignmentMetadata { - implicit val format: OFormat[AlignmentMetadata] = Json.format[AlignmentMetadata] -} - -case class EmbeddedCoverage( - meanDepth: Option[Double] = None, - medianDepth: Option[Double] = None, - percentCoverageAt1x: Option[Double] = None, - percentCoverageAt5x: Option[Double] = None, - percentCoverageAt10x: Option[Double] = None, - percentCoverageAt20x: Option[Double] = None, - percentCoverageAt30x: Option[Double] = None, - basesNoCoverage: Option[Long] = None, - basesLowQualityMapping: Option[Long] = None, - basesCallable: Option[Long] = None, - meanMappingQuality: Option[Double] = None - ) - -object EmbeddedCoverage { - implicit val format: OFormat[EmbeddedCoverage] = Json.format[EmbeddedCoverage] -} \ No newline at end of file diff --git a/app/models/domain/genomics/AnalysisMethod.scala b/app/models/domain/genomics/AnalysisMethod.scala deleted file mode 100644 index ce2ed2e4..00000000 --- a/app/models/domain/genomics/AnalysisMethod.scala +++ /dev/null @@ -1,9 +0,0 @@ -package models.domain.genomics - -/** - * Represents an 
analysis method used in ancestry or genetic analysis. - * - * @param id The unique identifier for the analysis method. This is optional. - * @param methodName The name of the analysis method, providing details about the approach or technique used. - */ -case class AnalysisMethod(id: Option[Int], methodName: String) diff --git a/app/models/domain/genomics/AncestryAnalysis.scala b/app/models/domain/genomics/AncestryAnalysis.scala deleted file mode 100644 index 60f7ed07..00000000 --- a/app/models/domain/genomics/AncestryAnalysis.scala +++ /dev/null @@ -1,15 +0,0 @@ -package models.domain.genomics - -import java.util.UUID - -/** - * A case class representing the results of an ancestry analysis. - * - * @param id An optional unique identifier for the analysis. - * @param sampleGuid A universally unique identifier (UUID) representing the sample the analysis pertains to. - * @param analysisMethodId The identifier of the analysis method used in determining the ancestry. - * @param populationId The identifier of the population determined or analyzed in this study. - * @param probability A double representing the probability that the sample belongs to the specified population. 
- */ -case class AncestryAnalysis(id: Option[Int], sampleGuid: UUID, analysisMethodId: Int, - populationId: Int, probability: Double) diff --git a/app/models/domain/genomics/AssemblyMetadata.scala b/app/models/domain/genomics/AssemblyMetadata.scala deleted file mode 100644 index 9ce0f48c..00000000 --- a/app/models/domain/genomics/AssemblyMetadata.scala +++ /dev/null @@ -1,15 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.JsValue - -import java.time.LocalDate - -case class AssemblyMetadata( - id: Option[Long], - assemblyName: String, - accession: Option[String], - releaseDate: Option[LocalDate], - sourceOrganism: Option[String], - assemblyLevel: Option[String], - metadata: Option[JsValue] - ) \ No newline at end of file diff --git a/app/models/domain/genomics/BiologicalSex.scala b/app/models/domain/genomics/BiologicalSex.scala deleted file mode 100644 index 700a43ec..00000000 --- a/app/models/domain/genomics/BiologicalSex.scala +++ /dev/null @@ -1,58 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.* - -/** - * Represents the biological sex of an individual or specimen. - * - * The available enumeration values are: - * - `Male`: Represents male biological sex. - * - `Female`: Represents female biological sex. - * - `Unknown`: Represents an undefined or unstated biological sex. - * - `Intersex`: Represents intersex biological sex. - */ -enum BiologicalSex { - case Male, Female, Unknown, Intersex - - def toLowerCase: String = this match { - case Male => "male" - case Female => "female" - case Unknown => "unknown" - case Intersex => "intersex" - } -} - -/** - * Provides functionality for interpreting and converting string values into - * the corresponding `BiologicalSex` enumeration values. It also includes - * implicit JSON formatting support for working with `BiologicalSex` in JSON - * serialization and deserialization. 
- * - * This object supports mapping well-known string representations of - * biological sexes (e.g., "male", "female", "intersex") to their respective - * enumeration values, as well as defaulting to `Unknown` for unsupported or - * undefined values. - * - * - `fromString`: Converts a string value into a `BiologicalSex` instance - * based on a case-insensitive match. Defaults to `Unknown` if no match is found. - * - `format`: An implicit `Format` instance for handling JSON reads and writes - * for `BiologicalSex` enumeration values. Assumes `String` values for both - * serialization and deserialization. - */ -object BiologicalSex { - def fromString(s: String): BiologicalSex = Option(s).map(_.trim.toLowerCase) match { - case Some("male") => Male - case Some("female") => Female - case Some("intersex") => Intersex - case _ => Unknown - } - - implicit val format: Format[BiologicalSex] = new Format[BiologicalSex] { - def reads(json: JsValue): JsResult[BiologicalSex] = json match { - case JsString(s) => JsSuccess(fromString(s)) - case _ => JsError("String value expected") - } - - def writes(sex: BiologicalSex): JsValue = JsString(sex.toLowerCase) - } -} \ No newline at end of file diff --git a/app/models/domain/genomics/Biosample.scala b/app/models/domain/genomics/Biosample.scala deleted file mode 100644 index 8fe382cd..00000000 --- a/app/models/domain/genomics/Biosample.scala +++ /dev/null @@ -1,47 +0,0 @@ -package models.domain.genomics - -import com.vividsolutions.jts.geom.Point -import play.api.libs.json.{JsValue, Json, OFormat} - -import java.util.UUID - -case class Biosample( - id: Option[Int] = None, - sampleGuid: UUID, - sampleAccession: String, - description: String, - alias: Option[String], - centerName: String, - specimenDonorId: Option[Int], - locked: Boolean = false, - sourcePlatform: Option[String] = None, - originalHaplogroups: Option[JsValue] = None - ) { - - def getOriginalHaplogroupEntries: Seq[OriginalHaplogroupEntry] = - 
originalHaplogroups.flatMap(_.asOpt[Seq[OriginalHaplogroupEntry]]).getOrElse(Seq.empty) - - def findHaplogroupByPublication(publicationId: Int): Option[OriginalHaplogroupEntry] = - getOriginalHaplogroupEntries.find(_.publicationId == publicationId) - - def withHaplogroupEntry(entry: OriginalHaplogroupEntry): Biosample = { - val existing = getOriginalHaplogroupEntries.filterNot(_.publicationId == entry.publicationId) - copy(originalHaplogroups = Some(Json.toJson(existing :+ entry))) - } - - def withoutHaplogroupForPublication(publicationId: Int): Biosample = { - val remaining = getOriginalHaplogroupEntries.filterNot(_.publicationId == publicationId) - copy(originalHaplogroups = Some(Json.toJson(remaining))) - } -} - -case class OriginalHaplogroupEntry( - publicationId: Int, - yHaplogroupResult: Option[HaplogroupResult] = None, - mtHaplogroupResult: Option[HaplogroupResult] = None, - notes: Option[String] = None - ) - -object OriginalHaplogroupEntry { - implicit val format: OFormat[OriginalHaplogroupEntry] = Json.format[OriginalHaplogroupEntry] -} diff --git a/app/models/domain/genomics/BiosampleCallableLoci.scala b/app/models/domain/genomics/BiosampleCallableLoci.scala deleted file mode 100644 index 22b8229b..00000000 --- a/app/models/domain/genomics/BiosampleCallableLoci.scala +++ /dev/null @@ -1,20 +0,0 @@ -package models.domain.genomics - -import java.time.LocalDateTime -import java.util.UUID - -case class BiosampleCallableLoci( - id: Option[Int] = None, - sampleType: String, - sampleId: Int, - sampleGuid: Option[UUID], - chromosome: String, - totalCallableBp: Long, - regionCount: Option[Int], - bedFileHash: Option[String], - computedAt: LocalDateTime, - sourceTestTypeId: Option[Int], - yXdegenCallableBp: Option[Long], - yAmpliconicCallableBp: Option[Long], - yPalindromicCallableBp: Option[Long] -) diff --git a/app/models/domain/genomics/BiosampleHaplogroup.scala b/app/models/domain/genomics/BiosampleHaplogroup.scala deleted file mode 100644 index 
93dd3259..00000000 --- a/app/models/domain/genomics/BiosampleHaplogroup.scala +++ /dev/null @@ -1,13 +0,0 @@ -package models.domain.genomics - -import java.util.UUID - -/** - * Represents the association between a biological sample and its haplogroups. - * - * @param sampleGuid The universally unique identifier (UUID) for the biological sample. - * @param yHaplogroupId An optional identifier for the Y-haplogroup associated with the sample. - * @param mtHaplogroupId An optional identifier for the mitochondrial (MT) haplogroup associated with the sample. - */ -case class BiosampleHaplogroup(sampleGuid: UUID, yHaplogroupId: Option[Int], mtHaplogroupId: Option[Int]) - diff --git a/app/models/domain/genomics/BiosampleType.scala b/app/models/domain/genomics/BiosampleType.scala deleted file mode 100644 index bf4f9049..00000000 --- a/app/models/domain/genomics/BiosampleType.scala +++ /dev/null @@ -1,22 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.* - - -enum BiosampleType { - case Standard, PGP, Citizen, Ancient -} - -object BiosampleType { - implicit val format: Format[BiosampleType] = new Format[BiosampleType] { - def reads(json: JsValue): JsResult[BiosampleType] = json match { - case JsString(s) => BiosampleType.valueOf(s) match { - case bt: BiosampleType => JsSuccess(bt) - case null => JsError(s"Unknown BiosampleType: $s") - } - case _ => JsError("String value expected") - } - - def writes(bt: BiosampleType): JsValue = JsString(bt.toString) - } -} diff --git a/app/models/domain/genomics/CitizenBiosample.scala b/app/models/domain/genomics/CitizenBiosample.scala deleted file mode 100644 index 235f528a..00000000 --- a/app/models/domain/genomics/CitizenBiosample.scala +++ /dev/null @@ -1,30 +0,0 @@ -package models.domain.genomics - -import com.vividsolutions.jts.geom.Point - -import java.time.{LocalDate, LocalDateTime} -import java.util.UUID - -/** - * Represents a biosample of type "Citizen", typically ingested from external sources/Firehose. 
- * Maps to the `citizen_biosample` table. - */ -case class CitizenBiosample( - id: Option[Int] = None, - atUri: Option[String], - accession: Option[String], - alias: Option[String], - sourcePlatform: Option[String], - collectionDate: Option[LocalDate], - sex: Option[BiologicalSex], - geocoord: Option[Point], - description: Option[String], - yHaplogroup: Option[HaplogroupResult] = None, - mtHaplogroup: Option[HaplogroupResult] = None, - sampleGuid: UUID, - deleted: Boolean = false, - atCid: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now(), - specimenDonorId: Option[Int] = None - ) diff --git a/app/models/domain/genomics/CoverageBenchmark.scala b/app/models/domain/genomics/CoverageBenchmark.scala deleted file mode 100644 index 372fb061..00000000 --- a/app/models/domain/genomics/CoverageBenchmark.scala +++ /dev/null @@ -1,53 +0,0 @@ -package models.domain.genomics - -/** - * Represents aggregated coverage benchmark data grouped by lab, test type, and contig. 
- * - * @param lab Laboratory name - * @param testType Type of test performed - * @param contig Common name of the contig - * @param meanReadLen Average read length - * @param minReadLen Minimum read length - * @param maxReadLen Maximum read length - * @param meanInsertLen Average insert size - * @param minInsertLen Minimum insert size - * @param maxInsertLen Maximum insert size - * @param meanDepthAvg Average of mean depth values - * @param meanDepthStddev Standard deviation of mean depth (for 95% CI) - * @param basesNoCoverageAvg Average of bases with no coverage - * @param basesNoCoverageStddev Standard deviation of bases with no coverage (for 95% CI) - * @param basesLowQualMappingAvg Average of bases with low quality mapping - * @param basesLowQualMappingStddev Standard deviation of bases with low quality mapping (for 95% CI) - * @param basesCallableAvg Average of callable bases - * @param basesCallableStddev Standard deviation of callable bases (for 95% CI) - * @param meanMappingQuality Average mapping quality - * @param numSamples Number of samples in the group - */ -case class CoverageBenchmark( - lab: String, - testType: String, - contig: String, - meanReadLen: Option[Double], - minReadLen: Option[Int], - maxReadLen: Option[Int], - meanInsertLen: Option[Double], - minInsertLen: Option[Int], - maxInsertLen: Option[Int], - meanDepthAvg: Option[Double], - meanDepthStddev: Option[Double], - basesNoCoverageAvg: Option[Double], - basesNoCoverageStddev: Option[Double], - basesLowQualMappingAvg: Option[Double], - basesLowQualMappingStddev: Option[Double], - basesCallableAvg: Option[Double], - basesCallableStddev: Option[Double], - meanMappingQuality: Option[Double], - numSamples: Int - ) - -object CoverageBenchmark { - - import play.api.libs.json.* - - implicit val coverageBenchmarkFormat: Format[CoverageBenchmark] = Json.format[CoverageBenchmark] -} \ No newline at end of file diff --git a/app/models/domain/genomics/CoverageExpectationProfile.scala 
b/app/models/domain/genomics/CoverageExpectationProfile.scala deleted file mode 100644 index 524026c8..00000000 --- a/app/models/domain/genomics/CoverageExpectationProfile.scala +++ /dev/null @@ -1,82 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime - -case class CoverageExpectationProfile( - id: Option[Int] = None, - testTypeId: Int, - contigName: String, - variantClass: String = "SNP", - minDepthHigh: Double, - minDepthMedium: Double, - minDepthLow: Double, - minCoveragePct: Option[Double] = None, - minMappingQuality: Option[Double] = None, - minCallablePct: Option[Double] = None, - notes: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) { - - def confidenceForDepth(actualDepth: Double): String = { - if (actualDepth >= minDepthHigh) "high" - else if (actualDepth >= minDepthMedium) "medium" - else if (actualDepth >= minDepthLow) "low" - else "insufficient" - } -} - -object CoverageExpectationProfile { - implicit val format: OFormat[CoverageExpectationProfile] = Json.format[CoverageExpectationProfile] -} - -sealed trait VariantClass { - def dbValue: String -} - -object VariantClass { - case object SNP extends VariantClass { val dbValue = "SNP" } - case object STR extends VariantClass { val dbValue = "STR" } - case object INDEL extends VariantClass { val dbValue = "INDEL" } - - def fromString(s: String): Option[VariantClass] = s.toUpperCase match { - case "SNP" => Some(SNP) - case "STR" => Some(STR) - case "INDEL" => Some(INDEL) - case _ => None - } -} - -case class VariantCallingConfidence( - contigName: String, - variantClass: String, - depthConfidence: String, - coverageAdequate: Boolean, - mappingQualityAdequate: Boolean, - callableBasesAdequate: Boolean, - overallConfidence: String, - details: Map[String, String] = Map.empty - ) - -object VariantCallingConfidence { - implicit val format: 
OFormat[VariantCallingConfidence] = Json.format[VariantCallingConfidence] - - val HIGH = "high" - val MEDIUM = "medium" - val LOW = "low" - val INSUFFICIENT = "insufficient" -} - -case class SampleCoverageAssessment( - testTypeCode: String, - testTypeDisplayName: String, - isChipBased: Boolean, - confidences: Seq[VariantCallingConfidence], - overallConfidence: String - ) - -object SampleCoverageAssessment { - implicit val format: OFormat[SampleCoverageAssessment] = Json.format[SampleCoverageAssessment] -} diff --git a/app/models/domain/genomics/GenbankContig.scala b/app/models/domain/genomics/GenbankContig.scala deleted file mode 100644 index 9b234223..00000000 --- a/app/models/domain/genomics/GenbankContig.scala +++ /dev/null @@ -1,19 +0,0 @@ -package models.domain.genomics - -/** - * Represents a GenBank contig, a segment of DNA or RNA sequence, with associated information such as - * accession number, common name, reference genome, and sequence length. - * - * @param id An optional unique identifier for the GenBank contig. Typically used for internal purposes. - * @param accession The accession number of the contig, providing a unique reference to this sequence in an external database. - * @param commonName An optional common name assigned to the contig for easier identification. - * @param referenceGenome An optional reference genome name or identifier associated with the contig. - * @param seqLength The length of the DNA or RNA sequence represented by this contig. 
- */ -case class GenbankContig( - id: Option[Int] = None, - accession: String, - commonName: Option[String], - referenceGenome: Option[String], - seqLength: Int, - ) \ No newline at end of file diff --git a/app/models/domain/genomics/GeneAnnotation.scala b/app/models/domain/genomics/GeneAnnotation.scala deleted file mode 100644 index 29f1916e..00000000 --- a/app/models/domain/genomics/GeneAnnotation.scala +++ /dev/null @@ -1,9 +0,0 @@ -package models.domain.genomics - -case class GeneAnnotation( - id: Option[Long], - geneSymbol: Option[String], - geneId: Option[String], - description: Option[String], - representativeSequenceNodeId: Option[Int] - ) \ No newline at end of file diff --git a/app/models/domain/genomics/GenomeRegion.scala b/app/models/domain/genomics/GenomeRegion.scala deleted file mode 100644 index d086f4d3..00000000 --- a/app/models/domain/genomics/GenomeRegion.scala +++ /dev/null @@ -1,38 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Format, JsValue, Json} - -/** - * Coordinate information for a specific reference genome build. - */ -case class RegionCoordinate( - contig: String, - start: Long, - end: Long -) - -object RegionCoordinate { - implicit val format: Format[RegionCoordinate] = Json.format[RegionCoordinate] -} - -/** - * Represents a structural region within a chromosome (or a cytoband). - * Supports multi-reference coordinates. - * - * @param id Optional unique identifier (region_id). - * @param regionType The type of region (e.g., "Centromere", "Cytoband", "PAR1"). - * @param name Optional name (e.g., "p11.32" for cytobands, "P1" for palindromes). - * @param coordinates Map of BuildName -> Coordinate (e.g., "GRCh38" -> {contig: "chrY", start: ...}). - * @param properties Additional properties as JSON (e.g., {"stain": "gpos75", "modifier": 0.5}). 
- */ -case class GenomeRegion( - id: Option[Int] = None, - regionType: String, - name: Option[String], - coordinates: Map[String, RegionCoordinate], - properties: JsValue -) - -object GenomeRegion { - implicit val format: Format[GenomeRegion] = Json.format[GenomeRegion] -} \ No newline at end of file diff --git a/app/models/domain/genomics/GenomeRegionVersion.scala b/app/models/domain/genomics/GenomeRegionVersion.scala deleted file mode 100644 index 138ca5a2..00000000 --- a/app/models/domain/genomics/GenomeRegionVersion.scala +++ /dev/null @@ -1,19 +0,0 @@ -package models.domain.genomics - -import java.time.Instant - -/** - * Tracks the data version for genome region data per reference build. - * Used for ETag generation and cache invalidation. - * - * @param id Optional unique identifier. - * @param referenceGenome The canonical reference genome name (e.g., "GRCh37", "GRCh38", "hs1"). - * @param dataVersion Semantic version string (e.g., "2024.12.1"). - * @param updatedAt Timestamp when the version was last updated. - */ -case class GenomeRegionVersion( - id: Option[Int] = None, - referenceGenome: String, - dataVersion: String, - updatedAt: Instant -) diff --git a/app/models/domain/genomics/GenotypeData.scala b/app/models/domain/genomics/GenotypeData.scala deleted file mode 100644 index b130dfa2..00000000 --- a/app/models/domain/genomics/GenotypeData.scala +++ /dev/null @@ -1,72 +0,0 @@ -package models.domain.genomics - -import models.atmosphere.FileInfo -import play.api.libs.json.* - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Genotype metrics - stored as JSONB to reduce tuple size. - * Contains quality metrics, dates, and haplogroup calls. 
- */ -case class GenotypeMetrics( - // Quality metrics - totalMarkersCalled: Option[Int] = None, - totalMarkersPossible: Option[Int] = None, - callRate: Option[Double] = None, - noCallRate: Option[Double] = None, - yMarkersCalled: Option[Int] = None, - yMarkersTotal: Option[Int] = None, - mtMarkersCalled: Option[Int] = None, - mtMarkersTotal: Option[Int] = None, - autosomalMarkersCalled: Option[Int] = None, - hetRate: Option[Double] = None, - // Dates - testDate: Option[LocalDateTime] = None, - processedAt: Option[LocalDateTime] = None, - // Derived haplogroups - derivedYHaplogroup: Option[HaplogroupResult] = None, - derivedMtHaplogroup: Option[HaplogroupResult] = None, - // Files - files: Option[Seq[FileInfo]] = None -) - -object GenotypeMetrics { - implicit val format: Format[GenotypeMetrics] = Json.format[GenotypeMetrics] -} - -/** - * Represents genotype data from SNP array/chip testing. - * - * @param id Auto-generated primary key - * @param atUri AT URI for this record (for citizen-owned data) - * @param atCid Content identifier for version tracking - * @param sampleGuid UUID of the associated sample - * @param testTypeId Foreign key to test_type_definition - * @param provider Testing provider (e.g., 23andMe, AncestryDNA) - * @param chipVersion Chip version identifier - * @param buildVersion Genome build version (GRCh37, GRCh38) - * @param sourceFileHash SHA-256 for deduplication - * @param metrics All quality metrics, dates, haplogroups, files (JSONB) - * @param populationBreakdownId Foreign key to population_breakdown - * @param deleted Soft delete flag - * @param createdAt Record creation timestamp - * @param updatedAt Record update timestamp - */ -case class GenotypeData( - id: Option[Int] = None, - atUri: Option[String] = None, - atCid: Option[String] = None, - sampleGuid: UUID, - testTypeId: Option[Int] = None, - provider: Option[String] = None, - chipVersion: Option[String] = None, - buildVersion: Option[String] = None, - sourceFileHash: Option[String] 
= None, - metrics: GenotypeMetrics = GenotypeMetrics(), - populationBreakdownId: Option[Int] = None, - deleted: Boolean = false, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) diff --git a/app/models/domain/genomics/HaplogroupReconciliation.scala b/app/models/domain/genomics/HaplogroupReconciliation.scala deleted file mode 100644 index 65b7c317..00000000 --- a/app/models/domain/genomics/HaplogroupReconciliation.scala +++ /dev/null @@ -1,116 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.* - -import java.time.LocalDateTime - -/** - * DNA type enum for reconciliation records. - */ -enum DnaType { - case Y_DNA, MT_DNA -} - -object DnaType { - def fromString(s: String): Option[DnaType] = s.toUpperCase match { - case "Y_DNA" => Some(Y_DNA) - case "MT_DNA" => Some(MT_DNA) - case _ => None - } - - implicit val format: Format[DnaType] = new Format[DnaType] { - def reads(json: JsValue): JsResult[DnaType] = json match { - case JsString(s) => fromString(s).map(JsSuccess(_)).getOrElse(JsError(s"Unknown DnaType: $s")) - case _ => JsError("String value expected") - } - - def writes(dt: DnaType): JsValue = JsString(dt.toString) - } -} - -/** - * Compatibility level enum for reconciliation status. 
- */ -enum CompatibilityLevel { - case COMPATIBLE, MINOR_DIVERGENCE, MAJOR_DIVERGENCE, INCOMPATIBLE -} - -object CompatibilityLevel { - def fromString(s: String): Option[CompatibilityLevel] = s.toUpperCase match { - case "COMPATIBLE" => Some(COMPATIBLE) - case "MINOR_DIVERGENCE" => Some(MINOR_DIVERGENCE) - case "MAJOR_DIVERGENCE" => Some(MAJOR_DIVERGENCE) - case "INCOMPATIBLE" => Some(INCOMPATIBLE) - case _ => None - } - - implicit val format: Format[CompatibilityLevel] = new Format[CompatibilityLevel] { - def reads(json: JsValue): JsResult[CompatibilityLevel] = json match { - case JsString(s) => fromString(s).map(JsSuccess(_)).getOrElse(JsError(s"Unknown CompatibilityLevel: $s")) - case _ => JsError("String value expected") - } - - def writes(cl: CompatibilityLevel): JsValue = JsString(cl.toString) - } -} - -/** - * Reconciliation status metrics - stored as JSONB to reduce tuple size. - * Contains: compatibilityLevel, consensusHaplogroup, statusConfidence, - * branchCompatibilityScore, snpConcordance, runCount, warnings - */ -case class ReconciliationStatus( - compatibilityLevel: Option[String] = None, // COMPATIBLE, MINOR_DIVERGENCE, MAJOR_DIVERGENCE, INCOMPATIBLE - consensusHaplogroup: Option[String] = None, - statusConfidence: Option[Double] = None, - branchCompatibilityScore: Option[Double] = None, - snpConcordance: Option[Double] = None, - runCount: Option[Int] = None, - warnings: Option[Seq[String]] = None -) - -object ReconciliationStatus { - implicit val format: Format[ReconciliationStatus] = Json.format[ReconciliationStatus] -} - -/** - * Represents haplogroup reconciliation results for a specimen donor. - * Stored at the donor level since a donor may have multiple biosamples/runs. 
- * - * @param id Auto-generated primary key - * @param atUri AT URI for this record - * @param atCid Content identifier for version tracking - * @param specimenDonorId Foreign key to specimen_donor - * @param dnaType Y_DNA or MT_DNA - * @param status Reconciliation status metrics (JSONB) - * @param runCalls Array of RunHaplogroupCall objects (stored as JSONB) - * @param snpConflicts Array of SnpConflict objects (stored as JSONB) - * @param heteroplasmyObservations Array of HeteroplasmyObservation objects (stored as JSONB) - * @param identityVerification Identity verification metrics (stored as JSONB) - * @param manualOverride Manual override if user corrected the consensus (stored as JSONB) - * @param auditLog Audit log of reconciliation changes (stored as JSONB) - * @param lastReconciliationAt When reconciliation was last performed - * @param deleted Soft delete flag - * @param createdAt Record creation timestamp - * @param updatedAt Record update timestamp - */ -case class HaplogroupReconciliation( - id: Option[Int] = None, - atUri: Option[String] = None, - atCid: Option[String] = None, - specimenDonorId: Int, - dnaType: DnaType, - // Status metrics consolidated into JSONB - status: ReconciliationStatus = ReconciliationStatus(), - // JSONB fields stored as play.api.libs.json.JsValue - runCalls: JsValue, // Required: array of RunHaplogroupCall - snpConflicts: Option[JsValue] = None, - heteroplasmyObservations: Option[JsValue] = None, - identityVerification: Option[JsValue] = None, - manualOverride: Option[JsValue] = None, - auditLog: Option[JsValue] = None, - lastReconciliationAt: Option[LocalDateTime] = None, - deleted: Boolean = false, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) diff --git a/app/models/domain/genomics/HaplogroupResult.scala b/app/models/domain/genomics/HaplogroupResult.scala deleted file mode 100644 index 238501c8..00000000 --- a/app/models/domain/genomics/HaplogroupResult.scala +++ 
/dev/null @@ -1,17 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Json, OFormat} - -case class HaplogroupResult( - haplogroupName: String, - score: Double, - matchingSnps: Int, - mismatchingSnps: Int, - ancestralMatches: Int, - treeDepth: Int, - lineagePath: Seq[String] - ) - -object HaplogroupResult { - implicit val format: OFormat[HaplogroupResult] = Json.format[HaplogroupResult] -} diff --git a/app/models/domain/genomics/InstrumentAssociationProposal.scala b/app/models/domain/genomics/InstrumentAssociationProposal.scala deleted file mode 100644 index ced631ec..00000000 --- a/app/models/domain/genomics/InstrumentAssociationProposal.scala +++ /dev/null @@ -1,87 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime - -case class InstrumentAssociationProposal( - id: Option[Int] = None, - instrumentId: String, - proposedLabName: String, - proposedManufacturer: Option[String] = None, - proposedModel: Option[String] = None, - existingLabId: Option[Int] = None, - observationCount: Int = 0, - distinctCitizenCount: Int = 0, - confidenceScore: Double = 0.0, - earliestObservation: Option[LocalDateTime] = None, - latestObservation: Option[LocalDateTime] = None, - status: ProposalStatus = ProposalStatus.Pending, - reviewedAt: Option[LocalDateTime] = None, - reviewedBy: Option[String] = None, - reviewNotes: Option[String] = None, - acceptedLabId: Option[Int] = None, - acceptedInstrumentId: Option[Int] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) - -object InstrumentAssociationProposal { - implicit val format: OFormat[InstrumentAssociationProposal] = Json.format[InstrumentAssociationProposal] -} - -sealed trait ProposalStatus { - def dbValue: String -} - -object ProposalStatus { - case object Pending extends ProposalStatus { val dbValue = "PENDING" } - case object ReadyForReview extends ProposalStatus { val dbValue = 
"READY_FOR_REVIEW" } - case object UnderReview extends ProposalStatus { val dbValue = "UNDER_REVIEW" } - case object Accepted extends ProposalStatus { val dbValue = "ACCEPTED" } - case object Rejected extends ProposalStatus { val dbValue = "REJECTED" } - case object Superseded extends ProposalStatus { val dbValue = "SUPERSEDED" } - - def fromString(s: String): ProposalStatus = s.toUpperCase match { - case "PENDING" => Pending - case "READY_FOR_REVIEW" => ReadyForReview - case "UNDER_REVIEW" => UnderReview - case "ACCEPTED" => Accepted - case "REJECTED" => Rejected - case "SUPERSEDED" => Superseded - case other => throw new IllegalArgumentException(s"Unknown ProposalStatus: $other") - } - - implicit val format: play.api.libs.json.Format[ProposalStatus] = new play.api.libs.json.Format[ProposalStatus] { - def reads(json: play.api.libs.json.JsValue) = json match { - case play.api.libs.json.JsString(s) => play.api.libs.json.JsSuccess(fromString(s)) - case _ => play.api.libs.json.JsError("String value expected") - } - def writes(s: ProposalStatus) = play.api.libs.json.JsString(s.dbValue) - } -} - -sealed trait AssociationSource { - def dbValue: String -} - -object AssociationSource { - case object Curator extends AssociationSource { val dbValue = "CURATOR" } - case object Consensus extends AssociationSource { val dbValue = "CONSENSUS" } - case object Publication extends AssociationSource { val dbValue = "PUBLICATION" } - - def fromString(s: String): AssociationSource = s.toUpperCase match { - case "CURATOR" => Curator - case "CONSENSUS" => Consensus - case "PUBLICATION" => Publication - case other => throw new IllegalArgumentException(s"Unknown AssociationSource: $other") - } - - implicit val format: play.api.libs.json.Format[AssociationSource] = new play.api.libs.json.Format[AssociationSource] { - def reads(json: play.api.libs.json.JsValue) = json match { - case play.api.libs.json.JsString(s) => play.api.libs.json.JsSuccess(fromString(s)) - case _ => 
play.api.libs.json.JsError("String value expected") - } - def writes(s: AssociationSource) = play.api.libs.json.JsString(s.dbValue) - } -} diff --git a/app/models/domain/genomics/InstrumentObservation.scala b/app/models/domain/genomics/InstrumentObservation.scala deleted file mode 100644 index 5b1b6eaa..00000000 --- a/app/models/domain/genomics/InstrumentObservation.scala +++ /dev/null @@ -1,51 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime - -case class InstrumentObservation( - id: Option[Int] = None, - atUri: String, - atCid: Option[String] = None, - instrumentId: String, - labName: String, - biosampleRef: String, - sequenceRunRef: Option[String] = None, - platform: Option[String] = None, - instrumentModel: Option[String] = None, - flowcellId: Option[String] = None, - runDate: Option[LocalDateTime] = None, - confidence: ObservationConfidence = ObservationConfidence.Inferred, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: Option[LocalDateTime] = None - ) - -object InstrumentObservation { - implicit val format: OFormat[InstrumentObservation] = Json.format[InstrumentObservation] -} - -sealed trait ObservationConfidence { - def dbValue: String -} - -object ObservationConfidence { - case object Known extends ObservationConfidence { val dbValue = "KNOWN" } - case object Inferred extends ObservationConfidence { val dbValue = "INFERRED" } - case object Guessed extends ObservationConfidence { val dbValue = "GUESSED" } - - def fromString(s: String): ObservationConfidence = s.toUpperCase match { - case "KNOWN" => Known - case "INFERRED" => Inferred - case "GUESSED" => Guessed - case other => throw new IllegalArgumentException(s"Unknown ObservationConfidence: $other") - } - - implicit val format: play.api.libs.json.Format[ObservationConfidence] = new play.api.libs.json.Format[ObservationConfidence] { - def reads(json: play.api.libs.json.JsValue) = json match { - case 
play.api.libs.json.JsString(s) => play.api.libs.json.JsSuccess(fromString(s)) - case _ => play.api.libs.json.JsError("String value expected") - } - def writes(c: ObservationConfidence) = play.api.libs.json.JsString(c.dbValue) - } -} diff --git a/app/models/domain/genomics/MinHashSketch.scala b/app/models/domain/genomics/MinHashSketch.scala deleted file mode 100644 index b336a26f..00000000 --- a/app/models/domain/genomics/MinHashSketch.scala +++ /dev/null @@ -1,31 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Format, Json} - -import java.nio.ByteBuffer -import java.time.LocalDateTime - -case class MinHashSketch( - kmerHashes: Array[Long], // The actual MinHash values - finalHash: String // SHA256 of the sorted kmerHashes for quick identity checks - ) - -object MinHashSketch { - def computeJaccard(sketch1: MinHashSketch, sketch2: MinHashSketch): Double = { - val set1 = sketch1.kmerHashes.toSet - val set2 = sketch2.kmerHashes.toSet - val intersection = set1.intersect(set2).size - val union = set1.union(set2).size - intersection.toDouble / union - } -} - -case class SequenceLibrarySketch( - id: Option[Int] = None, - sequenceLibraryId: Int, - autosomalSketch: MinHashSketch, - yChromosomeSketch: Option[MinHashSketch], - mtDnaSketch: Option[MinHashSketch], - createdAt: LocalDateTime = LocalDateTime.now() - ) - diff --git a/app/models/domain/genomics/MutationType.scala b/app/models/domain/genomics/MutationType.scala deleted file mode 100644 index dbd7b055..00000000 --- a/app/models/domain/genomics/MutationType.scala +++ /dev/null @@ -1,94 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.* - -/** - * Represents the type of genetic mutation. 
- * - * Each mutation type has associated properties: - * - `dbValue`: The string stored in the database - * - `category`: Classification as Point, Repeat, or Structural - * - `displayName`: Human-readable name for UI display - */ -enum MutationType(val dbValue: String, val category: MutationCategory, val displayName: String) { - // Point mutations - single nucleotide or small changes - case SNP extends MutationType("SNP", MutationCategory.Point, "Single Nucleotide Polymorphism") - case INDEL extends MutationType("INDEL", MutationCategory.Point, "Insertion/Deletion") - case MNP extends MutationType("MNP", MutationCategory.Point, "Multi-Nucleotide Polymorphism") - - // Repeat variations - case STR extends MutationType("STR", MutationCategory.Repeat, "Short Tandem Repeat") - - // Structural variants - larger genomic rearrangements - case DEL extends MutationType("DEL", MutationCategory.Structural, "Deletion") - case DUP extends MutationType("DUP", MutationCategory.Structural, "Duplication") - case INS extends MutationType("INS", MutationCategory.Structural, "Insertion") - case INV extends MutationType("INV", MutationCategory.Structural, "Inversion") - case CNV extends MutationType("CNV", MutationCategory.Structural, "Copy Number Variant") - case TRANS extends MutationType("TRANS", MutationCategory.Structural, "Translocation") - - override def toString: String = dbValue - - def isPointMutation: Boolean = category == MutationCategory.Point - def isRepeat: Boolean = category == MutationCategory.Repeat - def isStructural: Boolean = category == MutationCategory.Structural -} - -/** - * Category of mutation types. - */ -enum MutationCategory { - case Point, Repeat, Structural -} - -object MutationType { - /** - * Parse a database string value to MutationType. 
- */ - def fromString(str: String): Option[MutationType] = str.toUpperCase match { - case "SNP" => Some(SNP) - case "INDEL" => Some(INDEL) - case "MNP" => Some(MNP) - case "STR" => Some(STR) - case "DEL" => Some(DEL) - case "DUP" => Some(DUP) - case "INS" => Some(INS) - case "INV" => Some(INV) - case "CNV" => Some(CNV) - case "TRANS" => Some(TRANS) - case _ => None - } - - /** - * Parse with a default fallback. - */ - def fromStringOrDefault(str: String, default: MutationType = SNP): MutationType = - fromString(str).getOrElse(default) - - /** - * All point mutation types. - */ - val pointTypes: Set[MutationType] = Set(SNP, INDEL, MNP) - - /** - * All structural variant types. - */ - val structuralTypes: Set[MutationType] = Set(DEL, DUP, INS, INV, CNV, TRANS) - - /** - * All mutation types. - */ - val allTypes: Set[MutationType] = MutationType.values.toSet - - // JSON serialization - implicit val reads: Reads[MutationType] = Reads.StringReads.flatMap { str => - fromString(str) match { - case Some(mt) => Reads.pure(mt) - case None => Reads.failed(s"Invalid MutationType: $str") - } - } - - implicit val writes: Writes[MutationType] = Writes.StringWrites.contramap(_.dbValue) - - implicit val format: Format[MutationType] = Format(reads, writes) -} diff --git a/app/models/domain/genomics/NamingStatus.scala b/app/models/domain/genomics/NamingStatus.scala deleted file mode 100644 index 2a3e8c36..00000000 --- a/app/models/domain/genomics/NamingStatus.scala +++ /dev/null @@ -1,60 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.* - -/** - * Represents the naming status of a variant. 
- * - * Each status has associated properties: - * - `dbValue`: The string stored in the database - * - `displayName`: Human-readable name for UI display - * - `isNamed`: Whether the variant has an official name - */ -enum NamingStatus(val dbValue: String, val displayName: String, val isNamed: Boolean) { - /** - * Variant has no official name - typically identified only by coordinates. - */ - case Unnamed extends NamingStatus("UNNAMED", "Unnamed", false) - - /** - * Variant has been submitted for naming review but not yet approved. - */ - case PendingReview extends NamingStatus("PENDING_REVIEW", "Pending Review", false) - - /** - * Variant has an official canonical name. - */ - case Named extends NamingStatus("NAMED", "Named", true) - - override def toString: String = dbValue -} - -object NamingStatus { - /** - * Parse a database string value to NamingStatus. - */ - def fromString(str: String): Option[NamingStatus] = str.toUpperCase match { - case "UNNAMED" => Some(Unnamed) - case "PENDING_REVIEW" => Some(PendingReview) - case "NAMED" => Some(Named) - case _ => None - } - - /** - * Parse with a default fallback. 
- */ - def fromStringOrDefault(str: String, default: NamingStatus = Unnamed): NamingStatus = - fromString(str).getOrElse(default) - - // JSON serialization - implicit val reads: Reads[NamingStatus] = Reads.StringReads.flatMap { str => - fromString(str) match { - case Some(ns) => Reads.pure(ns) - case None => Reads.failed(s"Invalid NamingStatus: $str") - } - } - - implicit val writes: Writes[NamingStatus] = Writes.StringWrites.contramap(_.dbValue) - - implicit val format: Format[NamingStatus] = Format(reads, writes) -} diff --git a/app/models/domain/genomics/Population.scala b/app/models/domain/genomics/Population.scala deleted file mode 100644 index dc58f809..00000000 --- a/app/models/domain/genomics/Population.scala +++ /dev/null @@ -1,13 +0,0 @@ -package models.domain.genomics - -/** - * Represents a genetic population group or demographic population with a unique name and identifier. - * - * @param id An optional unique identifier for the population, used for internal purposes. - * @param populationName The name of the population, which serves as a primary identifier. - */ -case class Population( - id: Option[Int], - populationName: String - // parentPopulationId: Option[Long] - ) diff --git a/app/models/domain/genomics/PopulationBreakdown.scala b/app/models/domain/genomics/PopulationBreakdown.scala deleted file mode 100644 index c6f2dea4..00000000 --- a/app/models/domain/genomics/PopulationBreakdown.scala +++ /dev/null @@ -1,115 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.* - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Represents an ancestry analysis breakdown using PCA projection onto reference populations. 
- * - * @param id Auto-generated primary key - * @param atUri AT URI for this record (for citizen-owned data) - * @param atCid Content identifier for version tracking - * @param sampleGuid UUID of the associated sample - * @param analysisMethod Analysis method used (e.g., PCA_PROJECTION_GMM, ADMIXTURE) - * @param panelType Panel type: "aims" (~5k SNPs) or "genome-wide" (~500k SNPs) - * @param referencePopulations Reference panel name (e.g., "1000G_HGDP_v1") - * @param snpsAnalyzed Total SNPs in the analysis panel - * @param snpsWithGenotype SNPs with valid genotype calls - * @param snpsMissing SNPs with no call or missing data - * @param confidenceLevel Overall confidence 0.0-1.0 - * @param pcaCoordinates First 3 PCA coordinates [x, y, z] - * @param analysisDate When the analysis was performed - * @param pipelineVersion Version of the analysis pipeline - * @param referenceVersion Version of the reference panel - * @param deleted Soft delete flag - * @param createdAt Record creation timestamp - * @param updatedAt Record update timestamp - */ -case class PopulationBreakdown( - id: Option[Int] = None, - atUri: Option[String] = None, - atCid: Option[String] = None, - sampleGuid: UUID, - analysisMethod: String, - panelType: Option[String] = None, - referencePopulations: Option[String] = None, - snpsAnalyzed: Option[Int] = None, - snpsWithGenotype: Option[Int] = None, - snpsMissing: Option[Int] = None, - confidenceLevel: Option[Double] = None, - pcaCoordinates: Option[PcaCoordinatesJsonb] = None, - analysisDate: Option[LocalDateTime] = None, - pipelineVersion: Option[String] = None, - referenceVersion: Option[String] = None, - deleted: Boolean = false, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) - -/** - * JSONB type for PCA coordinates stored as an array of 3 doubles. 
- */ -case class PcaCoordinatesJsonb( - x: Double, - y: Double, - z: Double - ) - -object PcaCoordinatesJsonb { - implicit val format: OFormat[PcaCoordinatesJsonb] = Json.format[PcaCoordinatesJsonb] -} - -/** - * Represents a single population component in an ancestry breakdown. - * - * @param id Auto-generated primary key - * @param populationBreakdownId Foreign key to parent population_breakdown - * @param populationCode Reference population code (e.g., CEU, YRI, CHB) - * @param populationName Human-readable population name - * @param superPopulation Continental grouping (e.g., European, African) - * @param percentage Ancestry percentage 0.0-100.0 - * @param confidenceLower 95% confidence interval lower bound - * @param confidenceUpper 95% confidence interval upper bound - * @param rank Display rank by percentage (1 = highest) - */ -case class PopulationComponent( - id: Option[Int] = None, - populationBreakdownId: Int, - populationCode: String, - populationName: Option[String] = None, - superPopulation: Option[String] = None, - percentage: Double, - confidenceLower: Option[Double] = None, - confidenceUpper: Option[Double] = None, - rank: Option[Int] = None - ) - -/** - * Represents a super-population (continental level) summary. - * - * @param id Auto-generated primary key - * @param populationBreakdownId Foreign key to parent population_breakdown - * @param superPopulation Continental grouping name - * @param percentage Combined percentage 0.0-100.0 - * @param populations Array of contributing population codes - */ -case class SuperPopulationSummary( - id: Option[Int] = None, - populationBreakdownId: Int, - superPopulation: String, - percentage: Double, - populations: Option[SuperPopulationListJsonb] = None - ) - -/** - * JSONB type for the list of populations in a super-population. 
- */ -case class SuperPopulationListJsonb( - populations: Seq[String] - ) - -object SuperPopulationListJsonb { - implicit val format: OFormat[SuperPopulationListJsonb] = Json.format[SuperPopulationListJsonb] -} diff --git a/app/models/domain/genomics/SequenceFile.scala b/app/models/domain/genomics/SequenceFile.scala deleted file mode 100644 index 44596540..00000000 --- a/app/models/domain/genomics/SequenceFile.scala +++ /dev/null @@ -1,36 +0,0 @@ -package models.domain.genomics - -import java.time.LocalDateTime -import models.domain.genomics.{SequenceFileAtpLocationJsonb, SequenceFileChecksumJsonb, SequenceFileHttpLocationJsonb} - -/** - * Represents a sequence file with metadata about its library association, format, alignment information, - * and timestamps for creation and updates. - * - * @param id An optional unique identifier for the sequence file, typically used for internal purposes. - * @param libraryId The identifier of the library to which this sequence file belongs. - * @param fileName The name of the file. - * @param fileSizeBytes The size of the file in bytes. - * @param fileFormat The format of the file (e.g., FASTQ, BAM, etc.). - * @param checksums A list of checksums associated with the file in JSONB format. - * @param httpLocations A list of HTTP locations where the file can be accessed, in JSONB format. - * @param atpLocation An optional AT Protocol location for the file, in JSONB format. - * @param aligner The name of the aligner tool used for processing the sequence data. - * @param targetReference The reference genome or target against which the sequence data was aligned. - * @param createdAt The timestamp when the sequence file was created in the system. - * @param updatedAt An optional timestamp indicating the last update time for the sequence file. 
- */ -case class SequenceFile( - id: Option[Int], - libraryId: Int, - fileName: String, - fileSizeBytes: Long, - fileFormat: String, - checksums: List[SequenceFileChecksumJsonb], - httpLocations: List[SequenceFileHttpLocationJsonb], - atpLocation: Option[SequenceFileAtpLocationJsonb], - aligner: String, - targetReference: String, - createdAt: LocalDateTime, - updatedAt: Option[LocalDateTime], - ) diff --git a/app/models/domain/genomics/SequenceFileJsonb.scala b/app/models/domain/genomics/SequenceFileJsonb.scala deleted file mode 100644 index fc0ebb9c..00000000 --- a/app/models/domain/genomics/SequenceFileJsonb.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.domain.genomics - -import java.time.LocalDateTime - -case class SequenceFileChecksumJsonb( - checksum: String, - algorithm: String, - verifiedAt: Option[LocalDateTime], - createdAt: LocalDateTime, - updatedAt: LocalDateTime - ) - -case class SequenceFileHttpLocationJsonb( - url: String, - urlHash: String, - createdAt: LocalDateTime, - updatedAt: LocalDateTime - ) - -case class SequenceFileAtpLocationJsonb( - repoDid: String, - recordUri: String, - cid: String, - createdAt: LocalDateTime, - updatedAt: LocalDateTime - ) diff --git a/app/models/domain/genomics/SequenceLibrary.scala b/app/models/domain/genomics/SequenceLibrary.scala deleted file mode 100644 index 0befb45e..00000000 --- a/app/models/domain/genomics/SequenceLibrary.scala +++ /dev/null @@ -1,37 +0,0 @@ -package models.domain.genomics - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Represents a library of sequencing data and its associated metadata. - * - * @param id An optional unique identifier for the sequencing library, typically used internally. - * @param sampleGuid A universally unique identifier (UUID) for the sample associated with this sequencing library. - * @param lab The name of the laboratory that processed or generated the sequencing data. 
- * @param testType The type of test performed to generate the sequencing data (e.g., WGS, RNA-Seq). - * @param runDate The date and time the sequencing run was performed. - * @param instrument The name or model of the sequencing instrument used. - * @param reads The number of reads generated in the sequencing run. - * @param readLength The length of each read in base pairs. - * @param pairedEnd Indicates whether the sequencing data is paired-end (true) or single-end (false). - * @param insertSize An optional median insert size for paired-end sequencing libraries, representing the distance between paired reads. - * @param created_at The timestamp indicating when this sequencing library record was created in the system. - * @param updated_at An optional timestamp indicating the last time this sequencing library record was updated. - */ -case class SequenceLibrary( - id: Option[Int], - sampleGuid: UUID, - lab: String, - testTypeId: Int, - runDate: LocalDateTime, - instrument: String, - reads: Int, - readLength: Int, - pairedEnd: Boolean, - insertSize: Option[Int], - atUri: Option[String], - atCid: Option[String], - created_at: LocalDateTime, - updated_at: Option[LocalDateTime], - ) diff --git a/app/models/domain/genomics/SequencerInstrument.scala b/app/models/domain/genomics/SequencerInstrument.scala deleted file mode 100644 index 6c76775e..00000000 --- a/app/models/domain/genomics/SequencerInstrument.scala +++ /dev/null @@ -1,34 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime - -/** - * Represents a specific sequencing instrument used by a laboratory. 
- * - * @param id Unique identifier for the instrument record - * @param instrumentId The instrument ID found in BAM/CRAM read headers (e.g., 'A00123') - * @param labId Foreign key to the sequencing lab - * @param manufacturer Optional manufacturer name (e.g., 'Illumina', 'PacBio') - * @param model Optional model name (e.g., 'NovaSeq 6000', 'MiSeq') - * @param createdAt Timestamp when the record was created - * @param updatedAt Timestamp when the record was last updated - */ -case class SequencerInstrument( - id: Option[Int] = None, - instrumentId: String, - labId: Int, - manufacturer: Option[String] = None, - model: Option[String] = None, - source: Option[String] = Some("CURATOR"), - observationCount: Int = 0, - confidenceScore: Double = 1.0, - lastObservedAt: Option[LocalDateTime] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: Option[LocalDateTime] = None - ) - -object SequencerInstrument { - implicit val format: OFormat[SequencerInstrument] = Json.format[SequencerInstrument] -} \ No newline at end of file diff --git a/app/models/domain/genomics/SequencingLab.scala b/app/models/domain/genomics/SequencingLab.scala deleted file mode 100644 index a7fd6bd5..00000000 --- a/app/models/domain/genomics/SequencingLab.scala +++ /dev/null @@ -1,30 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime - -/** - * Represents a sequencing laboratory that processes genomic samples. 
- * - * @param id Unique identifier for the lab - * @param name Laboratory name (must be unique) - * @param isD2c Whether the lab offers direct-to-consumer services - * @param websiteUrl URL to the lab's official website - * @param descriptionMarkdown Rich text description (e.g., accreditation, methods) - * @param createdAt Timestamp when the record was created - * @param updatedAt Timestamp when the record was last updated - */ -case class SequencingLab( - id: Option[Int] = None, - name: String, - isD2c: Boolean = false, - websiteUrl: Option[String] = None, - descriptionMarkdown: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: Option[LocalDateTime] = None - ) - -object SequencingLab { - implicit val format: OFormat[SequencingLab] = Json.format[SequencingLab] -} diff --git a/app/models/domain/genomics/SpecimenDonor.scala b/app/models/domain/genomics/SpecimenDonor.scala deleted file mode 100644 index ec354aea..00000000 --- a/app/models/domain/genomics/SpecimenDonor.scala +++ /dev/null @@ -1,34 +0,0 @@ -package models.domain.genomics - -import com.vividsolutions.jts.geom.Point - -/** - * Represents a donor of a specimen, encapsulating key attributes related to the donor, - * their origin, biological characteristics, and other identifiers. - * - * @param id An optional unique identifier for the specimen donor. - * @param donorIdentifier A unique identifier for the donor, serving as an external reference. - * @param originBiobank The name of the biobank or organization from which the donor originated. - * @param donorType The type of biosample provided by the donor, expressed as a `BiosampleType`. - * @param sex An optional biological sex of the donor, represented as a `BiologicalSex` value. - * @param geocoord An optional geographical coordinate indicating the origin location of the donor. - * @param pgpParticipantId An optional identifier for the donor as a participant in the Personal Genome Project (PGP). 
- * @param citizenBiosampleDid An optional decentralized identifier (DID) for a citizen-defined biosample associated with the donor. - * @param dateRangeStart An optional starting year for the time range relevant to the donor or specimen data. - * @param dateRangeEnd An optional ending year for the time range relevant to the donor or specimen data. - */ -case class SpecimenDonor( - id: Option[Int] = None, - donorIdentifier: String, - originBiobank: String, - donorType: BiosampleType, - sex: Option[BiologicalSex], - geocoord: Option[Point], - pgpParticipantId: Option[String] = None, - atUri: Option[String] = None, - dateRangeStart: Option[Int] = None, - dateRangeEnd: Option[Int] = None, - yDnaReconciliationRef: Option[String] = None, - mtDnaReconciliationRef: Option[String] = None - ) - diff --git a/app/models/domain/genomics/TestType.scala b/app/models/domain/genomics/TestType.scala deleted file mode 100644 index 100fef49..00000000 --- a/app/models/domain/genomics/TestType.scala +++ /dev/null @@ -1,162 +0,0 @@ -package models.domain.genomics - -import play.api.mvc.QueryStringBindable - -/** - * Represents different types of genetic tests or sequencing methodologies. - * This enum provides a structured way to classify genomic data based on how it was generated. 
- */ -enum TestType { - case WGS, WES, TARGETED_Y, TARGETED_MT, SNP_ARRAY_23ANDME, SNP_ARRAY_ANCESTRY, - BIG_Y_700, BIG_Y_500, Y_ELITE, Y_PRIME, MT_FULL_SEQUENCE, MT_PLUS, - ARRAY_23ANDME_V4, ARRAY_ANCESTRY_V1, ARRAY_MYHERITAGE, ARRAY_LIVINGDNA, ARRAY_CUSTOM - - override def toString: String = this match { - case WGS => "WGS" - case WES => "WES" - case TARGETED_Y => "TARGETED_Y" - case TARGETED_MT => "TARGETED_MT" - case SNP_ARRAY_23ANDME => "SNP_ARRAY_23ANDME" - case SNP_ARRAY_ANCESTRY => "SNP_ARRAY_ANCESTRY" - case BIG_Y_700 => "BIG_Y_700" - case BIG_Y_500 => "BIG_Y_500" - case Y_ELITE => "Y_ELITE" - case Y_PRIME => "Y_PRIME" - case MT_FULL_SEQUENCE => "MT_FULL_SEQUENCE" - case MT_PLUS => "MT_PLUS" - case ARRAY_23ANDME_V4 => "ARRAY_23ANDME_V4" - case ARRAY_ANCESTRY_V1 => "ARRAY_ANCESTRY_V1" - case ARRAY_MYHERITAGE => "ARRAY_MYHERITAGE" - case ARRAY_LIVINGDNA => "ARRAY_LIVINGDNA" - case ARRAY_CUSTOM => "ARRAY_CUSTOM" - } -} - -/** - * Companion object for the TestType enumeration, providing utility methods and implicits. - */ -object TestType { - /** - * Converts a string representation to a TestType enum value. - * - * @param str The string to convert. - * @return An Option containing the corresponding TestType, or None if the string does not match. 
- */ - def fromString(str: String): Option[TestType] = str.toUpperCase match { - case "WGS" => Some(WGS) - case "WES" => Some(WES) - case "TARGETED_Y" => Some(TARGETED_Y) - case "TARGETED_MT" => Some(TARGETED_MT) - case "SNP_ARRAY_23ANDME" => Some(SNP_ARRAY_23ANDME) - case "SNP_ARRAY_ANCESTRY" => Some(SNP_ARRAY_ANCESTRY) - case "BIG_Y_700" => Some(BIG_Y_700) - case "BIG_Y_500" => Some(BIG_Y_500) - case "Y_ELITE" => Some(Y_ELITE) - case "Y_PRIME" => Some(Y_PRIME) - case "MT_FULL_SEQUENCE" => Some(MT_FULL_SEQUENCE) - case "MT_PLUS" => Some(MT_PLUS) - case "ARRAY_23ANDME_V4" => Some(ARRAY_23ANDME_V4) - case "ARRAY_ANCESTRY_V1" => Some(ARRAY_ANCESTRY_V1) - case "ARRAY_MYHERITAGE" => Some(ARRAY_MYHERITAGE) - case "ARRAY_LIVINGDNA" => Some(ARRAY_LIVINGDNA) - case "ARRAY_CUSTOM" => Some(ARRAY_CUSTOM) - case _ => None - } - - /** - * Implicit QueryStringBindable for TestType, allowing it to be used directly in Play routes. - */ - implicit val queryStringBindable: QueryStringBindable[TestType] = - new QueryStringBindable[TestType] { - def bind(key: String, params: Map[String, Seq[String]]): Option[Either[String, TestType]] = { - params.get(key).flatMap(_.headOption).map { value => - try { - fromString(value) match { - case Some(tt) => Right(tt) - case None => Left(s"Invalid TestType value: $value") - } - } catch { - case _: IllegalArgumentException => - Left(s"Invalid TestType value: $value") - } - } - } - - def unbind(key: String, value: TestType): String = { - s"$key=${value.toString}" - } - } -} - -/** - * Represents the method by which genomic data was generated. 
- */ -enum DataGenerationMethod { - case Sequencing, Genotyping - - override def toString: String = this match { - case Sequencing => "SEQUENCING" - case Genotyping => "GENOTYPING" - } -} - -object DataGenerationMethod { - def fromString(str: String): Option[DataGenerationMethod] = str.toUpperCase match { - case "SEQUENCING" => Some(Sequencing) - case "GENOTYPING" => Some(Genotyping) - case _ => None - } -} - -/** - * Represents the target region of a genetic test. - */ -enum TargetType { - case WholeGenome, YChromosome, MtDna, Autosomal, XChromosome, Mixed - - override def toString: String = this match { - case WholeGenome => "WHOLE_GENOME" - case YChromosome => "Y_CHROMOSOME" - case MtDna => "MT_DNA" - case Autosomal => "AUTOSOMAL" - case XChromosome => "X_CHROMOSOME" - case Mixed => "MIXED" - } -} - -object TargetType { - def fromString(str: String): Option[TargetType] = str.toUpperCase match { - case "WHOLE_GENOME" => Some(WholeGenome) - case "Y_CHROMOSOME" => Some(YChromosome) - case "MT_DNA" => Some(MtDna) - case "AUTOSOMAL" => Some(Autosomal) - case "X_CHROMOSOME" => Some(XChromosome) - case "MIXED" => Some(Mixed) - case _ => None - } -} - - -// Case class to represent a row in the test_type_definition table -case class TestTypeRow( - id: Option[Int] = None, - code: String, // Changed from name: TestType to code: String, as table column is VARCHAR - displayName: String, - category: DataGenerationMethod, // Added - vendor: Option[String] = None, - targetType: TargetType, // Added - expectedMinDepth: Option[Double] = None, - expectedTargetDepth: Option[Double] = None, - expectedMarkerCount: Option[Int] = None, - supportsHaplogroupY: Boolean, - supportsHaplogroupMt: Boolean, - supportsAutosomalIbd: Boolean, - supportsAncestry: Boolean, - typicalFileFormats: List[String], // Changed from Seq[String] to List[String] - version: Option[String] = None, - releaseDate: Option[java.time.LocalDate] = None, - deprecatedAt: Option[java.time.LocalDate] = None, - 
successorTestTypeId: Option[Int] = None, - description: Option[String] = None, - documentationUrl: Option[String] = None -) \ No newline at end of file diff --git a/app/models/domain/genomics/TestTypeTargetRegion.scala b/app/models/domain/genomics/TestTypeTargetRegion.scala deleted file mode 100644 index 6e6833db..00000000 --- a/app/models/domain/genomics/TestTypeTargetRegion.scala +++ /dev/null @@ -1,60 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{Json, OFormat} - -case class TestTypeTargetRegion( - id: Option[Int] = None, - testTypeId: Int, - contigName: String, - startPosition: Option[Int] = None, - endPosition: Option[Int] = None, - regionName: String, - regionType: String, - expectedCoveragePct: Option[Double] = None, - expectedMinDepth: Option[Double] = None - ) { - def regionSize: Option[Int] = for { - s <- startPosition - e <- endPosition - } yield e - s + 1 -} - -object TestTypeTargetRegion { - implicit val format: OFormat[TestTypeTargetRegion] = Json.format[TestTypeTargetRegion] -} - -case class RegionCoverageResult( - regionName: String, - contigName: String, - startPosition: Option[Int], - endPosition: Option[Int], - expectedCoveragePct: Option[Double], - expectedMinDepth: Option[Double], - actualMeanDepth: Option[Double], - actualCoveragePct: Option[Double], - meetsExpectation: Boolean - ) - -object RegionCoverageResult { - implicit val format: OFormat[RegionCoverageResult] = Json.format[RegionCoverageResult] -} - -case class TargetedCoverageAssessment( - testTypeCode: String, - testTypeDisplayName: String, - targetRegions: Seq[RegionCoverageResult], - overallCoveragePct: Double, - overallMeetsExpectation: Boolean, - qualityTier: String - ) - -object TargetedCoverageAssessment { - implicit val format: OFormat[TargetedCoverageAssessment] = Json.format[TargetedCoverageAssessment] - - def qualityTierFromCoverage(coveragePct: Double): String = { - if (coveragePct >= 0.95) "HIGH" - else if (coveragePct >= 0.80) "MEDIUM" - else if 
(coveragePct >= 0.50) "LOW" - else "INSUFFICIENT" - } -} diff --git a/app/models/domain/genomics/ValidationService.scala b/app/models/domain/genomics/ValidationService.scala deleted file mode 100644 index ad07b407..00000000 --- a/app/models/domain/genomics/ValidationService.scala +++ /dev/null @@ -1,11 +0,0 @@ -package models.domain.genomics - -import java.util.UUID - -case class ValidationService( - id: Option[Long], - guid: UUID, - name: String, - description: Option[String], - trustLevel: Option[String] - ) \ No newline at end of file diff --git a/app/models/domain/genomics/VariantV2.scala b/app/models/domain/genomics/VariantV2.scala deleted file mode 100644 index b84d5c33..00000000 --- a/app/models/domain/genomics/VariantV2.scala +++ /dev/null @@ -1,260 +0,0 @@ -package models.domain.genomics - -import play.api.libs.json.{__, JsValue, Json, OFormat, Format, Reads, Writes} - -import java.time.Instant - -/** - * Consolidated variant with JSONB coordinates and aliases. - * One row per logical variant across all reference genomes. 
- * - * @param variantId Unique identifier (auto-generated) - * @param canonicalName Primary name (e.g., "M269", "DYS456"); None for unnamed variants - * @param mutationType Variant type (SNP, INDEL, MNP, STR, DEL, DUP, INS, INV, CNV, TRANS) - * @param namingStatus Naming status (Unnamed, PendingReview, Named) - * @param aliases JSONB: {common_names: [], rs_ids: [], sources: {source: [names]}} - * @param coordinates JSONB: Per-assembly coordinates (structure varies by mutationType) - * @param definingHaplogroupId FK to haplogroup for parallel mutation disambiguation - * @param evidence JSONB: Evidence metadata (e.g., YSEQ test counts) - * @param primers JSONB: PCR primer information - * @param notes Free-text notes - * @param createdAt Creation timestamp - * @param updatedAt Last update timestamp - */ -case class VariantV2( - variantId: Option[Int] = None, - canonicalName: Option[String], - mutationType: MutationType, - namingStatus: NamingStatus = NamingStatus.Unnamed, - aliases: JsValue = Json.obj(), - coordinates: JsValue = Json.obj(), - definingHaplogroupId: Option[Int] = None, - evidence: JsValue = Json.obj(), - primers: JsValue = Json.obj(), - notes: Option[String] = None, - annotations: JsValue = Json.obj(), - createdAt: Instant = Instant.now(), - updatedAt: Instant = Instant.now() -) { - - /** - * Get coordinate entry for a specific reference genome. - */ - def getCoordinates(refGenome: String): Option[JsValue] = - (coordinates \ refGenome).toOption - - /** - * Check if variant has coordinates for a given reference. - */ - def hasCoordinates(refGenome: String): Boolean = - (coordinates \ refGenome).isDefined - - /** - * Get all reference genomes that have coordinates. - */ - def availableReferences: Set[String] = - coordinates.asOpt[Map[String, JsValue]].map(_.keySet).getOrElse(Set.empty) - - /** - * Get common names from aliases. 
- */ - def commonNames: Seq[String] = - (aliases \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) - - /** - * Get rs IDs from aliases. - */ - def rsIds: Seq[String] = - (aliases \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) - - /** - * Check if this is an STR marker. - */ - def isStr: Boolean = mutationType == MutationType.STR - - /** - * Check if this is a structural variant. - */ - def isStructuralVariant: Boolean = mutationType.isStructural - - /** - * Display name for UI (canonical name or coordinate-based fallback). - */ - def displayName: String = canonicalName.getOrElse { - // For unnamed variants, show coordinate-based identifier - getCoordinates("hs1").orElse(getCoordinates("GRCh38")).map { coords => - val contig = (coords \ "contig").asOpt[String].getOrElse("?") - val position = (coords \ "position").asOpt[Int].orElse((coords \ "start").asOpt[Int]).getOrElse(0) - val ref = (coords \ "ref").asOpt[String].getOrElse("") - val alt = (coords \ "alt").asOpt[String].getOrElse("") - if (ref.nonEmpty && alt.nonEmpty) s"$contig:$position:$ref>$alt" - else s"$contig:$position" - }.getOrElse(s"variant_${variantId.getOrElse(0)}") - } -} - -/** - * Helper case class for SNP/INDEL/MNP coordinates. - */ -case class PointVariantCoordinates( - contig: String, - position: Int, - ref: String, - alt: String -) - -object PointVariantCoordinates { - implicit val format: OFormat[PointVariantCoordinates] = Json.format[PointVariantCoordinates] -} - -/** - * Helper case class for STR coordinates. - */ -case class StrCoordinates( - contig: String, - start: Long, - end: Long, - period: Int, - repeatMotif: Option[String] = None, - referenceRepeats: Option[Int] = None -) - -object StrCoordinates { - implicit val format: OFormat[StrCoordinates] = Json.format[StrCoordinates] -} - -/** - * Helper case class for structural variant coordinates. 
- */ -case class SvCoordinates( - contig: String, - start: Long, - end: Long, - length: Long, - innerStart: Option[Long] = None, // For inversions - innerEnd: Option[Long] = None, // For inversions - referenceCopies: Option[Int] = None, // For CNVs - copyNumberRange: Option[Seq[Int]] = None // For CNVs -) - -object SvCoordinates { - implicit val format: OFormat[SvCoordinates] = Json.format[SvCoordinates] -} - -/** - * Helper case class for aliases structure. - */ -case class VariantAliases( - commonNames: Seq[String] = Seq.empty, - rsIds: Seq[String] = Seq.empty, - sources: Map[String, Seq[String]] = Map.empty -) - -object VariantAliases { - implicit val format: OFormat[VariantAliases] = Json.format[VariantAliases] - - val empty: VariantAliases = VariantAliases() - - /** - * Create from a single source with names. - */ - def fromSource(source: String, names: Seq[String], rsIds: Seq[String] = Seq.empty): VariantAliases = - VariantAliases( - commonNames = names, - rsIds = rsIds, - sources = Map(source -> names) - ) -} - -object VariantV2 { - // Custom format that handles enum serialization via dbValue strings - implicit val format: Format[VariantV2] = { - import play.api.libs.functional.syntax.* - - val reads: Reads[VariantV2] = ( - (__ \ "variantId").readNullable[Int] and - (__ \ "canonicalName").readNullable[String] and - (__ \ "mutationType").read[String].map(MutationType.fromStringOrDefault(_)) and - (__ \ "namingStatus").read[String].map(NamingStatus.fromStringOrDefault(_)) and - (__ \ "aliases").read[JsValue] and - (__ \ "coordinates").read[JsValue] and - (__ \ "definingHaplogroupId").readNullable[Int] and - (__ \ "evidence").read[JsValue] and - (__ \ "primers").read[JsValue] and - (__ \ "notes").readNullable[String] and - (__ \ "annotations").read[JsValue] and - (__ \ "createdAt").read[Instant] and - (__ \ "updatedAt").read[Instant] - )(VariantV2.apply) - - val writes: Writes[VariantV2] = ( - (__ \ "variantId").writeNullable[Int] and - (__ \ 
"canonicalName").writeNullable[String] and - (__ \ "mutationType").write[String].contramap[MutationType](_.dbValue) and - (__ \ "namingStatus").write[String].contramap[NamingStatus](_.dbValue) and - (__ \ "aliases").write[JsValue] and - (__ \ "coordinates").write[JsValue] and - (__ \ "definingHaplogroupId").writeNullable[Int] and - (__ \ "evidence").write[JsValue] and - (__ \ "primers").write[JsValue] and - (__ \ "notes").writeNullable[String] and - (__ \ "annotations").write[JsValue] and - (__ \ "createdAt").write[Instant] and - (__ \ "updatedAt").write[Instant] - )(v => (v.variantId, v.canonicalName, v.mutationType, v.namingStatus, v.aliases, - v.coordinates, v.definingHaplogroupId, v.evidence, v.primers, v.notes, - v.annotations, v.createdAt, v.updatedAt)) - - Format(reads, writes) - } - - /** - * Create a named SNP variant with coordinates for a single reference. - */ - def snp( - name: String, - refGenome: String, - contig: String, - position: Int, - ref: String, - alt: String, - source: Option[String] = None - ): VariantV2 = { - val coords = Json.obj( - refGenome -> Json.toJson(PointVariantCoordinates(contig, position, ref, alt)) - ) - val aliases = source.map { s => - Json.toJson(VariantAliases.fromSource(s, Seq(name))) - }.getOrElse(Json.toJson(VariantAliases(commonNames = Seq(name)))) - - VariantV2( - canonicalName = Some(name), - mutationType = MutationType.SNP, - namingStatus = NamingStatus.Named, - aliases = aliases, - coordinates = coords - ) - } - - /** - * Create an unnamed variant from coordinates. 
- */ - def unnamed( - refGenome: String, - contig: String, - position: Int, - ref: String, - alt: String, - variantType: MutationType = MutationType.SNP - ): VariantV2 = { - val coords = Json.obj( - refGenome -> Json.toJson(PointVariantCoordinates(contig, position, ref, alt)) - ) - VariantV2( - canonicalName = None, - mutationType = variantType, - namingStatus = NamingStatus.Unnamed, - coordinates = coords - ) - } -} diff --git a/app/models/domain/haplogroups/GenealogicalAnchor.scala b/app/models/domain/haplogroups/GenealogicalAnchor.scala deleted file mode 100644 index 32bf193b..00000000 --- a/app/models/domain/haplogroups/GenealogicalAnchor.scala +++ /dev/null @@ -1,51 +0,0 @@ -package models.domain.haplogroups - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDateTime - -case class GenealogicalAnchor( - id: Option[Int] = None, - haplogroupId: Int, - anchorType: AnchorType, - dateCe: Int, - dateUncertaintyYears: Option[Int], - confidence: Option[BigDecimal], - description: Option[String], - source: Option[String], - carbonDateBp: Option[Int], - carbonDateSigma: Option[Int], - createdBy: Option[String], - createdAt: LocalDateTime = LocalDateTime.now() -) { - - /** Convert calendar year to YBP (years before 1950). */ - def toYbp: Int = 1950 - dateCe - - /** Get the age constraint as an AgeEstimate. 
*/ - def toAgeEstimate: AgeEstimate = { - val ybp = toYbp - val lower = dateUncertaintyYears.map(u => ybp - u) - val upper = dateUncertaintyYears.map(u => ybp + u) - AgeEstimate(ybp, lower, upper) - } -} - -sealed trait AnchorType { - def dbValue: String -} - -object AnchorType { - case object KnownMrca extends AnchorType { val dbValue = "KNOWN_MRCA" } - case object Mdka extends AnchorType { val dbValue = "MDKA" } - case object AncientDna extends AnchorType { val dbValue = "ANCIENT_DNA" } - - def fromString(s: String): AnchorType = s match { - case "KNOWN_MRCA" => KnownMrca - case "MDKA" => Mdka - case "ANCIENT_DNA" => AncientDna - case other => throw new IllegalArgumentException(s"Unknown anchor type: $other") - } - - val values: Seq[AnchorType] = Seq(KnownMrca, Mdka, AncientDna) -} diff --git a/app/models/domain/haplogroups/Haplogroup.scala b/app/models/domain/haplogroups/Haplogroup.scala deleted file mode 100644 index d177ffee..00000000 --- a/app/models/domain/haplogroups/Haplogroup.scala +++ /dev/null @@ -1,91 +0,0 @@ -package models.domain.haplogroups - -import models.HaplogroupType - -import java.time.LocalDateTime - -/** - * Represents a haplogroup, which is a genetic population group of people who share a common ancestor - * on the paternal or maternal line. This case class captures details about a haplogroup, including its - * type, name, lineage, and other metadata. - * - * @param id An optional unique identifier for the haplogroup. Typically used for internal purposes. - * @param name The name of the haplogroup. This is required and serves as the primary identifier in a lineage context. - * @param lineage An optional description of the lineage to which the haplogroup belongs. - * @param description An optional textual description of the haplogroup providing additional context or details. - * @param haplogroupType The type of haplogroup (e.g., Y-DNA or mtDNA). This is represented as an enum. 
- * @param revisionId An integer that indicates the revision or version of the haplogroup data. - * @param source The source or origin of the haplogroup information for traceability purposes. - * @param confidenceLevel A textual representation of the confidence level associated with assigning this haplogroup. - * @param validFrom The timestamp indicating when this haplogroup record became valid or effective. - * @param validUntil An optional timestamp indicating when this haplogroup record is no longer valid. - */ -/** - * Represents age estimate for a haplogroup branch (formed date or TMRCA). - * Values are in years before present (YBP) with optional 95% confidence interval. - * - * @param ybp Point estimate in years before present - * @param ybpLower Lower bound of 95% confidence interval - * @param ybpUpper Upper bound of 95% confidence interval - */ -case class AgeEstimate( - ybp: Int, - ybpLower: Option[Int] = None, - ybpUpper: Option[Int] = None - ) { - /** - * Convert YBP to calendar year (AD/BC). - * Assumes present = 1950 CE (radiocarbon dating convention). - */ - def toCalendarYear: Int = 1950 - ybp - - /** - * Format as human-readable string (e.g., "2500 BC" or "500 AD"). - */ - def formatted: String = { - val year = toCalendarYear - if (year < 0) s"${-year} BC" else s"$year AD" - } - - /** - * Format with confidence interval if available. 
- */ - def formattedWithRange: String = { - (ybpLower, ybpUpper) match { - case (Some(lower), Some(upper)) => - val lowerYear = 1950 - upper // Note: higher YBP = older = lower calendar year - val upperYear = 1950 - lower - val lowerStr = if (lowerYear < 0) s"${-lowerYear} BC" else s"$lowerYear AD" - val upperStr = if (upperYear < 0) s"${-upperYear} BC" else s"$upperYear AD" - s"$formatted ($lowerStr – $upperStr)" - case _ => formatted - } - } -} - -case class Haplogroup( - id: Option[Int] = None, - name: String, - lineage: Option[String], - description: Option[String], - haplogroupType: HaplogroupType, - revisionId: Int, - source: String, - confidenceLevel: String, - validFrom: LocalDateTime, - validUntil: Option[LocalDateTime], - formedYbp: Option[Int] = None, - formedYbpLower: Option[Int] = None, - formedYbpUpper: Option[Int] = None, - tmrcaYbp: Option[Int] = None, - tmrcaYbpLower: Option[Int] = None, - tmrcaYbpUpper: Option[Int] = None, - ageEstimateSource: Option[String] = None, - provenance: Option[HaplogroupProvenance] = None - ) { - /** Get formed date as AgeEstimate if available */ - def formedEstimate: Option[AgeEstimate] = formedYbp.map(y => AgeEstimate(y, formedYbpLower, formedYbpUpper)) - - /** Get TMRCA as AgeEstimate if available */ - def tmrcaEstimate: Option[AgeEstimate] = tmrcaYbp.map(y => AgeEstimate(y, tmrcaYbpLower, tmrcaYbpUpper)) -} \ No newline at end of file diff --git a/app/models/domain/haplogroups/HaplogroupAncestralStr.scala b/app/models/domain/haplogroups/HaplogroupAncestralStr.scala deleted file mode 100644 index bd1c675e..00000000 --- a/app/models/domain/haplogroups/HaplogroupAncestralStr.scala +++ /dev/null @@ -1,33 +0,0 @@ -package models.domain.haplogroups - -import java.time.LocalDateTime - -case class HaplogroupAncestralStr( - id: Option[Int] = None, - haplogroupId: Int, - markerName: String, - ancestralValue: Option[Int], - ancestralValueAlt: Option[List[Int]], - confidence: Option[BigDecimal], - supportingSamples: Option[Int], 
- variance: Option[BigDecimal], - computedAt: LocalDateTime = LocalDateTime.now(), - method: MotifMethod = MotifMethod.Modal -) - -sealed trait MotifMethod { - def dbValue: String -} - -object MotifMethod { - case object Modal extends MotifMethod { val dbValue = "MODAL" } - case object Phylogenetic extends MotifMethod { val dbValue = "PHYLOGENETIC" } - case object Manual extends MotifMethod { val dbValue = "MANUAL" } - - def fromString(s: String): MotifMethod = s match { - case "MODAL" => Modal - case "PHYLOGENETIC" => Phylogenetic - case "MANUAL" => Manual - case other => throw new IllegalArgumentException(s"Unknown motif method: $other") - } -} diff --git a/app/models/domain/haplogroups/HaplogroupProvenance.scala b/app/models/domain/haplogroups/HaplogroupProvenance.scala deleted file mode 100644 index cb98a143..00000000 --- a/app/models/domain/haplogroups/HaplogroupProvenance.scala +++ /dev/null @@ -1,102 +0,0 @@ -package models.domain.haplogroups - -import play.api.libs.json.{Json, OFormat, Format, Reads, Writes} - -import java.time.LocalDateTime - -/** - * Tracks the provenance of a haplogroup node and its variants from multiple sources. 
- * - * Credit assignment follows a tiered model: - * - ISOGG credit is preserved on existing nodes (authoritative backbone) - * - Incoming sources get credit for new splits and terminal branches they contribute - * - * @param primaryCredit Source with primary discovery credit for this node - * @param nodeProvenance All sources that have contributed to this node's existence - * @param variantProvenance Per-variant source attribution (variant name -> set of sources) - * @param lastMergedAt Timestamp of the most recent merge operation affecting this node - * @param lastMergedFrom Source of the most recent merge operation - */ -case class HaplogroupProvenance( - primaryCredit: String, - nodeProvenance: Set[String] = Set.empty, - variantProvenance: Map[String, Set[String]] = Map.empty, - lastMergedAt: Option[LocalDateTime] = None, - lastMergedFrom: Option[String] = None -) { - - /** - * Add a source to nodeProvenance. - */ - def addNodeSource(source: String): HaplogroupProvenance = - copy(nodeProvenance = nodeProvenance + source) - - /** - * Add a source attribution for a specific variant. - */ - def addVariantSource(variantName: String, source: String): HaplogroupProvenance = - copy(variantProvenance = variantProvenance.updatedWith(variantName) { - case Some(sources) => Some(sources + source) - case None => Some(Set(source)) - }) - - /** - * Merge another provenance record into this one, combining all sources. 
- */ - def merge(other: HaplogroupProvenance): HaplogroupProvenance = { - val mergedVariants = (variantProvenance.keySet ++ other.variantProvenance.keySet).map { key => - key -> (variantProvenance.getOrElse(key, Set.empty) ++ other.variantProvenance.getOrElse(key, Set.empty)) - }.toMap - - HaplogroupProvenance( - primaryCredit = this.primaryCredit, // Preserve existing primary credit - nodeProvenance = nodeProvenance ++ other.nodeProvenance, - variantProvenance = mergedVariants, - lastMergedAt = Seq(lastMergedAt, other.lastMergedAt).flatten.maxOption, - lastMergedFrom = other.lastMergedFrom.orElse(lastMergedFrom) - ) - } - - /** - * Update merge timestamp and source. - */ - def withMergeInfo(source: String, timestamp: LocalDateTime): HaplogroupProvenance = - copy(lastMergedAt = Some(timestamp), lastMergedFrom = Some(source)) -} - -object HaplogroupProvenance { - // Custom JSON format to handle Set[String] and Map[String, Set[String]] - implicit val setStringFormat: Format[Set[String]] = Format( - Reads.seq[String].map(_.toSet), - Writes.seq[String].contramap(_.toSeq) - ) - - implicit val mapStringSetFormat: Format[Map[String, Set[String]]] = Format( - Reads.map[Set[String]], - Writes.map[Set[String]] - ) - - implicit val format: OFormat[HaplogroupProvenance] = Json.format[HaplogroupProvenance] - - val empty: HaplogroupProvenance = HaplogroupProvenance(primaryCredit = "") - - /** - * Create initial provenance for a new node from a source. - */ - def forNewNode(source: String, variants: Seq[String] = Seq.empty): HaplogroupProvenance = { - val variantProv = variants.map(v => v -> Set(source)).toMap - HaplogroupProvenance( - primaryCredit = source, - nodeProvenance = Set(source), - variantProvenance = variantProv, - lastMergedAt = Some(LocalDateTime.now()), - lastMergedFrom = Some(source) - ) - } - - /** - * Determine if ISOGG credit should be preserved (returns true if existing credit is ISOGG). 
- */ - def shouldPreserveCredit(existingCredit: String): Boolean = - existingCredit.equalsIgnoreCase("ISOGG") -} diff --git a/app/models/domain/haplogroups/HaplogroupRelationship.scala b/app/models/domain/haplogroups/HaplogroupRelationship.scala deleted file mode 100644 index 68263c5b..00000000 --- a/app/models/domain/haplogroups/HaplogroupRelationship.scala +++ /dev/null @@ -1,25 +0,0 @@ -package models.domain.haplogroups - -import java.time.LocalDateTime - -/** - * Represents a relationship between a child haplogroup and its parent haplogroup. - * This case class maintains the hierarchy and lineage history of haplogroups over time. - * - * @param id An optional unique identifier for this relationship record. - * @param childHaplogroupId The unique identifier of the child haplogroup in the relationship. - * @param parentHaplogroupId The unique identifier of the parent haplogroup in the relationship. - * @param revisionId An integer indicating the revision associated with this relationship record. - * @param validFrom The starting timestamp from which this relationship is considered valid. - * @param validUntil An optional timestamp indicating the expiration or invalidation of this relationship. - * @param source The source or origin of the information defining this relationship, for traceability. - */ -case class HaplogroupRelationship( - id: Option[Int] = None, - childHaplogroupId: Int, - parentHaplogroupId: Int, - revisionId: Int, - validFrom: LocalDateTime, - validUntil: Option[LocalDateTime], - source: String - ) \ No newline at end of file diff --git a/app/models/domain/haplogroups/HaplogroupVariant.scala b/app/models/domain/haplogroups/HaplogroupVariant.scala deleted file mode 100644 index 7a474a00..00000000 --- a/app/models/domain/haplogroups/HaplogroupVariant.scala +++ /dev/null @@ -1,10 +0,0 @@ -package models.domain.haplogroups - -/** - * Represents the association between a haplogroup and a genetic variant. 
- * - * @param id An optional unique identifier for the HaplogroupVariant record. - * @param haplogroupId The identifier of the haplogroup associated with this variant. - * @param variantId The identifier of the genetic variant associated with this haplogroup. - */ -case class HaplogroupVariant(id: Option[Int] = None, haplogroupId: Int, variantId: Int) diff --git a/app/models/domain/haplogroups/HaplogroupVariantMetadata.scala b/app/models/domain/haplogroups/HaplogroupVariantMetadata.scala deleted file mode 100644 index 22e98a12..00000000 --- a/app/models/domain/haplogroups/HaplogroupVariantMetadata.scala +++ /dev/null @@ -1,24 +0,0 @@ -package models.domain.haplogroups - -import java.time.LocalDateTime - -/** - * Represents metadata for changes or revisions associated with haplogroup variants. - * - * @param haplogroup_variant_id The unique identifier for the haplogroup variant associated with this metadata entry. - * @param revision_id An integer identifier for the specific revision of the haplogroup variant. - * @param author The name or identifier of the person or entity that authored this revision. - * @param timestamp The timestamp indicating when this revision was made. - * @param comment A textual comment or description associated with the revision, providing context or notes. - * @param change_type A description of the type of change represented by this revision (e.g., 'update', 'create', 'delete'). - * @param previous_revision_id An optional identifier for the previous revision in the sequence, if applicable. 
- */ -case class HaplogroupVariantMetadata( - haplogroup_variant_id: Int, - revision_id: Int, - author: String, - timestamp: LocalDateTime, - comment: String, - change_type: String, - previous_revision_id: Option[Int] - ) diff --git a/app/models/domain/haplogroups/RelationshipRevisionMetadata.scala b/app/models/domain/haplogroups/RelationshipRevisionMetadata.scala deleted file mode 100644 index f34b62e4..00000000 --- a/app/models/domain/haplogroups/RelationshipRevisionMetadata.scala +++ /dev/null @@ -1,25 +0,0 @@ -package models.domain.haplogroups - -import java.time.LocalDateTime - -/** - * Represents metadata related to a revision of a haplogroup relationship. This metadata captures - * important details about the changes made, the author of the revision, and other associated information. - * - * @param haplogroup_relationship_id The unique identifier of the haplogroup relationship being revised. - * @param revisionId A unique integer identifier representing this specific revision. - * @param author The name or identifier of the individual or entity that authored the revision. - * @param timestamp The timestamp indicating when the revision was made. - * @param comment A textual comment provided by the author to describe or explain the revision. - * @param changeType A string describing the type of change made (e.g., "update", "delete", "create"). - * @param previousRevisionId An optional integer identifier referencing the immediately preceding revision, if any. 
- */ -case class RelationshipRevisionMetadata( - haplogroup_relationship_id: Int, - revisionId: Int, - author: String, - timestamp: LocalDateTime, - comment: String, - changeType: String, - previousRevisionId: Option[Int] - ) \ No newline at end of file diff --git a/app/models/domain/haplogroups/TreeMergeStructures.scala b/app/models/domain/haplogroups/TreeMergeStructures.scala deleted file mode 100644 index 9b505125..00000000 --- a/app/models/domain/haplogroups/TreeMergeStructures.scala +++ /dev/null @@ -1,551 +0,0 @@ -package models.domain.haplogroups - -import models.HaplogroupType -import models.api.haplogroups.{ConflictStrategy, MergeConflict, MergeStatistics, PlacementAmbiguity, SourcePriorityConfig, SplitOperation} - -import java.time.LocalDateTime - -// ============================================================================ -// Tree Merge Data Structures -// ============================================================================ -// -// These structures support the "Identify-Match-Graft" tree merge algorithm -// used for merging external haplogroup trees into the DecodingUs baseline. -// ============================================================================ - -// ============================================================================ -// Formal Algorithm: Merge Case Classification -// ============================================================================ - -/** - * Explicit classification of merge cases based on Set Inclusion relationships. - * - * This implements the "Three-Way Logic" from formal tree grafting algorithms, - * extended to handle the disjoint case common in phylogenetic merges. - * - * Given: - * - S₁ = source node's variant set (from incoming tree T₁) - * - S₀ = existing node's variant set (from baseline tree T₀) - * - * We classify the relationship to determine the correct merge action. 
- */ -sealed trait MergeCase { - def description: String -} - -object MergeCase { - /** - * Case A: Full Match (S₁ = S₀) - * - * The source and existing nodes are equivalent - they define the same - * phylogenetic branch point. - * - * Action: Merge metadata (age estimates, provenance); continue to children. - */ - case class FullMatch( - sourceVariants: Set[String], - existingVariants: Set[String] - ) extends MergeCase { - override def description: String = "FULL_MATCH" - } - - /** - * Case B: Source Is Ancestor (S₁ ⊂ S₀) - * - * The source node represents an ANCESTOR of the existing node. - * This indicates T₁ has finer resolution - it's splitting what T₀ - * considered a single branch into multiple segments. - * - * Action: Node Contraction - insert source as intermediate ancestor, - * reparent existing node under source. This is "injecting a median node." - * - * Example: T₀ has A→C, T₁ has A→B→C - * When processing B: S₁(B) ⊂ S₀(C), so B is ancestor of C - * Result: Create B, reparent C under B - */ - case class SourceIsAncestor( - sourceVariants: Set[String], - existingVariants: Set[String], - existingNode: ExistingTreeNode - ) extends MergeCase { - override def description: String = "SOURCE_IS_ANCESTOR" - } - - /** - * Case C: Source Is Descendant (S₁ ⊃ S₀) - * - * The source node represents a DESCENDANT of the existing node. - * T₁ is providing additional downstream resolution. - * - * Action: Insert source as a new child of existing node. - */ - case class SourceIsDescendant( - sourceVariants: Set[String], - existingVariants: Set[String] - ) extends MergeCase { - override def description: String = "SOURCE_IS_DESCENDANT" - } - - /** - * Case D: Disjoint Branch (S₁ ∩ S₀ = ∅ or partial overlap without subset) - * - * The source and existing nodes represent different lineages that - * diverged from a common ancestor. - * - * Action: Create new branch (bifurcation) at the last common ancestor. 
- * This may involve finding the LCA by walking up cumulative variants. - */ - case class DisjointBranch( - sourceVariants: Set[String], - sharedVariants: Set[String] - ) extends MergeCase { - override def description: String = "DISJOINT_BRANCH" - } - - /** - * Special case: No existing node at this position - * - * Action: Create new node. May trigger grafting if descendants exist. - */ - case class NoExistingMatch( - sourceVariants: Set[String] - ) extends MergeCase { - override def description: String = "NO_EXISTING_MATCH" - } - - /** - * Classify the relationship between source and existing variant sets. - * - * @param sourceNodeVariants U(T₁) - variants defined at the source node - * @param existingNodeOpt The matched existing node (if any) - * @return The classified MergeCase - */ - def classify( - sourceNodeVariants: Set[String], - existingNodeOpt: Option[ExistingTreeNode] - ): MergeCase = { - existingNodeOpt match { - case None => - NoExistingMatch(sourceNodeVariants) - - case Some(existing) => - val existingNodeVariants = existing.nodeVariants - val intersection = sourceNodeVariants.intersect(existingNodeVariants) - - if (sourceNodeVariants == existingNodeVariants) { - // S₁ = S₀: Exact match - FullMatch(sourceNodeVariants, existingNodeVariants) - } else if (sourceNodeVariants.subsetOf(existingNodeVariants)) { - // S₁ ⊂ S₀: Source is ancestor (has fewer, more basal variants) - SourceIsAncestor(sourceNodeVariants, existingNodeVariants, existing) - } else if (existingNodeVariants.subsetOf(sourceNodeVariants)) { - // S₁ ⊃ S₀: Source is descendant (has more, derived variants) - SourceIsDescendant(sourceNodeVariants, existingNodeVariants) - } else { - // Disjoint or partial overlap - DisjointBranch(sourceNodeVariants, intersection) - } - } - } -} - -/** - * Index of existing haplogroups for efficient lookup. 
- */ -case class VariantIndex( - variantToHaplogroup: Map[String, Seq[Haplogroup]], - haplogroupByName: Map[String, Haplogroup] -) - -/** - * Context for merge operations. - */ -case class MergeContext( - haplogroupType: HaplogroupType, - sourceName: String, - priorityConfig: SourcePriorityConfig, - conflictStrategy: ConflictStrategy, - timestamp: LocalDateTime, - changeSetId: Option[Int] = None, // For change tracking integration - stagingMode: Boolean = true // When true, only record changes, don't apply to production -) - -/** - * Accumulator for merge statistics and results. - * - * Tracks all outcomes during the merge including ambiguous placements - * that require human curator review. - */ -case class MergeAccumulator( - statistics: MergeStatistics, - conflicts: List[MergeConflict], - splits: List[SplitOperation], - ambiguities: List[PlacementAmbiguity], - errors: List[String] -) - -object MergeAccumulator { - val empty: MergeAccumulator = MergeAccumulator( - statistics = MergeStatistics.empty, - conflicts = List.empty, - splits = List.empty, - ambiguities = List.empty, - errors = List.empty - ) -} - -/** - * Pre-loaded variant lookup cache. - * Maps variant name (uppercase) -> variant ID. - * Built once at the start of merge to avoid N+1 queries. - */ -case class VariantCache( - nameToVariantId: Map[String, Int] -) - -/** - * In-memory representation of the existing tree T₀ with O(1) lookup indexes. - * - * == Formal Role: Baseline Tree (T₀) == - * - * This structure represents the existing/baseline phylogenetic tree that - * the incoming tree T₁ will be merged into. It provides: - * - * - byName: Index for conflict detection (O(1) name lookup) - * - byVariant: Index for Set Intersection matching (O(1) variant lookup) - * - * Matching is based purely on mutation signatures (Set Inclusion Property), - * not names, to handle different naming conventions across sources. 
- * - * == Set Inclusion Property == - * - * If C(N₁) ⊂ C(N₂), then N₁ is an ancestor of N₂ in the phylogenetic tree. - * This is the fundamental property that enables correct tree grafting. - */ -case class ExistingTree( - root: ExistingTreeNode, - byName: Map[String, ExistingTreeNode], // name (uppercase) -> node (for conflict detection only) - byVariant: Map[String, Seq[ExistingTreeNode]], // variant name (uppercase) -> nodes with that variant in cumulative set - byId: Map[Int, ExistingTreeNode] = Map.empty // haplogroup ID -> node (for parent lookup) -) { - /** Find node by name - O(1) - used only for conflict detection */ - def findByName(name: String): Option[ExistingTreeNode] = - byName.get(name.toUpperCase) - - /** Find node by haplogroup ID - O(1) */ - def findById(id: Int): Option[ExistingTreeNode] = - byId.get(id) - - /** - * Find best matching node for source cumulative variants. - * - * == Formal Role: BFS Alignment Step == - * - * This implements the SNP set intersection search from the Identify-Match-Graft - * algorithm. Given C(source), find the node in T₀ with maximum overlap. - * - * @param sourceCumulativeVariants C(T₁) - cumulative variants of source node - * @return The node in T₀ with highest variant overlap, or None - */ - def findMatchByVariants(sourceCumulativeVariants: Set[String]): Option[ExistingTreeNode] = { - if (sourceCumulativeVariants.isEmpty) return None - - // Find all nodes that share cumulative variants, count overlaps - val candidates = sourceCumulativeVariants - .flatMap(v => byVariant.getOrElse(v, Seq.empty)) - .groupBy(identity) - .view.mapValues(_.size) - .toSeq - .sortBy(-_._2) - - // Return node with highest overlap - candidates.headOption.filter(_._2 >= 1).map(_._1) - } - - /** - * Find a phylogenetically compatible node for grafting. 
- * - * == Formal Role: Node Contraction / Grafting Validation == - * - * This method validates the Set Inclusion Property before grafting: - * C(parent) ⊆ C(candidate) - the candidate must be downstream in our lineage - * - * This prevents the critical error of grafting nodes from unrelated lineages - * (e.g., grafting A-branch nodes under B-branch) which can occur when - * recurrent SNPs (back-mutations) create false positive matches. - * - * == Recurrent SNP Guard == - * - * Recurrent SNPs are the same mutation occurring independently in different - * lineages. Without phylogenetic validation, these could cause: - * - A1b1-M9431 being grafted under B2b1a2 (wrong!) - * - * The Set Inclusion check ensures we only graft nodes that share our - * ancestral path, not just a single variant. - * - * @param parentCumulativeVariants C(parent) - the ancestral lineage signature - * @param childNodeVariants U(child) - variants that define this specific branch - * @return The best matching node that is phylogenetically compatible, or None - */ - def findPhylogeneticMatch( - parentCumulativeVariants: Set[String], - childNodeVariants: Set[String] - ): Option[ExistingTreeNode] = { - if (childNodeVariants.isEmpty) return None - - // Step 1: Find candidate nodes that have at least one of the child's defining variants - val candidatesWithVariant = childNodeVariants - .flatMap(v => byVariant.getOrElse(v, Seq.empty)) - .toSeq.distinct - - // Step 2: Apply Set Inclusion Property - filter to same lineage only - // Their cumulative variants must CONTAIN our parent's cumulative variants - // This is the key recurrent SNP guard - val phylogeneticallyCompatible = candidatesWithVariant.filter { node => - parentCumulativeVariants.subsetOf(node.cumulativeVariants) - } - - if (phylogeneticallyCompatible.isEmpty) return None - - // Step 3: Score by overlap with child's node-specific variants - val scored = phylogeneticallyCompatible.map { node => - val overlap = 
node.nodeVariants.intersect(childNodeVariants).size - (node, overlap) - }.filter(_._2 > 0).sortBy(-_._2) - - scored.headOption.map(_._1) - } - - /** - * Find ALL phylogenetically compatible nodes for star cluster sibling sweeping. - * - * == Formal Role: Star Cluster Resolution == - * - * In phylogenetics, a "star cluster" is a polytomy where multiple lineages - * diverge simultaneously from a single ancestor. When T₁ provides finer - * resolution (grouping some of these siblings under a new intermediate node), - * we need to identify ALL siblings that should be reparented together. - * - * This method returns all nodes that: - * 1. Are phylogenetically compatible (C(parent) ⊆ C(node)) - * 2. Share at least one variant with the child signature - * - * == Example == - * - * T₀ has star cluster: A → {B1, B2, B3, C1, C2} - * T₁ groups B-variants: A → B → {B1, B2, B3} - * - * When processing B's children, this method returns [B1, B2, B3] so all - * can be reparented under B simultaneously. - * - * @param parentCumulativeVariants C(parent) - the ancestral lineage signature - * @param childNodeVariants U(child) - variants that define this branch - * @return All matching nodes that should be reparented together - */ - def findAllPhylogeneticMatches( - parentCumulativeVariants: Set[String], - childNodeVariants: Set[String] - ): Seq[ExistingTreeNode] = { - if (childNodeVariants.isEmpty) return Seq.empty - - // Step 1: Find candidate nodes that have at least one of the child's defining variants - val candidatesWithVariant = childNodeVariants - .flatMap(v => byVariant.getOrElse(v, Seq.empty)) - .toSeq.distinct - - // Step 2: Apply Set Inclusion Property - filter to same lineage only - val phylogeneticallyCompatible = candidatesWithVariant.filter { node => - parentCumulativeVariants.subsetOf(node.cumulativeVariants) - } - - // Step 3: Return all nodes with overlapping node-specific variants - phylogeneticallyCompatible.filter { node => - 
node.nodeVariants.intersect(childNodeVariants).nonEmpty - } - } - - /** - * Find a matching node by variant overlap with RELAXED phylogenetic check. - * - * == Formal Role: Global Variant Match for Cross-Tree Merging == - * - * This method is used when merging trees with DIFFERENT intermediate structures. - * For example: - * - ISOGG tree: R1b → R1b1 → R1b1a → ... → R1b-L21 (defined by L21) - * - DecodingUs tree: R1b → [different structure] → R1b1a1b1a1a2c1 (has L21) - * - * When ISOGG's intermediate nodes (R1b1, R1b1a) don't match existing structure, - * we still want R1b-L21 to find R1b1a1b1a1a2c1 by their shared L21 variant. - * - * == Recurrent SNP Guard == - * - * To prevent cross-lineage false matches (R1b-L21 matching I-L21), we require - * SOME cumulative variant overlap between the source's ancestry and the - * candidate's ancestry. This ensures they're in the same major lineage. - * - * For example: - * - R1b-L21's cumulative includes M343 (R1b's marker) - * - R1b1a1b1a1a2c1's cumulative also includes M343 → MATCH OK - * - I-L21's cumulative would NOT include M343 → REJECTED - * - * @param sourceNodeVariants U(source) - variants defined at the source node - * @param sourceCumulativeVariants C(source) - all variants from source's ancestry - * @return The best matching node by variant overlap in same lineage, or None - */ - def findGlobalVariantMatch( - sourceNodeVariants: Set[String], - sourceCumulativeVariants: Set[String] - ): Option[ExistingTreeNode] = { - if (sourceNodeVariants.isEmpty) return None - - // Step 1: Find all candidate nodes that share any variant - val candidatesWithVariant = sourceNodeVariants - .flatMap(v => byVariant.getOrElse(v, Seq.empty)) - .toSeq.distinct - - if (candidatesWithVariant.isEmpty) return None - - // Step 2: LINEAGE GUARD - require cumulative overlap to prevent cross-lineage matches - // This catches recurrent SNPs like L21 appearing in both R1b and I lineages - val sameLineageCandidates = candidatesWithVariant.filter { 
node => - // Require at least 1 shared cumulative variant (ancestral marker) - // This ensures the candidate is in the same major lineage as the source - val cumulativeOverlap = node.cumulativeVariants.intersect(sourceCumulativeVariants) - cumulativeOverlap.nonEmpty - } - - if (sameLineageCandidates.isEmpty) return None - - // Step 3: Score candidates by NODE-LEVEL variant overlap (not cumulative) - // This finds the node where these variants are actually DEFINED, not just inherited - val scored = sameLineageCandidates.map { node => - val overlap = node.nodeVariants.intersect(sourceNodeVariants).size - (node, overlap) - }.filter(_._2 > 0).sortBy(-_._2) - - // Step 4: Safety checks to avoid false matches - scored.headOption.flatMap { case (node, overlap) => - val overlapRatio = overlap.toDouble / sourceNodeVariants.size - - // Check 1: Require significant overlap from source's perspective - // Either ≥2 shared variants OR ≥50% of source variants - val hasSignificantOverlap = overlap >= 2 || overlapRatio >= 0.5 - - // Check 2: DESCENDANT GUARD - prevent matching to a much deeper node - // If the existing node has MANY more variants than the source, it's likely - // a descendant, not a true match. The source should create intermediates - // leading TO that node, not collapse into it. - // - // Example: Source G2a2b2a1a1a1a1a1a1 has 2 variants, G-Y38189 has 78 - // This is NOT a match - ISOGG is providing intermediate structure that - // should be preserved, leading down to G-Y38189. 
- // - // We allow up to 10x difference (e.g., source=5, existing=50 is OK) - // but reject extreme cases (source=2, existing=78) - val nodeVariantRatio = if (sourceNodeVariants.nonEmpty) { - node.nodeVariants.size.toDouble / sourceNodeVariants.size - } else Double.MaxValue - - val isNotDescendantMismatch = nodeVariantRatio <= 10.0 - - if (hasSignificantOverlap && isNotDescendantMismatch) { - Some(node) - } else { - None // Either weak match or descendant mismatch - let normal flow handle it - } - } - } -} - -object ExistingTree { - /** Build tree with indexes from a root node */ - def fromRoot(root: ExistingTreeNode): ExistingTree = { - val byName = scala.collection.mutable.Map.empty[String, ExistingTreeNode] - val byVariant = scala.collection.mutable.Map.empty[String, List[ExistingTreeNode]] - val byId = scala.collection.mutable.Map.empty[Int, ExistingTreeNode] - - def index(node: ExistingTreeNode): Unit = { - byName(node.haplogroup.name.toUpperCase) = node - node.haplogroup.id.foreach(id => byId(id) = node) - // Index by CUMULATIVE variants (the full mutation signature) - node.cumulativeVariants.foreach { v => - byVariant(v) = node :: byVariant.getOrElse(v, Nil) - } - node.children.foreach(index) - } - - index(root) - ExistingTree(root, byName.toMap, byVariant.toMap, byId.toMap) - } -} - -/** - * In-memory tree node for existing haplogroups. 
- * - * == Formal Role: Normalized Node with U(N) and C(N) == - * - * Each node in the phylogenetic tree is defined by two variant sets: - * - * - U(N) = nodeVariants: Unique SNP set - variants defined AT this node only - * - C(N) = cumulativeVariants: Cumulative SNP set - all variants from root to N - * - * The relationship: C(N) = C(parent) ∪ U(N) - * - * This normalization enables the Set Inclusion Property for tree comparison: - * If C(N₁) ⊂ C(N₂), then N₁ is an ancestor of N₂ - * - * == Path Enumeration == - * - * The cumulativeVariants field implements Path Enumeration - the complete - * mutational signature from Y-Adam (root) to this node. This signature - * uniquely identifies the node's position in the phylogeny regardless of - * naming conventions. - * - * @param haplogroup The haplogroup data (name, metadata, provenance) - * @param nodeVariants U(N) - Variants defined at THIS node (not inherited) - * @param cumulativeVariants C(N) - All variants from root to this node (inherited + own) - * @param children Child nodes in the tree - */ -case class ExistingTreeNode( - haplogroup: Haplogroup, - nodeVariants: Set[String], - cumulativeVariants: Set[String], - children: Seq[ExistingTreeNode] -) { - /** - * Find a matching node within a bounded depth of this node. - * - * == Formal Role: Depth-Limited BFS for Granularity Mismatch == - * - * Different sources may split branches at different points (star clusters). - * This method handles cases where T₁ and T₀ have different resolution - * by searching within a depth limit. 
- * - * @param sourceName Name of the source node - * @param sourceNodeVariants U(T₁) - Variants defined at the source node (not cumulative) - * @param maxDepth Maximum levels to search (default 5 for star cluster variations) - * @return The best matching node within depth limit, or None - */ - def findMatchWithinDepth(sourceName: String, sourceNodeVariants: Set[String], maxDepth: Int = 5): Option[ExistingTreeNode] = { - if (maxDepth <= 0) return None - - // First: exact name match at this level (highest priority) - val nameMatch = children.find(_.haplogroup.name.equalsIgnoreCase(sourceName)) - if (nameMatch.isDefined) return nameMatch - - // Second: variant overlap match at this level - if (sourceNodeVariants.nonEmpty) { - val variantMatches = children.map { child => - val overlap = child.nodeVariants.intersect(sourceNodeVariants).size - (child, overlap) - }.filter(_._2 > 0).sortBy(-_._2) - - if (variantMatches.nonEmpty) { - return Some(variantMatches.head._1) - } - } - - // Third: recurse into children with reduced depth - children.iterator.flatMap(_.findMatchWithinDepth(sourceName, sourceNodeVariants, maxDepth - 1)).nextOption() - } -} diff --git a/app/models/domain/haplogroups/TreeVersioning.scala b/app/models/domain/haplogroups/TreeVersioning.scala deleted file mode 100644 index b0dad98a..00000000 --- a/app/models/domain/haplogroups/TreeVersioning.scala +++ /dev/null @@ -1,408 +0,0 @@ -package models.domain.haplogroups - -import models.HaplogroupType -import play.api.libs.json.{Format, Json, OFormat, Reads, Writes} - -import java.time.LocalDateTime - -/** - * Domain models for the Tree Versioning System. - * - * Supports Production/WIP tree versioning for bulk merge operations. - * Change sets track groups of changes from external sources (ISOGG, ytree.net). - * Individual changes are recorded for curator review before promotion to Production. 
- */ - -// ============================================================================ -// Enums -// ============================================================================ - -/** - * Status of a change set in its lifecycle. - */ -enum ChangeSetStatus: - case Draft // Being built (merge in progress) - case ReadyForReview // Merge complete, awaiting curator review - case UnderReview // Curator actively reviewing - case Applied // Changes applied to Production - case Discarded // Changes abandoned - -object ChangeSetStatus { - def fromString(s: String): ChangeSetStatus = s.toUpperCase match { - case "DRAFT" => ChangeSetStatus.Draft - case "READY_FOR_REVIEW" => ChangeSetStatus.ReadyForReview - case "UNDER_REVIEW" => ChangeSetStatus.UnderReview - case "APPLIED" => ChangeSetStatus.Applied - case "DISCARDED" => ChangeSetStatus.Discarded - case other => throw new IllegalArgumentException(s"Unknown ChangeSetStatus: $other") - } - - def toDbString(status: ChangeSetStatus): String = status match { - case ChangeSetStatus.Draft => "DRAFT" - case ChangeSetStatus.ReadyForReview => "READY_FOR_REVIEW" - case ChangeSetStatus.UnderReview => "UNDER_REVIEW" - case ChangeSetStatus.Applied => "APPLIED" - case ChangeSetStatus.Discarded => "DISCARDED" - } - - implicit val reads: Reads[ChangeSetStatus] = Reads.StringReads.map(fromString) - implicit val writes: Writes[ChangeSetStatus] = Writes.StringWrites.contramap(toDbString) - implicit val format: Format[ChangeSetStatus] = Format(reads, writes) -} - -/** - * Type of change recorded in a tree change. 
- */ -enum TreeChangeType: - case Create // New haplogroup created - case Update // Haplogroup metadata updated - case Delete // Haplogroup deleted (soft) - case Reparent // Parent relationship changed - case AddVariant // Variant associated with haplogroup - case RemoveVariant // Variant disassociated from haplogroup - -object TreeChangeType { - def fromString(s: String): TreeChangeType = s.toUpperCase match { - case "CREATE" => TreeChangeType.Create - case "UPDATE" => TreeChangeType.Update - case "DELETE" => TreeChangeType.Delete - case "REPARENT" => TreeChangeType.Reparent - case "ADD_VARIANT" => TreeChangeType.AddVariant - case "REMOVE_VARIANT" => TreeChangeType.RemoveVariant - case other => throw new IllegalArgumentException(s"Unknown TreeChangeType: $other") - } - - def toDbString(changeType: TreeChangeType): String = changeType match { - case TreeChangeType.Create => "CREATE" - case TreeChangeType.Update => "UPDATE" - case TreeChangeType.Delete => "DELETE" - case TreeChangeType.Reparent => "REPARENT" - case TreeChangeType.AddVariant => "ADD_VARIANT" - case TreeChangeType.RemoveVariant => "REMOVE_VARIANT" - } - - implicit val reads: Reads[TreeChangeType] = Reads.StringReads.map(fromString) - implicit val writes: Writes[TreeChangeType] = Writes.StringWrites.contramap(toDbString) - implicit val format: Format[TreeChangeType] = Format(reads, writes) -} - -/** - * Status of an individual tree change. 
- */ -enum ChangeStatus: - case Pending // Not yet applied - case Applied // Successfully applied to Production - case Reverted // Undone by curator - case Skipped // Excluded from promotion by curator - -object ChangeStatus { - def fromString(s: String): ChangeStatus = s.toUpperCase match { - case "PENDING" => ChangeStatus.Pending - case "APPLIED" => ChangeStatus.Applied - case "REVERTED" => ChangeStatus.Reverted - case "SKIPPED" => ChangeStatus.Skipped - case other => throw new IllegalArgumentException(s"Unknown ChangeStatus: $other") - } - - def toDbString(status: ChangeStatus): String = status match { - case ChangeStatus.Pending => "PENDING" - case ChangeStatus.Applied => "APPLIED" - case ChangeStatus.Reverted => "REVERTED" - case ChangeStatus.Skipped => "SKIPPED" - } - - implicit val reads: Reads[ChangeStatus] = Reads.StringReads.map(fromString) - implicit val writes: Writes[ChangeStatus] = Writes.StringWrites.contramap(toDbString) - implicit val format: Format[ChangeStatus] = Format(reads, writes) -} - -// ============================================================================ -// Domain Models -// ============================================================================ - -/** - * Statistics from a merge operation, stored with the change set. - */ -case class ChangeSetStatistics( - nodesProcessed: Int = 0, - nodesCreated: Int = 0, - nodesUpdated: Int = 0, - nodesUnchanged: Int = 0, - variantsAdded: Int = 0, - relationshipsCreated: Int = 0, - relationshipsUpdated: Int = 0, - splitOperations: Int = 0, - ambiguityCount: Int = 0 -) - -object ChangeSetStatistics { - implicit val format: OFormat[ChangeSetStatistics] = Json.format[ChangeSetStatistics] - - val empty: ChangeSetStatistics = ChangeSetStatistics() -} - -/** - * A change set groups related changes from a single merge operation. 
- * - * Lifecycle: - * DRAFT -> READY_FOR_REVIEW -> UNDER_REVIEW -> APPLIED - * \-> DISCARDED - * - * @param id Unique identifier - * @param haplogroupType Y or MT tree - * @param name Unique name within type (e.g., "isogg-2025-12") - * @param description Optional description of the change set - * @param sourceName Source of the changes (e.g., "ISOGG", "ytree.net") - * @param createdAt When the change set was created - * @param createdBy Who created it (curator ID or "system") - * @param finalizedAt When merge completed and set moved to READY_FOR_REVIEW - * @param appliedAt When changes were applied to Production - * @param appliedBy Who applied the changes - * @param discardedAt When changes were discarded - * @param discardedBy Who discarded the changes - * @param discardReason Why the changes were discarded - * @param status Current lifecycle status - * @param statistics Merge statistics snapshot - * @param ambiguityReportPath Path to generated ambiguity report file - */ -case class ChangeSet( - id: Option[Int], - haplogroupType: HaplogroupType, - name: String, - description: Option[String], - sourceName: String, - createdAt: LocalDateTime, - createdBy: String, - finalizedAt: Option[LocalDateTime] = None, - appliedAt: Option[LocalDateTime] = None, - appliedBy: Option[String] = None, - discardedAt: Option[LocalDateTime] = None, - discardedBy: Option[String] = None, - discardReason: Option[String] = None, - status: ChangeSetStatus = ChangeSetStatus.Draft, - statistics: ChangeSetStatistics = ChangeSetStatistics.empty, - ambiguityReportPath: Option[String] = None -) - -object ChangeSet { - implicit val format: OFormat[ChangeSet] = Json.format[ChangeSet] -} - -/** - * An individual change within a change set. - * - * Tracks a single operation (create, update, reparent, etc.) for audit and review. 
- * - * @param id Unique identifier - * @param changeSetId Parent change set - * @param changeType Type of change - * @param haplogroupId Target haplogroup (for UPDATE/DELETE/REPARENT) - * @param variantId Target variant (for ADD_VARIANT/REMOVE_VARIANT) - * @param oldParentId Previous parent (for REPARENT) - * @param newParentId New parent (for CREATE and REPARENT) - * @param haplogroupData Full haplogroup data for CREATE, or new values for UPDATE - * @param oldData Previous state for UPDATE (audit trail) - * @param createdHaplogroupId For CREATE, the ID assigned after apply - * @param sequenceNum Order within change set (for replay) - * @param status Current status of this change - * @param reviewedAt When curator reviewed this change - * @param reviewedBy Who reviewed it - * @param reviewNotes Curator's notes - * @param createdAt When change was recorded - * @param appliedAt When change was applied to Production - * @param ambiguityType If this relates to an ambiguity, the type - * @param ambiguityConfidence If this relates to an ambiguity, the confidence score - */ -case class TreeChange( - id: Option[Int], - changeSetId: Int, - changeType: TreeChangeType, - haplogroupId: Option[Int] = None, - variantId: Option[Int] = None, - oldParentId: Option[Int] = None, - newParentId: Option[Int] = None, - haplogroupData: Option[String] = None, // JSON string - oldData: Option[String] = None, // JSON string - createdHaplogroupId: Option[Int] = None, - sequenceNum: Int, - status: ChangeStatus = ChangeStatus.Pending, - reviewedAt: Option[LocalDateTime] = None, - reviewedBy: Option[String] = None, - reviewNotes: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - appliedAt: Option[LocalDateTime] = None, - ambiguityType: Option[String] = None, - ambiguityConfidence: Option[Double] = None -) - -object TreeChange { - implicit val format: OFormat[TreeChange] = Json.format[TreeChange] -} - -/** - * A comment on a change set or specific change for curator 
collaboration. - */ -case class ChangeSetComment( - id: Option[Int], - changeSetId: Int, - treeChangeId: Option[Int], - author: String, - content: String, - createdAt: LocalDateTime, - updatedAt: Option[LocalDateTime] = None -) - -object ChangeSetComment { - implicit val format: OFormat[ChangeSetComment] = Json.format[ChangeSetComment] -} - -// ============================================================================ -// View Models for API responses -// ============================================================================ - -/** - * Summary view of a change set for list displays. - */ -case class ChangeSetSummary( - id: Int, - haplogroupType: HaplogroupType, - name: String, - sourceName: String, - status: ChangeSetStatus, - createdAt: LocalDateTime, - createdBy: String, - statistics: ChangeSetStatistics, - totalChanges: Int, - pendingChanges: Int, - reviewedChanges: Int -) - -object ChangeSetSummary { - implicit val format: OFormat[ChangeSetSummary] = Json.format[ChangeSetSummary] -} - -/** - * Detailed view of a change set including all metadata. - */ -case class ChangeSetDetails( - changeSet: ChangeSet, - totalChanges: Int, - changesByType: Map[String, Int], // String keys for JSON compatibility - changesByStatus: Map[String, Int], // String keys for JSON compatibility - comments: List[ChangeSetComment] -) - -object ChangeSetDetails { - implicit val format: OFormat[ChangeSetDetails] = Json.format[ChangeSetDetails] -} - -/** - * A tree change with additional context for review UI. 
- */ -case class TreeChangeView( - change: TreeChange, - changeSetName: String, - sourceName: String, - haplogroupName: Option[String], - parentName: Option[String], - variantName: Option[String] -) - -object TreeChangeView { - implicit val format: OFormat[TreeChangeView] = Json.format[TreeChangeView] -} - -// ============================================================================ -// Tree Diff Models (Phase 3) -// ============================================================================ - -/** - * Type of difference between Production and WIP trees. - */ -enum DiffType: - case Added // Node exists in WIP but not Production - case Removed // Node exists in Production but not WIP (from DELETE changes) - case Modified // Node exists in both but has changes - case Reparented // Node's parent changed - -object DiffType { - def fromString(s: String): DiffType = s.toUpperCase match { - case "ADDED" => DiffType.Added - case "REMOVED" => DiffType.Removed - case "MODIFIED" => DiffType.Modified - case "REPARENTED" => DiffType.Reparented - case other => throw new IllegalArgumentException(s"Unknown DiffType: $other") - } - - def toDbString(dt: DiffType): String = dt match { - case DiffType.Added => "ADDED" - case DiffType.Removed => "REMOVED" - case DiffType.Modified => "MODIFIED" - case DiffType.Reparented => "REPARENTED" - } - - implicit val reads: Reads[DiffType] = Reads.StringReads.map(fromString) - implicit val writes: Writes[DiffType] = Writes.StringWrites.contramap(toDbString) - implicit val format: Format[DiffType] = Format(reads, writes) -} - -/** - * A single difference between Production and WIP trees. 
- */ -case class TreeDiffEntry( - diffType: DiffType, - haplogroupId: Option[Int], // ID if existing node - haplogroupName: String, - oldParentName: Option[String], // For REPARENTED - newParentName: Option[String], // For REPARENTED or ADDED - changeDescription: String, // Human-readable description - changeIds: List[Int], // Related tree_change IDs - variantsAdded: List[String] = List.empty, - variantsRemoved: List[String] = List.empty -) - -object TreeDiffEntry { - implicit val format: OFormat[TreeDiffEntry] = Json.format[TreeDiffEntry] -} - -/** - * Complete diff between Production and WIP trees. - */ -case class TreeDiff( - changeSetId: Int, - changeSetName: String, - haplogroupType: HaplogroupType, - entries: List[TreeDiffEntry], - summary: TreeDiffSummary -) - -object TreeDiff { - implicit val format: OFormat[TreeDiff] = Json.format[TreeDiff] - - val empty: TreeDiff = TreeDiff( - changeSetId = 0, - changeSetName = "", - haplogroupType = HaplogroupType.Y, - entries = List.empty, - summary = TreeDiffSummary.empty - ) -} - -/** - * Summary statistics for a tree diff. 
- */ -case class TreeDiffSummary( - totalChanges: Int, - nodesAdded: Int, - nodesRemoved: Int, - nodesModified: Int, - nodesReparented: Int, - variantsAdded: Int, - variantsRemoved: Int -) - -object TreeDiffSummary { - implicit val format: OFormat[TreeDiffSummary] = Json.format[TreeDiffSummary] - - val empty: TreeDiffSummary = TreeDiffSummary(0, 0, 0, 0, 0, 0, 0) -} diff --git a/app/models/domain/ibd/IbdDiscoveryIndex.scala b/app/models/domain/ibd/IbdDiscoveryIndex.scala deleted file mode 100644 index d50b48b3..00000000 --- a/app/models/domain/ibd/IbdDiscoveryIndex.scala +++ /dev/null @@ -1,21 +0,0 @@ -package models.domain.ibd - -import java.time.ZonedDateTime -import java.util.UUID - -case class IbdDiscoveryIndex( - id: Option[Long], - sampleGuid1: UUID, - sampleGuid2: UUID, - pangenomeGraphId: Int, - matchRegionType: String, - totalSharedCmApprox: Option[Double], - numSharedSegmentsApprox: Option[Int], - isPubliclyDiscoverable: Boolean, - consensusStatus: String, - lastConsensusUpdate: ZonedDateTime, - validationServiceGuid: Option[UUID], - validationTimestamp: Option[ZonedDateTime], - indexedByService: Option[String], - indexedDate: ZonedDateTime - ) \ No newline at end of file diff --git a/app/models/domain/ibd/IbdPdsAttestation.scala b/app/models/domain/ibd/IbdPdsAttestation.scala deleted file mode 100644 index 7bc583eb..00000000 --- a/app/models/domain/ibd/IbdPdsAttestation.scala +++ /dev/null @@ -1,16 +0,0 @@ -package models.domain.ibd - -import java.time.ZonedDateTime -import java.util.UUID - -case class IbdPdsAttestation( - id: Option[Long], - ibdDiscoveryIndexId: Long, - attestingPdsGuid: UUID, - attestingSampleGuid: UUID, - attestationTimestamp: ZonedDateTime, - attestationSignature: String, - matchSummaryHash: String, - attestationType: String, - attestationNotes: Option[String] - ) \ No newline at end of file diff --git a/app/models/domain/ibd/MatchConsentTracking.scala b/app/models/domain/ibd/MatchConsentTracking.scala deleted file mode 100644 index 
2ca34c71..00000000 --- a/app/models/domain/ibd/MatchConsentTracking.scala +++ /dev/null @@ -1,18 +0,0 @@ -package models.domain.ibd - -import java.time.ZonedDateTime -import java.util.UUID -import play.api.libs.json.JsValue - -case class MatchConsentTracking( - id: Option[Long], - atUri: String, - consentingDid: String, - sampleGuid: UUID, - consentLevel: String, - allowedMatchTypes: Option[JsValue], - shareContactInfo: Boolean, - consentedAt: ZonedDateTime, - expiresAt: Option[ZonedDateTime], - revokedAt: Option[ZonedDateTime] - ) diff --git a/app/models/domain/ibd/MatchRequestTracking.scala b/app/models/domain/ibd/MatchRequestTracking.scala deleted file mode 100644 index 30978063..00000000 --- a/app/models/domain/ibd/MatchRequestTracking.scala +++ /dev/null @@ -1,22 +0,0 @@ -package models.domain.ibd - -import java.time.ZonedDateTime -import java.util.UUID -import play.api.libs.json.JsValue - -case class MatchRequestTracking( - id: Option[Long], - atUri: String, - requesterDid: String, - targetDid: Option[String], - fromSampleGuid: UUID, - toSampleGuid: UUID, - requestType: String, - status: String, - discoveryReason: Option[JsValue], - message: Option[String], - createdAt: ZonedDateTime, - updatedAt: ZonedDateTime, - expiresAt: Option[ZonedDateTime], - completedAt: Option[ZonedDateTime] - ) diff --git a/app/models/domain/ibd/MatchSuggestion.scala b/app/models/domain/ibd/MatchSuggestion.scala deleted file mode 100644 index 43351b44..00000000 --- a/app/models/domain/ibd/MatchSuggestion.scala +++ /dev/null @@ -1,17 +0,0 @@ -package models.domain.ibd - -import java.time.ZonedDateTime -import java.util.UUID -import play.api.libs.json.JsValue - -case class MatchSuggestion( - id: Option[Long], - targetSampleGuid: UUID, - suggestedSampleGuid: UUID, - suggestionType: String, - score: Double, - metadata: Option[JsValue], - status: String, - createdAt: ZonedDateTime, - expiresAt: Option[ZonedDateTime] - ) diff --git a/app/models/domain/ibd/PopulationBreakdownCache.scala 
b/app/models/domain/ibd/PopulationBreakdownCache.scala deleted file mode 100644 index df0142e0..00000000 --- a/app/models/domain/ibd/PopulationBreakdownCache.scala +++ /dev/null @@ -1,14 +0,0 @@ -package models.domain.ibd - -import java.time.ZonedDateTime -import java.util.UUID -import play.api.libs.json.JsValue - -case class PopulationBreakdownCache( - id: Option[Long], - sampleGuid: UUID, - breakdown: JsValue, - breakdownHash: String, - cachedAt: ZonedDateTime, - sourceAtUri: Option[String] - ) diff --git a/app/models/domain/ibd/PopulationOverlapScore.scala b/app/models/domain/ibd/PopulationOverlapScore.scala deleted file mode 100644 index 5ef5932a..00000000 --- a/app/models/domain/ibd/PopulationOverlapScore.scala +++ /dev/null @@ -1,12 +0,0 @@ -package models.domain.ibd - -import java.time.ZonedDateTime -import java.util.UUID - -case class PopulationOverlapScore( - id: Option[Long], - sampleGuid1: UUID, - sampleGuid2: UUID, - overlapScore: Double, - computedAt: ZonedDateTime - ) diff --git a/app/models/domain/pangenome/CanonicalPangenomeVariant.scala b/app/models/domain/pangenome/CanonicalPangenomeVariant.scala deleted file mode 100644 index dbf7bed4..00000000 --- a/app/models/domain/pangenome/CanonicalPangenomeVariant.scala +++ /dev/null @@ -1,19 +0,0 @@ -package models.domain.pangenome - -import java.time.ZonedDateTime - -case class CanonicalPangenomeVariant( - id: Option[Long], - pangenomeGraphId: Long, - variantType: String, - variantNodes: List[Int], - variantEdges: List[Int], - referencePathId: Option[Long], - referenceStartPosition: Option[Int], - referenceEndPosition: Option[Int], - referenceAlleleSequence: Option[String], - alternateAlleleSequence: Option[String], - canonicalHash: String, - description: Option[String], - creationDate: ZonedDateTime - ) diff --git a/app/models/domain/pangenome/PangenomeAlignmentCoverage.scala b/app/models/domain/pangenome/PangenomeAlignmentCoverage.scala deleted file mode 100644 index 480ab0f6..00000000 --- 
a/app/models/domain/pangenome/PangenomeAlignmentCoverage.scala +++ /dev/null @@ -1,16 +0,0 @@ -package models.domain.pangenome - -case class PangenomeAlignmentCoverage( - alignmentMetadataId: Long, - meanDepth: Option[Double], - medianDepth: Option[Double], - percentCoverageAt1x: Option[Double], - percentCoverageAt5x: Option[Double], - percentCoverageAt10x: Option[Double], - percentCoverageAt20x: Option[Double], - percentCoverageAt30x: Option[Double], - basesNoCoverage: Option[Long], - basesLowQualityMapping: Option[Long], - basesCallable: Option[Long], - meanMappingQuality: Option[Double] - ) diff --git a/app/models/domain/pangenome/PangenomeAlignmentMetadata.scala b/app/models/domain/pangenome/PangenomeAlignmentMetadata.scala deleted file mode 100644 index bc73ceac..00000000 --- a/app/models/domain/pangenome/PangenomeAlignmentMetadata.scala +++ /dev/null @@ -1,23 +0,0 @@ -package models.domain.pangenome - -import play.api.libs.json.JsValue - -import java.time.ZonedDateTime - -case class PangenomeAlignmentMetadata( - id: Option[Long], - sequenceFileId: Long, - pangenomeGraphId: Int, - metricLevel: String, - pangenomePathId: Option[Int], - pangenomeNodeId: Option[Int], - regionStartNodeId: Option[Int], - regionEndNodeId: Option[Int], - regionName: Option[String], - regionLengthBp: Option[Long], - metricsDate: ZonedDateTime, - analysisTool: String, - analysisToolVersion: Option[String], - notes: Option[String], - metadata: Option[JsValue] - ) \ No newline at end of file diff --git a/app/models/domain/pangenome/PangenomeGraph.scala b/app/models/domain/pangenome/PangenomeGraph.scala deleted file mode 100644 index 359a4f69..00000000 --- a/app/models/domain/pangenome/PangenomeGraph.scala +++ /dev/null @@ -1,11 +0,0 @@ -package models.domain.pangenome - -import java.time.ZonedDateTime - -case class PangenomeGraph( - id: Option[Long], - graphName: String, - sourceGfaFile: Option[String], - description: Option[String], - creationDate: ZonedDateTime - ) \ No newline at end 
of file diff --git a/app/models/domain/pangenome/PangenomeNode.scala b/app/models/domain/pangenome/PangenomeNode.scala deleted file mode 100644 index c60b1e65..00000000 --- a/app/models/domain/pangenome/PangenomeNode.scala +++ /dev/null @@ -1,8 +0,0 @@ -package models.domain.pangenome - -case class PangenomeNode( - id: Option[Long], - graphId: Long, - nodeName: String, - sequenceLength: Option[Long] - ) diff --git a/app/models/domain/pangenome/PangenomePath.scala b/app/models/domain/pangenome/PangenomePath.scala deleted file mode 100644 index 193c582b..00000000 --- a/app/models/domain/pangenome/PangenomePath.scala +++ /dev/null @@ -1,10 +0,0 @@ -package models.domain.pangenome - -case class PangenomePath( - id: Option[Long], - graphId: Long, - pathName: String, - isReference: Boolean, - lengthBp: Option[Long], - description: Option[String] - ) diff --git a/app/models/domain/pangenome/PangenomeVariantLink.scala b/app/models/domain/pangenome/PangenomeVariantLink.scala deleted file mode 100644 index c371bb72..00000000 --- a/app/models/domain/pangenome/PangenomeVariantLink.scala +++ /dev/null @@ -1,13 +0,0 @@ -package models.domain.pangenome - -import java.time.ZonedDateTime - -case class PangenomeVariantLink( - id: Option[Long], - variantId: Int, - canonicalPangenomeVariantId: Int, - pangenomeGraphId: Int, - description: Option[String], - mappingSource: String, - mappingDate: ZonedDateTime - ) \ No newline at end of file diff --git a/app/models/domain/pangenome/ReportedVariantPangenome.scala b/app/models/domain/pangenome/ReportedVariantPangenome.scala deleted file mode 100644 index f96dfcfc..00000000 --- a/app/models/domain/pangenome/ReportedVariantPangenome.scala +++ /dev/null @@ -1,31 +0,0 @@ -package models.domain.pangenome - -import play.api.libs.json.JsValue - -import java.time.ZonedDateTime -import java.util.UUID - -case class ReportedVariantPangenome( - id: Option[Long], - sampleGuid: UUID, - graphId: Int, - variantType: String, - referencePathId: Option[Int], 
- referenceStartPosition: Option[Int], - referenceEndPosition: Option[Int], - variantNodes: List[Int], - variantEdges: List[Int], - alternateAlleleSequence: Option[String], - referenceAlleleSequence: Option[String], - referenceRepeatCount: Option[Int], - alternateRepeatCount: Option[Int], - alleleFraction: Option[Double], - depth: Option[Int], - reportedDate: ZonedDateTime, - provenance: String, - confidenceScore: Double, - notes: Option[String], - status: String, - zygosity: Option[String], - haplotypeInformation: Option[JsValue] - ) \ No newline at end of file diff --git a/app/models/domain/pds/PdsNode.scala b/app/models/domain/pds/PdsNode.scala deleted file mode 100644 index cd84e9c2..00000000 --- a/app/models/domain/pds/PdsNode.scala +++ /dev/null @@ -1,73 +0,0 @@ -package models.domain.pds - -import play.api.libs.json.{Json, JsValue, OFormat} - -import java.time.LocalDateTime - -case class PdsNode( - id: Option[Int] = None, - did: String, - pdsUrl: String, - handle: Option[String] = None, - nodeName: Option[String] = None, - softwareVersion: Option[String] = None, - status: String = "UNKNOWN", - capabilities: JsValue = Json.obj(), - lastHeartbeat: Option[LocalDateTime] = None, - lastCommitCid: Option[String] = None, - lastCommitRev: Option[String] = None, - ipAddress: Option[String] = None, - osInfo: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) - -object PdsNode { - implicit val format: OFormat[PdsNode] = Json.format[PdsNode] - - val ValidStatuses: Set[String] = Set("ONLINE", "OFFLINE", "BUSY", "ERROR", "UNKNOWN") -} - -case class PdsHeartbeatLog( - id: Option[Int] = None, - pdsNodeId: Int, - status: String, - softwareVersion: Option[String] = None, - loadMetrics: Option[JsValue] = None, - processingQueueSize: Option[Int] = Some(0), - errorMessage: Option[String] = None, - recordedAt: LocalDateTime = LocalDateTime.now() - ) - -object PdsHeartbeatLog { - implicit val format: 
OFormat[PdsHeartbeatLog] = Json.format[PdsHeartbeatLog] -} - -case class PdsFleetConfig( - id: Option[Int] = None, - configKey: String, - configValue: String, - description: Option[String] = None, - updatedBy: Option[String] = None, - updatedAt: LocalDateTime = LocalDateTime.now() - ) - -object PdsFleetConfig { - implicit val format: OFormat[PdsFleetConfig] = Json.format[PdsFleetConfig] -} - -case class PdsFleetSummary( - totalNodes: Int, - onlineNodes: Int, - offlineNodes: Int, - busyNodes: Int, - errorNodes: Int, - unknownNodes: Int, - targetVersion: Option[String], - nodesOnTargetVersion: Int, - nodesOutdated: Int - ) - -object PdsFleetSummary { - implicit val format: OFormat[PdsFleetSummary] = Json.format[PdsFleetSummary] -} diff --git a/app/models/domain/pds/PdsSubmission.scala b/app/models/domain/pds/PdsSubmission.scala deleted file mode 100644 index 58b7023f..00000000 --- a/app/models/domain/pds/PdsSubmission.scala +++ /dev/null @@ -1,47 +0,0 @@ -package models.domain.pds - -import play.api.libs.json.{JsValue, Json, OFormat} - -import java.time.LocalDateTime -import java.util.UUID - -case class PdsSubmission( - id: Option[Int] = None, - pdsNodeId: Int, - submissionType: String, - biosampleId: Option[Int] = None, - biosampleGuid: Option[UUID] = None, - proposedValue: String, - confidenceScore: Option[Double] = None, - algorithmVersion: Option[String] = None, - softwareVersion: Option[String] = None, - payload: Option[JsValue] = None, - status: String = "PENDING", - reviewedBy: Option[String] = None, - reviewedAt: Option[LocalDateTime] = None, - reviewNotes: Option[String] = None, - atUri: Option[String] = None, - atCid: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now() - ) - -object PdsSubmission { - implicit val format: OFormat[PdsSubmission] = Json.format[PdsSubmission] - - val ValidTypes: Set[String] = Set("HAPLOGROUP_CALL", "VARIANT_CALL", "BRANCH_PROPOSAL", "PRIVATE_VARIANT", "STR_PROFILE") - val ValidStatuses: Set[String] = 
Set("PENDING", "ACCEPTED", "REJECTED", "SUPERSEDED") -} - -case class SubmissionSummary( - pdsNodeId: Int, - did: String, - totalSubmissions: Int, - pendingCount: Int, - acceptedCount: Int, - rejectedCount: Int, - acceptanceRate: Double - ) - -object SubmissionSummary { - implicit val format: OFormat[SubmissionSummary] = Json.format[SubmissionSummary] -} diff --git a/app/models/domain/publications/GenomicStudy.scala b/app/models/domain/publications/GenomicStudy.scala deleted file mode 100644 index ca2a2b44..00000000 --- a/app/models/domain/publications/GenomicStudy.scala +++ /dev/null @@ -1,56 +0,0 @@ -package models.domain.publications - -import play.api.libs.json.* - -enum StudySource: - case ENA, NCBI_BIOPROJECT, NCBI_GENBANK - -object StudySource { - implicit val studySourceReads: Reads[StudySource] = Reads { json => - json.validate[String].map(s => StudySource.valueOf(s)) - } - - implicit val studySourceWrites: Writes[StudySource] = Writes { source => - JsString(source.toString) - } - - implicit val studySourceFormat: Format[StudySource] = - Format(studySourceReads, studySourceWrites) -} - -/** - * Represents an ENA (European Nucleotide Archive) study with details about its accession, - * title, institution, and other metadata. - * - * @param id An optional unique identifier for the study. - * @param accession The accession number for the study, typically used as a unique reference in databases. - * @param title The title of the study, providing a brief description or summary. - * @param centerName The name of the center or institution responsible for the study. - * @param studyName The name of the study, potentially providing additional context about its purpose or scope. - * @param details A textual description or additional information related to the study. 
- */ -case class GenomicStudy( - id: Option[Int] = None, - accession: String, - title: String, - centerName: String, - studyName: String, - details: String, - source: StudySource, - submissionDate: Option[java.time.LocalDate] = None, - lastUpdate: Option[java.time.LocalDate] = None, - bioProjectId: Option[String] = None, - molecule: Option[String] = None, - topology: Option[String] = None, - taxonomyId: Option[Int] = None, - version: Option[String] = None - ) - -/** - * Companion object for the `EnaStudy` case class. - * - * Provides implicit JSON formatting support for serializing and deserializing instances of `EnaStudy`. - */ -object GenomicStudy { - implicit val enaStudyFormat: OFormat[GenomicStudy] = Json.format[GenomicStudy] -} \ No newline at end of file diff --git a/app/models/domain/publications/Publication.scala b/app/models/domain/publications/Publication.scala deleted file mode 100644 index 17782a6d..00000000 --- a/app/models/domain/publications/Publication.scala +++ /dev/null @@ -1,34 +0,0 @@ -package models.domain.publications - -import play.api.libs.json.{Json, OFormat} - -import java.time.LocalDate - -case class Publication( - id: Option[Int], - openAlexId: Option[String], - pubmedId: Option[String], - doi: Option[String], - title: String, - authors: Option[String], - abstractSummary: Option[String], - journal: Option[String], - publicationDate: Option[LocalDate], - url: Option[String], - citationNormalizedPercentile: Option[Float], - citedByCount: Option[Int], - openAccessStatus: Option[String], - openAccessUrl: Option[String], - primaryTopic: Option[String], - publicationType: Option[String], - publisher: Option[String] - ) - -/** - * Companion object for the `Publication` case class. - * - * Provides implicit JSON formatting support for serializing and deserializing instances of `Publication`. 
- */ -object Publication { - implicit val publicationFormat: OFormat[Publication] = Json.format[Publication] -} diff --git a/app/models/domain/publications/PublicationBiosample.scala b/app/models/domain/publications/PublicationBiosample.scala deleted file mode 100644 index a21aaf60..00000000 --- a/app/models/domain/publications/PublicationBiosample.scala +++ /dev/null @@ -1,9 +0,0 @@ -package models.domain.publications - -/** - * Represents an association between a publication and a biological sample within the system. - * - * @param publicationId The unique identifier of the publication associated with the biosample. - * @param biosampleId The unique identifier of the biosample associated with the publication. - */ -case class PublicationBiosample(publicationId: Int, biosampleId: Int) diff --git a/app/models/domain/publications/PublicationCandidate.scala b/app/models/domain/publications/PublicationCandidate.scala deleted file mode 100644 index f68c8f2d..00000000 --- a/app/models/domain/publications/PublicationCandidate.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.domain.publications - -import java.time.{LocalDate, LocalDateTime} -import java.util.UUID -import play.api.libs.json.{JsValue, Json, OFormat} - -case class PublicationCandidate( - id: Option[Int], - openAlexId: String, - doi: Option[String], - title: String, - `abstract`: Option[String], - publicationDate: Option[LocalDate], - journalName: Option[String], - relevanceScore: Option[Double], - discoveryDate: LocalDateTime, - status: String, // pending, accepted, rejected, deferred - reviewedBy: Option[UUID], - reviewedAt: Option[LocalDateTime], - rejectionReason: Option[String], - rawMetadata: Option[JsValue] - ) - -object PublicationCandidate { - implicit val format: OFormat[PublicationCandidate] = Json.format[PublicationCandidate] -} diff --git a/app/models/domain/publications/PublicationCitizenBiosample.scala b/app/models/domain/publications/PublicationCitizenBiosample.scala deleted file mode 
100644 index 263e20cc..00000000 --- a/app/models/domain/publications/PublicationCitizenBiosample.scala +++ /dev/null @@ -1,3 +0,0 @@ -package models.domain.publications - -case class PublicationCitizenBiosample(publicationId: Int, citizenBiosampleId: Int) diff --git a/app/models/domain/publications/PublicationGenomicStudy.scala b/app/models/domain/publications/PublicationGenomicStudy.scala deleted file mode 100644 index 0627f53d..00000000 --- a/app/models/domain/publications/PublicationGenomicStudy.scala +++ /dev/null @@ -1,15 +0,0 @@ -package models.domain.publications - -/** - * Represents the relationship between a publication and an ENA (European Nucleotide Archive) study. - * - * This case class is used to associate metadata from a publication with a specific ENA study. - * It provides two identifiers: - * - `publicationId`: The unique identifier of the publication. - * - `studyId`: The unique identifier of the ENA study. - * - * These identifiers can be used to establish a link between the academic research or - * findings (publication) and the corresponding study stored in the ENA, facilitating - * traceability and integration of research data. 
- */ -case class PublicationGenomicStudy(publicationId: Int, studyId: Int) diff --git a/app/models/domain/publications/PublicationSearchConfig.scala b/app/models/domain/publications/PublicationSearchConfig.scala deleted file mode 100644 index f8790a61..00000000 --- a/app/models/domain/publications/PublicationSearchConfig.scala +++ /dev/null @@ -1,19 +0,0 @@ -package models.domain.publications - -import java.time.LocalDateTime -import play.api.libs.json.{JsValue, Json, OFormat} - -case class PublicationSearchConfig( - id: Option[Int], - name: String, - searchQuery: String, - concepts: Option[JsValue], - journals: Option[JsValue], - enabled: Boolean, - lastRun: Option[LocalDateTime], - createdAt: LocalDateTime - ) - -object PublicationSearchConfig { - implicit val format: OFormat[PublicationSearchConfig] = Json.format[PublicationSearchConfig] -} diff --git a/app/models/domain/publications/PublicationSearchRun.scala b/app/models/domain/publications/PublicationSearchRun.scala deleted file mode 100644 index 3f3c06d1..00000000 --- a/app/models/domain/publications/PublicationSearchRun.scala +++ /dev/null @@ -1,18 +0,0 @@ -package models.domain.publications - -import java.time.LocalDateTime -import play.api.libs.json.{Json, OFormat} - -case class PublicationSearchRun( - id: Option[Int], - configId: Int, - runAt: LocalDateTime, - candidatesFound: Int, - newCandidates: Int, - queryUsed: Option[String], - durationMs: Option[Int] - ) - -object PublicationSearchRun { - implicit val format: OFormat[PublicationSearchRun] = Json.format[PublicationSearchRun] -} diff --git a/app/models/domain/social/Conversation.scala b/app/models/domain/social/Conversation.scala deleted file mode 100644 index 0d5b72f1..00000000 --- a/app/models/domain/social/Conversation.scala +++ /dev/null @@ -1,11 +0,0 @@ -package models.domain.social - -import java.time.LocalDateTime -import java.util.UUID - -case class Conversation( - id: UUID = UUID.randomUUID(), - `type`: String, // 'DIRECT', 'GROUP', 
'SYSTEM', 'RECRUITMENT' - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) diff --git a/app/models/domain/social/ConversationParticipant.scala b/app/models/domain/social/ConversationParticipant.scala deleted file mode 100644 index 19ae9452..00000000 --- a/app/models/domain/social/ConversationParticipant.scala +++ /dev/null @@ -1,12 +0,0 @@ -package models.domain.social - -import java.time.LocalDateTime -import java.util.UUID - -case class ConversationParticipant( - conversationId: UUID, - userDid: String, - role: String = "MEMBER", // 'ADMIN', 'MEMBER' - lastReadAt: Option[LocalDateTime] = None, - joinedAt: LocalDateTime = LocalDateTime.now() - ) diff --git a/app/models/domain/social/FeedPost.scala b/app/models/domain/social/FeedPost.scala deleted file mode 100644 index 87d1c98b..00000000 --- a/app/models/domain/social/FeedPost.scala +++ /dev/null @@ -1,16 +0,0 @@ -package models.domain.social - -import java.time.LocalDateTime -import java.util.UUID - -case class FeedPost( - id: UUID = UUID.randomUUID(), - authorDid: String, - content: String, - parentPostId: Option[UUID] = None, - rootPostId: Option[UUID] = None, - topic: Option[String] = None, - authorReputationScore: Int = 0, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) diff --git a/app/models/domain/social/Message.scala b/app/models/domain/social/Message.scala deleted file mode 100644 index 8aea16d8..00000000 --- a/app/models/domain/social/Message.scala +++ /dev/null @@ -1,14 +0,0 @@ -package models.domain.social - -import java.time.LocalDateTime -import java.util.UUID - -case class Message( - id: UUID = UUID.randomUUID(), - conversationId: UUID, - senderDid: String, - content: String, - contentType: String = "TEXT", // 'TEXT', 'MARKDOWN', 'JSON_PAYLOAD' - createdAt: LocalDateTime = LocalDateTime.now(), - isEdited: Boolean = false - ) diff --git a/app/models/domain/social/ReputationEvent.scala 
b/app/models/domain/social/ReputationEvent.scala deleted file mode 100644 index 9212443b..00000000 --- a/app/models/domain/social/ReputationEvent.scala +++ /dev/null @@ -1,16 +0,0 @@ -package models.domain.social - -import java.time.LocalDateTime -import java.util.UUID - -case class ReputationEvent( - id: Option[UUID] = None, - userId: UUID, - eventTypeId: UUID, - actualPointsChange: Int, - sourceUserId: Option[UUID] = None, - relatedEntityType: Option[String] = None, - relatedEntityId: Option[UUID] = None, - notes: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now() - ) \ No newline at end of file diff --git a/app/models/domain/social/ReputationEventType.scala b/app/models/domain/social/ReputationEventType.scala deleted file mode 100644 index e85db90d..00000000 --- a/app/models/domain/social/ReputationEventType.scala +++ /dev/null @@ -1,15 +0,0 @@ -package models.domain.social - -import java.time.LocalDateTime -import java.util.UUID - -case class ReputationEventType( - id: Option[UUID] = None, - name: String, - description: Option[String] = None, - defaultPointsChange: Int, - isPositive: Boolean, - isSystemGenerated: Boolean, - createdAt: LocalDateTime = LocalDateTime.now(), - updatedAt: LocalDateTime = LocalDateTime.now() - ) \ No newline at end of file diff --git a/app/models/domain/social/UserBlock.scala b/app/models/domain/social/UserBlock.scala deleted file mode 100644 index 2d68ff1d..00000000 --- a/app/models/domain/social/UserBlock.scala +++ /dev/null @@ -1,10 +0,0 @@ -package models.domain.social - -import java.time.LocalDateTime - -case class UserBlock( - blockerDid: String, - blockedDid: String, - reason: Option[String] = None, - createdAt: LocalDateTime = LocalDateTime.now() - ) diff --git a/app/models/domain/social/UserReputationScore.scala b/app/models/domain/social/UserReputationScore.scala deleted file mode 100644 index 2268394f..00000000 --- a/app/models/domain/social/UserReputationScore.scala +++ /dev/null @@ -1,11 +0,0 @@ 
-package models.domain.social - -import java.time.ZonedDateTime -import java.time.LocalDateTime -import java.util.UUID - -case class UserReputationScore( - userId: UUID, - score: Long, - lastCalculatedAt: LocalDateTime - ) \ No newline at end of file diff --git a/app/models/domain/support/ContactMessage.scala b/app/models/domain/support/ContactMessage.scala deleted file mode 100644 index b4e8ed86..00000000 --- a/app/models/domain/support/ContactMessage.scala +++ /dev/null @@ -1,65 +0,0 @@ -package models.domain.support - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Represents a contact message from either an authenticated user or anonymous visitor. - * - * @param id UUID Primary Key - * @param userId Optional - linked user if authenticated - * @param senderName Name provided by anonymous users - * @param senderEmail Email provided by anonymous users - * @param subject Message subject - * @param message Message content - * @param status Message status: new, read, replied, closed - * @param ipAddressHash Hashed IP address for spam tracking - * @param userAgent Browser user agent - * @param createdAt When the message was sent - * @param updatedAt Last update timestamp - * @param userLastViewedAt When the user last viewed this message thread - */ -case class ContactMessage( - id: Option[UUID], - userId: Option[UUID], - senderName: Option[String], - senderEmail: Option[String], - subject: String, - message: String, - status: MessageStatus, - ipAddressHash: Option[String], - userAgent: Option[String], - createdAt: LocalDateTime, - updatedAt: LocalDateTime, - userLastViewedAt: Option[LocalDateTime] = None -) { - /** - * Returns the display name for the sender. - */ - def displayName: String = senderName.getOrElse("Authenticated User") - - /** - * Returns true if the message is from an anonymous user. - */ - def isAnonymous: Boolean = userId.isEmpty -} - -/** - * Status of a contact message. 
- */ -enum MessageStatus(val value: String) { - case New extends MessageStatus("new") - case Read extends MessageStatus("read") - case Replied extends MessageStatus("replied") - case Closed extends MessageStatus("closed") -} - -object MessageStatus { - def fromString(s: String): Option[MessageStatus] = s.toLowerCase match { - case "new" => Some(New) - case "read" => Some(Read) - case "replied" => Some(Replied) - case "closed" => Some(Closed) - case _ => None - } -} diff --git a/app/models/domain/support/MessageReply.scala b/app/models/domain/support/MessageReply.scala deleted file mode 100644 index e035d192..00000000 --- a/app/models/domain/support/MessageReply.scala +++ /dev/null @@ -1,25 +0,0 @@ -package models.domain.support - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Represents an admin reply to a contact message. - * - * @param id UUID Primary Key - * @param messageId Foreign key to the contact message - * @param adminUserId The admin user who replied - * @param replyText Reply content - * @param emailSent Whether an email was sent (for anonymous users) - * @param emailSentAt When the email was sent - * @param createdAt When the reply was created - */ -case class MessageReply( - id: Option[UUID], - messageId: UUID, - adminUserId: UUID, - replyText: String, - emailSent: Boolean, - emailSentAt: Option[LocalDateTime], - createdAt: LocalDateTime -) diff --git a/app/models/domain/user/User.scala b/app/models/domain/user/User.scala deleted file mode 100644 index e2282644..00000000 --- a/app/models/domain/user/User.scala +++ /dev/null @@ -1,15 +0,0 @@ -package models.domain.user - -import java.time.LocalDateTime -import java.util.UUID - -case class User( - id: Option[UUID], - email: Option[String], - did: String, - handle: Option[String], - displayName: Option[String], - createdAt: LocalDateTime, - updatedAt: LocalDateTime, - isActive: Boolean - ) \ No newline at end of file diff --git a/app/models/domain/user/UserPdsInfo.scala 
b/app/models/domain/user/UserPdsInfo.scala deleted file mode 100644 index 1e781d8c..00000000 --- a/app/models/domain/user/UserPdsInfo.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.domain.user - -import java.time.LocalDateTime -import java.util.UUID - -/** - * Stores the home PDS information for a user's AT Protocol identity. - * Lives in auth schema as it's primarily used for authentication. - * - * @param id UUID Primary Key - * @param userId Foreign Key to users.id - * @param pdsUrl The resolved PDS endpoint URL (e.g., https://bsky.social) - * @param did The user's DID (e.g., did:plc:xxx) - * @param handle Cached handle for quick lookups (e.g., alice.bsky.social) - * @param createdAt Record creation timestamp - * @param updatedAt Record update timestamp - */ -case class UserPdsInfo( - id: Option[UUID], - userId: UUID, - pdsUrl: String, - did: String, - handle: Option[String] = None, - createdAt: LocalDateTime, - updatedAt: LocalDateTime - ) \ No newline at end of file diff --git a/app/models/forms/Contact.scala b/app/models/forms/Contact.scala deleted file mode 100644 index ee831486..00000000 --- a/app/models/forms/Contact.scala +++ /dev/null @@ -1,67 +0,0 @@ -package models.forms - -import play.api.data.Form -import play.api.data.Forms.{email, mapping, nonEmptyText, text} - -/** - * Contains definitions and utilities for managing contact-related functionality, such as a contact form. - */ -object Contact { - /** - * Data Transfer Object (DTO) representing a contact form submission. - * - * @param name The name of the individual submitting the contact request. This field is required and should not be empty. - * @param email The email address of the individual submitting the request. This field is required and should follow standard email format. - * @param subject The subject of the message. This field is required and serves as a title for the contact request. - * @param message The main content of the contact request. 
This field is required and provides detailed information. - * @param phoneNumber A honeypot field used for spam prevention. Legitimate submissions should leave this field empty. - */ - case class ContactDTO( - name: String, - email: String, - subject: String, - message: String, - phoneNumber: String // honeypot field - ) - - /** - * Companion object for the ContactDTO case class. - * - * Provides utility methods for working with instances of ContactDTO, including pattern matching - * support via the custom unapply method. The unapply method allows deconstruction of a ContactDTO - * instance into a tuple of its constituent fields - name, email, subject, message, and phoneNumber. - */ - object ContactDTO { - def unapply(dto: ContactDTO): Option[(String, String, String, String, String)] = - Some((dto.name, dto.email, dto.subject, dto.message, dto.phoneNumber)) - } - - /** - * Represents a form definition for capturing and validating contact form submissions. - * - * This form utilizes the Play Framework's `Form` and `mapping` DSL to define a structured input - * schema for `ContactDTO`. Each field is validated based on its requirements: - * - `name`: Must be non-empty and have a length between 1 and 64 characters. - * - `email`: Must be a valid email address. - * - `subject`: Must be non-empty and have a length between 1 and 64 characters. - * - `message`: Must be non-empty and can have a maximum length of 2048 characters. - * - `phoneNumber`: A honeypot field that should be left empty to avoid spam submissions. - * - * The `verifying` function adds a custom validation to check that the `phoneNumber` field is empty. - * If the field is filled, it likely indicates spam, and the form submission will be marked invalid. 
- */ - val form: Form[ContactDTO] = Form( - mapping( - "name" -> nonEmptyText(1, 64), - "email" -> email, - "subject" -> nonEmptyText(1, 64), - "message" -> nonEmptyText(1, 2048), - "phoneNumber" -> text // honeypot field - )(ContactDTO.apply)(ContactDTO.unapply).verifying( - "Invalid form submission", - fields => fields match { - case dto: ContactDTO => dto.phoneNumber.isEmpty // If honeypot is filled, it's probably spam - } - ) - ) -} diff --git a/app/models/forms/DoiSubmissionForm.scala b/app/models/forms/DoiSubmissionForm.scala deleted file mode 100644 index bfcf9bc8..00000000 --- a/app/models/forms/DoiSubmissionForm.scala +++ /dev/null @@ -1,28 +0,0 @@ -package models.forms - -import play.api.data.Form -import play.api.data.Forms.* - -case class DoiSubmission(doi: String) - -object DoiSubmission { - def apply(doi: String): DoiSubmission = new DoiSubmission(doi) - - def unapply(submission: DoiSubmission): Option[String] = Some(submission.doi) -} - -object DoiSubmissionForm { - val form = Form( - mapping( - "doi" -> nonEmptyText.transform[String]( - _.trim, - identity - ).verifying( - "Invalid DOI format", - doi => doi.matches("^10\\.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$") - ) - )(DoiSubmission.apply)(DoiSubmission.unapply) - ) -} - - diff --git a/app/models/forms/EnaSubmissionForm.scala b/app/models/forms/EnaSubmissionForm.scala deleted file mode 100644 index 2d6d7946..00000000 --- a/app/models/forms/EnaSubmissionForm.scala +++ /dev/null @@ -1,26 +0,0 @@ -package models.forms - -import play.api.data.Form -import play.api.data.Forms.* - -case class EnaAccessionSubmission(accession: String) - -object EnaAccessionSubmission { - def apply(accession: String): EnaAccessionSubmission = new EnaAccessionSubmission(accession) - - def unapply(submission: EnaAccessionSubmission): Option[String] = Some(submission.accession) -} - -object EnaAccessionSubmissionForm { - val form = Form( - mapping( - "accession" -> nonEmptyText.transform[String]( - _.trim, - identity - ).verifying( - 
"Invalid ENA accession format", - accession => accession.matches("^PRJ[EDN][A-Z]\\d+$") - ) - )(EnaAccessionSubmission.apply)(EnaAccessionSubmission.unapply) - ) -} \ No newline at end of file diff --git a/app/models/forms/PaperSubmission.scala b/app/models/forms/PaperSubmission.scala deleted file mode 100644 index 8cd66eeb..00000000 --- a/app/models/forms/PaperSubmission.scala +++ /dev/null @@ -1,63 +0,0 @@ - -package models.forms - -import play.api.data.Form -import play.api.data.Forms.* - -case class PaperSubmission( - doi: String, - enaAccession: Option[String], - forceRefresh: Boolean) - -object PaperSubmission { - def unapply(submission: PaperSubmission): Option[(String, Option[String], Boolean)] = { - Some((submission.doi, submission.enaAccession, submission.forceRefresh)) - } -} - -object PaperSubmissionForm { - private def extractDoi(input: String): String = { - input.trim match { - case url if url.startsWith("https://doi.org/") => url.substring("https://doi.org/".length) - case url if url.startsWith("http://doi.org/") => url.substring("http://doi.org/".length) - case doi => doi - } - } - - val form = Form( - mapping( - "doi" -> nonEmptyText.transform[String]( - extractDoi, - identity - ).verifying( - "Invalid DOI format", - doi => doi.matches("^10\\.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$") - ), - "enaAccession" -> optional( - text.transform[String]( - _.trim, - identity - ).verifying( - "Invalid study accession format", - accession => accession.isEmpty || - // ENA Projects - accession.matches("^PRJEB\\d+$") || - // ENA SRA - accession.matches("^(ERR|ERX|ERS|ERA|ERZ|ERP)\\d+$") || - // NCBI BioProjects - accession.matches("^PRJNA\\d+$") || - // NCBI SRA - accession.matches("^(SRR|SRX|SRS|SRP)\\d+$") || - // NCBI GenBank/RefSeq - accession.matches("^(NM|NP|XM|XP|NR|XR|WP)_\\d+(\\.\\d+)?$") || - // Traditional GenBank - (accession.matches("^[A-Z]\\d{5}(\\.\\d+)?$") || - accession.matches("^[A-Z]{2}\\d{6}(\\.\\d+)?$") || - 
accession.matches("^[A-Z]{4}01\\d{6}(\\.\\d+)?$")) - ) - ), - "forceRefresh" -> boolean - )(PaperSubmission.apply)(PaperSubmission.unapply) - ) -} - diff --git a/app/models/view/TreeViewModels.scala b/app/models/view/TreeViewModels.scala deleted file mode 100644 index 7f47ad31..00000000 --- a/app/models/view/TreeViewModels.scala +++ /dev/null @@ -1,57 +0,0 @@ -package models.view - -import models.api.* - -/** - * Represents a tree node after layout calculation, ready for SVG rendering in the view. - * Contains original TreeNodeDTO data plus calculated x, y coordinates. - */ -case class TreeNodeViewModel( - name: String, - variantsCount: Option[Int], - children: List[TreeNodeViewModel], - fillColor: String, - isBackbone: Boolean, - isRecentlyUpdated: Boolean, - formedYbp: Option[Int], - tmrcaYbp: Option[Int], - x: Double, // Calculated vertical position for SVG - y: Double // Calculated horizontal position (depth) for SVG - ) { - /** Format formed date as calendar year (AD/BC) */ - def formedFormatted: Option[String] = formedYbp.map(ybp => formatYbp(ybp)) - - /** Format TMRCA as calendar year (AD/BC) */ - def tmrcaFormatted: Option[String] = tmrcaYbp.map(ybp => formatYbp(ybp)) - - private def formatYbp(ybp: Int): String = { - val year = 1950 - ybp - if (year < 0) s"${-year} BC" else s"$year AD" - } -} - -/** - * Represents a link between two tree nodes, with pre-calculated SVG path data, ready for the view. - */ -case class TreeLinkViewModel( - sourceName: String, // Useful for debugging or associating with original nodes - targetName: String, - pathData: String // The 'd' attribute for SVG - ) - -/** - * The top-level View Model holding all data needed to render the entire SVG tree. - * - * @param rootNode The root of the tree hierarchy in view model format. - * @param allNodes A flat list of all nodes in view model format for easier iteration. - * @param allLinks A flat list of all links in view model format. 
- * @param svgWidth Calculated width for the SVG viewport. - * @param svgHeight Calculated height for the SVG viewport. - */ -case class TreeViewModel( - rootNode: TreeNodeViewModel, - allNodes: Seq[TreeNodeViewModel], - allLinks: Seq[TreeLinkViewModel], - svgWidth: Double, - svgHeight: Double - ) \ No newline at end of file diff --git a/app/modules/ApiSecurityModule.scala b/app/modules/ApiSecurityModule.scala deleted file mode 100644 index d9aec5fd..00000000 --- a/app/modules/ApiSecurityModule.scala +++ /dev/null @@ -1,39 +0,0 @@ -package modules - -import actions.{ApiSecurityAction, DevelopmentSecureApiAction, ProductionSecureApiAction} -import com.google.inject.{AbstractModule, Singleton} -import play.api.{Configuration, Environment, Logging, Mode} - -/** - * A Guice module for configuring API security action bindings based on the application mode. - * - * The `ApiSecurityModule` extends `AbstractModule`, enabling dependency injection for handling - * secure API actions. Depending on the environment mode, it binds the `ApiSecurityAction` interface - * to either a production or development implementation: - * - * - In `Mode.Prod`, binds `ApiSecurityAction` to `ProductionSecureApiAction` - * for enforcing strict API key validation. - * - In other modes, binds `ApiSecurityAction` to `DevelopmentSecureApiAction` - * where API key validation is disabled for ease of local development. - * - * The module uses the application's environment to determine the appropriate binding, - * enabling seamless toggling of security configurations between production and development. 
- */ -class ApiSecurityModule(environment: Environment, configuration: Configuration) extends AbstractModule with Logging { - override def configure(): Unit = { - environment.mode match { - case Mode.Prod => - logger.info("Binding ProductionSecureApiAction for API security") - bind(classOf[ApiSecurityAction]) - .to(classOf[ProductionSecureApiAction]) - .in(classOf[Singleton]) - - case _ => - logger.info("Binding DevelopmentSecureApiAction for API security (API key validation disabled)") - bind(classOf[ApiSecurityAction]) - .to(classOf[DevelopmentSecureApiAction]) - .in(classOf[Singleton]) - } - } -} - diff --git a/app/modules/ApplicationModule.scala b/app/modules/ApplicationModule.scala deleted file mode 100644 index cb372734..00000000 --- a/app/modules/ApplicationModule.scala +++ /dev/null @@ -1,22 +0,0 @@ -package modules - -import actors.{GenomicStudyUpdateActor, MatchDiscoveryActor, PublicationUpdateActor, VariantExportActor, YBrowseVariantUpdateActor} -import com.google.inject.AbstractModule -import play.api.libs.concurrent.PekkoGuiceSupport -import services.ibd.{MatchDiscoveryService, MatchDiscoveryServiceImpl, PopulationAnalysisService, PopulationAnalysisServiceImpl} - -class ApplicationModule extends AbstractModule with PekkoGuiceSupport { - override def configure(): Unit = { - bindActor[PublicationUpdateActor]("publication-update-actor") - bindActor[GenomicStudyUpdateActor]("genomic-study-update-actor") - bindActor[actors.PublicationDiscoveryActor]("publication-discovery-actor") - bindActor[YBrowseVariantUpdateActor]("ybrowse-variant-update-actor") - bindActor[VariantExportActor]("variant-export-actor") - bindActor[MatchDiscoveryActor]("match-discovery-actor") - - bind(classOf[PopulationAnalysisService]).to(classOf[PopulationAnalysisServiceImpl]) - bind(classOf[MatchDiscoveryService]).to(classOf[MatchDiscoveryServiceImpl]) - - bind(classOf[Scheduler]).asEagerSingleton() - } -} \ No newline at end of file diff --git a/app/modules/BaseModule.scala 
b/app/modules/BaseModule.scala deleted file mode 100644 index 7b1a20a7..00000000 --- a/app/modules/BaseModule.scala +++ /dev/null @@ -1,228 +0,0 @@ -package modules - -import com.google.inject.AbstractModule -import repositories.* -import services.{AccessionNumberGenerator, BiosampleAccessionGenerator} -import startup.SecurityStartupCheck - -/** - * A Guice module for configuring bindings between repository interfaces and their concrete implementations. - * - * This class extends `AbstractModule` and defines the dependency injection setup for various repository - * interfaces used in the application. All bindings are configured using the `bind(...).to(...)` syntax, where - * each interface is mapped to its corresponding implementation. - * - * This module ensures that instances of the respective interfaces are automatically injected with their - * implementations wherever needed in the application, promoting loose coupling and easier testing. - */ -class BaseModule extends AbstractModule { - override def configure(): Unit = { - // Eagerly bind security startup check so it runs at boot - bind(classOf[SecurityStartupCheck]).asEagerSingleton() - bind(classOf[BiosampleRepository]) - .to(classOf[BiosampleRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PublicationRepository]).to(classOf[PublicationRepositoryImpl]) - bind(classOf[PublicationCandidateRepository]).to(classOf[PublicationCandidateRepositoryImpl]) - bind(classOf[PublicationSearchConfigRepository]).to(classOf[PublicationSearchConfigRepositoryImpl]) - bind(classOf[PublicationSearchRunRepository]).to(classOf[PublicationSearchRunRepositoryImpl]) - bind(classOf[PublicationBiosampleRepository]).to(classOf[PublicationBiosampleRepositoryImpl]) - - bind(classOf[RoleRepository]).asEagerSingleton() - bind(classOf[UserRoleRepository]).asEagerSingleton() - - bind(classOf[GenbankContigRepository]).to(classOf[GenbankContigRepositoryImpl]) - bind(classOf[VariantV2Repository]).to(classOf[VariantV2RepositoryImpl]) - 
bind(classOf[HaplogroupCoreRepository]).to(classOf[HaplogroupCoreRepositoryImpl]) - bind(classOf[HaplogroupRelationshipRepository]).to(classOf[HaplogroupRelationshipRepositoryImpl]) - bind(classOf[HaplogroupRevisionMetadataRepository]).to(classOf[HaplogroupRevisionMetadataRepositoryImpl]) - bind(classOf[HaplogroupRevisionRepository]).to(classOf[HaplogroupRevisionRepositoryImpl]) - bind(classOf[HaplogroupVariantMetadataRepository]).to(classOf[HaplogroupVariantMetadataRepositoryImpl]) - bind(classOf[HaplogroupVariantRepository]).to(classOf[HaplogroupVariantRepositoryImpl]) - - bind(classOf[services.TestTypeService]) - .to(classOf[services.TestTypeServiceImpl]) - .asEagerSingleton() - - bind(classOf[GenomicStudyRepository]) - .to(classOf[GenomicStudyRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PublicationGenomicStudyRepository]) - .to(classOf[PublicationGenomicStudyRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[BiosampleOriginalHaplogroupRepository]) - .to(classOf[BiosampleOriginalHaplogroupRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[InstrumentObservationRepository]) - .to(classOf[InstrumentObservationRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[InstrumentProposalRepository]) - .to(classOf[InstrumentProposalRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[TestTypeTargetRegionRepository]) - .to(classOf[TestTypeTargetRegionRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[SequenceFileRepository]) - .to(classOf[SequenceFileRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[SequenceLibraryRepository]) - .to(classOf[SequenceLibraryRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[AccessionNumberGenerator]) - .to(classOf[BiosampleAccessionGenerator]) - .asEagerSingleton() - - bind(classOf[SpecimenDonorRepository]) - .to(classOf[SpecimenDonorRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[SequencerInstrumentRepository]).to(classOf[SequencerInstrumentRepositoryImpl]) - - 
bind(classOf[SequencingLabRepository]) - .to(classOf[SequencingLabRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[CitizenBiosampleRepository]) - .to(classOf[CitizenBiosampleRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[ProjectRepository]) - .to(classOf[ProjectRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PublicationCitizenBiosampleRepository]) - .to(classOf[PublicationCitizenBiosampleRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[AlignmentRepository]) - .to(classOf[AlignmentRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[TestTypeRepository]) - .to(classOf[TestTypeRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PopulationBreakdownRepository]) - .to(classOf[PopulationBreakdownRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[GenotypeDataRepository]) - .to(classOf[GenotypeDataRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[HaplogroupReconciliationRepository]) - .to(classOf[HaplogroupReconciliationRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[GenomeRegionsRepository]) - .to(classOf[GenomeRegionsRepositoryImpl]) - .asEagerSingleton() - - // Discovery System - bind(classOf[PrivateVariantRepository]) - .to(classOf[PrivateVariantRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[ProposedBranchRepository]) - .to(classOf[ProposedBranchRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[CuratorActionRepository]) - .to(classOf[CuratorActionRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[BiosampleHaplogroupRepository]) - .to(classOf[BiosampleHaplogroupRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[BiosampleCallableLociRepository]) - .to(classOf[BiosampleCallableLociRepositoryImpl]) - .asEagerSingleton() - - // STR age estimation - bind(classOf[StrMutationRateRepository]) - .to(classOf[StrMutationRateRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[BiosampleVariantCallRepository]) - .to(classOf[BiosampleVariantCallRepositoryImpl]) - .asEagerSingleton() - - 
bind(classOf[HaplogroupCharacterStateRepository]) - .to(classOf[HaplogroupCharacterStateRepositoryImpl]) - .asEagerSingleton() - - // Age estimation — anchors and ancestral STR motifs - bind(classOf[GenealogicalAnchorRepository]) - .to(classOf[GenealogicalAnchorRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[HaplogroupAncestralStrRepository]) - .to(classOf[HaplogroupAncestralStrRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[CoverageExpectationProfileRepository]) - .to(classOf[CoverageExpectationProfileRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[GroupProjectRepository]) - .to(classOf[GroupProjectRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[GroupProjectMemberRepository]) - .to(classOf[GroupProjectMemberRepositoryImpl]) - .asEagerSingleton() - - // PDS Fleet Management - bind(classOf[PdsNodeRepository]) - .to(classOf[PdsNodeRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PdsHeartbeatLogRepository]) - .to(classOf[PdsHeartbeatLogRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PdsFleetConfigRepository]) - .to(classOf[PdsFleetConfigRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PdsSubmissionRepository]) - .to(classOf[PdsSubmissionRepositoryImpl]) - .asEagerSingleton() - - // Billing / Patronage - bind(classOf[PatronSubscriptionRepository]) - .to(classOf[PatronSubscriptionRepositoryImpl]) - .asEagerSingleton() - - // IBD Match Discovery - bind(classOf[MatchSuggestionRepository]) - .to(classOf[MatchSuggestionRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PopulationBreakdownCacheRepository]) - .to(classOf[PopulationBreakdownCacheRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[PopulationOverlapScoreRepository]) - .to(classOf[PopulationOverlapScoreRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[MatchRequestTrackingRepository]) - .to(classOf[MatchRequestTrackingRepositoryImpl]) - .asEagerSingleton() - - bind(classOf[MatchConsentTrackingRepository]) - 
.to(classOf[MatchConsentTrackingRepositoryImpl]) - .asEagerSingleton() - } -} diff --git a/app/modules/PDSRegistrationModule.scala b/app/modules/PDSRegistrationModule.scala deleted file mode 100644 index b7f23bed..00000000 --- a/app/modules/PDSRegistrationModule.scala +++ /dev/null @@ -1,13 +0,0 @@ -package modules - -import com.google.inject.AbstractModule -import repositories.PDSRegistrationRepository -import services.{ATProtocolClient, PDSRegistrationService} - -class PDSRegistrationModule extends AbstractModule { - override def configure(): Unit = { - bind(classOf[ATProtocolClient]).asEagerSingleton() - bind(classOf[PDSRegistrationRepository]).asEagerSingleton() - bind(classOf[PDSRegistrationService]).asEagerSingleton() - } -} diff --git a/app/modules/RecaptchaModule.scala b/app/modules/RecaptchaModule.scala deleted file mode 100644 index 59c735c5..00000000 --- a/app/modules/RecaptchaModule.scala +++ /dev/null @@ -1,30 +0,0 @@ -package modules - -import com.google.inject.{AbstractModule, Provides} -import com.nappin.play.recaptcha.{RecaptchaSettings, RecaptchaVerifier, ResponseParser} -import play.api.libs.ws.WSClient -import play.api.{Configuration, Environment} -import utils.MockRecaptchaVerifier - -import javax.inject.Singleton -import scala.concurrent.ExecutionContext - -class RecaptchaModule(environment: Environment, configuration: Configuration) extends AbstractModule { - override def configure(): Unit = {} - - @Provides - @Singleton - def provideRecaptchaVerifier( - settings: RecaptchaSettings, - parser: ResponseParser, - wsClient: WSClient, - configuration: Configuration - )(implicit ec: ExecutionContext): RecaptchaVerifier = { - val enableRecaptcha = configuration.getOptional[Boolean]("recaptcha.enable").getOrElse(false) - if (enableRecaptcha) { - new RecaptchaVerifier(settings, parser, wsClient) - } else { - new MockRecaptchaVerifier(settings, parser, wsClient) - } - } -} diff --git a/app/modules/Scheduler.scala b/app/modules/Scheduler.scala deleted 
file mode 100644 index aadd2634..00000000 --- a/app/modules/Scheduler.scala +++ /dev/null @@ -1,70 +0,0 @@ -package modules - -import actors.PublicationUpdateActor.UpdateAllPublications -import actors.{MatchDiscoveryActor, VariantExportActor, YBrowseVariantUpdateActor} -import jakarta.inject.{Inject, Named, Singleton} -import org.apache.pekko.actor.{ActorRef, ActorSystem} -import org.apache.pekko.extension.quartz.QuartzSchedulerExtension -import play.api.Logging - -/** - * Schedules the various background jobs using Pekko Quartz. - * This class is eager-loaded to ensure jobs are scheduled on application startup. - */ -@Singleton -class Scheduler @Inject()( - system: ActorSystem, - @Named("publication-update-actor") publicationUpdateActor: ActorRef, - @Named("publication-discovery-actor") publicationDiscoveryActor: ActorRef, - @Named("ybrowse-variant-update-actor") ybrowseVariantUpdateActor: ActorRef, - @Named("variant-export-actor") variantExportActor: ActorRef, - @Named("match-discovery-actor") matchDiscoveryActor: ActorRef - ) extends Logging { - - private val quartz = QuartzSchedulerExtension(system) - - // Schedule the PublicationUpdater job - try { - quartz.schedule("PublicationUpdater", publicationUpdateActor, UpdateAllPublications) - logger.info("Successfully scheduled 'PublicationUpdater' job to send UpdateAllPublications message.") - } catch { - case e: Exception => - logger.error(s"Failed to schedule 'PublicationUpdater' job: ${e.getMessage}", e) - } - - // Schedule the PublicationDiscovery job - try { - quartz.schedule("PublicationDiscovery", publicationDiscoveryActor, actors.PublicationDiscoveryActor.RunDiscovery) - logger.info("Successfully scheduled 'PublicationDiscovery' job.") - } catch { - case e: Exception => - logger.error(s"Failed to schedule 'PublicationDiscovery' job: ${e.getMessage}", e) - } - - // Schedule the YBrowseVariantUpdate job - try { - quartz.schedule("YBrowseVariantUpdate", ybrowseVariantUpdateActor, 
YBrowseVariantUpdateActor.RunUpdate) - logger.info("Successfully scheduled 'YBrowseVariantUpdate' job.") - } catch { - case e: Exception => - logger.error(s"Failed to schedule 'YBrowseVariantUpdate' job: ${e.getMessage}", e) - } - - // Schedule the VariantExport job - try { - quartz.schedule("VariantExport", variantExportActor, VariantExportActor.RunExport) - logger.info("Successfully scheduled 'VariantExport' job.") - } catch { - case e: Exception => - logger.error(s"Failed to schedule 'VariantExport' job: ${e.getMessage}", e) - } - - // Schedule the MatchDiscovery job - try { - quartz.schedule("MatchDiscovery", matchDiscoveryActor, MatchDiscoveryActor.RunDiscovery) - logger.info("Successfully scheduled 'MatchDiscovery' job.") - } catch { - case e: Exception => - logger.error(s"Failed to schedule 'MatchDiscovery' job: ${e.getMessage}", e) - } -} \ No newline at end of file diff --git a/app/modules/ServicesModule.scala b/app/modules/ServicesModule.scala deleted file mode 100644 index 8a34f08f..00000000 --- a/app/modules/ServicesModule.scala +++ /dev/null @@ -1,47 +0,0 @@ -package modules - -import com.google.inject.AbstractModule -import play.api.Mode.Prod -import play.api.{Configuration, Environment, Mode} -import services.genomics.{SpecimenDonorService, SpecimenDonorServiceImpl} -import services.{AwsSesEmailService, EmailService, LoggingEmailService, TreeVersioningService, TreeVersioningServiceImpl} -import repositories.{TreeVersioningRepository, TreeVersioningRepositoryImpl, WipTreeRepository, WipTreeRepositoryImpl} - -class ServicesModule(environment: Environment, configuration: Configuration) extends AbstractModule { - override def configure(): Unit = { - val emailService = environment.mode match { - case Prod => classOf[AwsSesEmailService] - case _ => classOf[LoggingEmailService] - } - - bind(classOf[EmailService]).to(emailService) - - bind(classOf[SpecimenDonorService]) - - .to(classOf[SpecimenDonorServiceImpl]) - - .asEagerSingleton() - - - 
bind(classOf[repositories.CitizenSequenceRepository]) - - .to(classOf[repositories.SlickCitizenSequenceRepository]) - - bind(classOf[services.PublicationDiscoveryService]).asEagerSingleton() - bind(classOf[services.UserPermissionHelper]).asEagerSingleton() - bind(classOf[services.TreeMergeStagingHelper]).asEagerSingleton() - bind(classOf[services.HaplogroupTreeMergeService]).asEagerSingleton() - - // Biosample Domain Facade - bind(classOf[services.BiosampleDomainService]).asEagerSingleton() - - // Tree Versioning System - bind(classOf[TreeVersioningRepository]).to(classOf[TreeVersioningRepositoryImpl]) - bind(classOf[TreeVersioningService]).to(classOf[TreeVersioningServiceImpl]).asEagerSingleton() - - // WIP Shadow Tables for staging merge changes - bind(classOf[WipTreeRepository]).to(classOf[WipTreeRepositoryImpl]) - } -} - - \ No newline at end of file diff --git a/app/modules/StartupModule.scala b/app/modules/StartupModule.scala deleted file mode 100644 index dca57f7f..00000000 --- a/app/modules/StartupModule.scala +++ /dev/null @@ -1,51 +0,0 @@ -package modules - -import jakarta.inject.Inject -import play.api.Logging -import play.api.inject.{ApplicationLifecycle, SimpleModule, bind} -import services.TreeInitializationService - -import scala.concurrent.{ExecutionContext, Future} - -class StartupModule extends SimpleModule(bind[StartupService].toSelf.eagerly()) - -/** - * Service responsible for executing startup tasks necessary for the application. - * - * The `StartupService` initializes and imports tree data (e.g., haplogroup trees) at application startup by invoking - * the corresponding methods from the `TreeInitializationService`. Initialization ensures any missing tree data - * is populated from external files if required. This process is logged to provide visibility into its success or failure. 
- * - * Dependencies required by the service are injected and include: - * - * @param treeInitService `TreeInitializationService` responsible for managing tree data initialization. - * @param lifecycle `ApplicationLifecycle` for managing application lifecycle hooks (e.g., startup and shutdown). - * @param ec Implicit `ExecutionContext` for handling asynchronous operations. - * - * The initialization process is executed asynchronously. Logging is provided to report the status of the initialization - * for each tree type (e.g., indicating whether data was successfully imported, skipped, or encountered errors). - * If the initialization process fails, the error is logged. - */ -class StartupService @Inject()( - treeInitService: TreeInitializationService, - lifecycle: ApplicationLifecycle, - )(implicit ec: ExecutionContext) extends Logging { - logger.info("StartupService: Application is starting...") - - treeInitService.initializeIfNeeded().map { results => - results.foreach { case (treeType, wasImported) => - if (wasImported) { - logger.info(s"$treeType tree data successfully imported") - } else { - logger.info(s"$treeType tree import skipped - either already populated or file missing") - } - } - }.recover { - case ex => logger.error("Failed during tree initialization", ex) - } - - lifecycle.addStopHook { () => - logger.info("StartupService: Application is shutting down...") - Future.successful(()) - } -} diff --git a/app/repositories/AlignmentRepository.scala b/app/repositories/AlignmentRepository.scala deleted file mode 100644 index e583f77b..00000000 --- a/app/repositories/AlignmentRepository.scala +++ /dev/null @@ -1,116 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.genomics.{AlignmentMetadata, EmbeddedCoverage, MetricLevel} -import play.api.db.slick.DatabaseConfigProvider -import play.api.libs.json.Json - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository 
interface for managing alignment metadata and coverage statistics. - */ -trait AlignmentRepository { - def createMetadata(metadata: AlignmentMetadata): Future[AlignmentMetadata] - - def updateCoverage(metadataId: Long, coverage: EmbeddedCoverage): Future[Boolean] - - def findMetadataBySequenceFile(sequenceFileId: Long): Future[Seq[AlignmentMetadata]] - - def findMetadataByContig(genbankContigId: Int, metricLevel: Option[MetricLevel] = None): Future[Seq[AlignmentMetadata]] - - def findMetadataById(metadataId: Long): Future[Option[AlignmentMetadata]] - - def findAllBySequenceFile(sequenceFileId: Long): Future[Seq[AlignmentMetadata]] - - def deleteMetadata(metadataId: Long): Future[Int] - - def findRegionalMetadata(genbankContigId: Int, startPos: Long, endPos: Long): Future[Seq[AlignmentMetadata]] -} - -@Singleton -class AlignmentRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with AlignmentRepository { - - import models.dal.MyPostgresProfile.api.* - - private val metadataTable = DatabaseSchema.domain.genomics.alignmentMetadata - - override def createMetadata(metadata: AlignmentMetadata): Future[AlignmentMetadata] = { - val insertQuery = (metadataTable returning metadataTable.map(_.id) - into ((m, id) => m.copy(id = Some(id)))) - .+=(metadata) - - db.run(insertQuery.transactionally) - } - - override def updateCoverage(metadataId: Long, coverage: EmbeddedCoverage): Future[Boolean] = { - db.run( - metadataTable.filter(_.id === metadataId) - .map(_.coverage) - .update(Some(Json.toJson(coverage))) - ).map(_ > 0) - } - - override def findMetadataBySequenceFile(sequenceFileId: Long): Future[Seq[AlignmentMetadata]] = { - db.run( - metadataTable - .filter(_.sequenceFileId === sequenceFileId) - .result - ) - } - - override def findMetadataByContig(genbankContigId: Int, metricLevel: Option[MetricLevel] = None): 
Future[Seq[AlignmentMetadata]] = { - val baseQuery = metadataTable.filter(_.genbankContigId === genbankContigId) - val filteredQuery = metricLevel match { - case Some(level) => baseQuery.filter(_.metricLevel === level) - case None => baseQuery - } - - db.run(filteredQuery.result) - } - - override def findMetadataById(metadataId: Long): Future[Option[AlignmentMetadata]] = { - db.run( - metadataTable - .filter(_.id === metadataId) - .result - .headOption - ) - } - - override def findAllBySequenceFile(sequenceFileId: Long): Future[Seq[AlignmentMetadata]] = { - db.run( - metadataTable - .filter(_.sequenceFileId === sequenceFileId) - .result - ) - } - - override def deleteMetadata(metadataId: Long): Future[Int] = { - db.run( - metadataTable - .filter(_.id === metadataId) - .delete - ) - } - - override def findRegionalMetadata(genbankContigId: Int, startPos: Long, endPos: Long): Future[Seq[AlignmentMetadata]] = { - db.run( - metadataTable - .filter(m => - m.genbankContigId === genbankContigId && - m.metricLevel === MetricLevel.REGION && - m.regionStartPos.isDefined && - m.regionEndPos.isDefined && - m.regionStartPos <= endPos && - m.regionEndPos >= startPos - ) - .result - ) - } -} diff --git a/app/repositories/BaseRepository.scala b/app/repositories/BaseRepository.scala deleted file mode 100644 index 3b0d67f6..00000000 --- a/app/repositories/BaseRepository.scala +++ /dev/null @@ -1,213 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.{DatabaseSchema, MyPostgresProfile} -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.dbio.Effect.All -import slick.jdbc.{GetResult, SimpleJdbcAction} - -import java.sql.PreparedStatement -import scala.concurrent.{ExecutionContext, Future} - -/** - * The BaseRepository class provides an abstract layer for accessing the database - * using the MyPostgresProfile and Slick. 
It includes various helper methods for - * common database operations, such as query execution, pagination, raw SQL execution, - * and more. It serves as a base class for other repository implementations. - * - * Constructor Parameters: - * - * @param dbConfigProvider Injected DatabaseConfigProvider that provides the database - * configuration for Slick. - * @param ec Implicit ExecutionContext for handling asynchronous database operations. - * - * Features: - * - Provides access to Slick's database configuration and API. - * - Supports transactional and non-transactional queries. - * - Includes pagination utilities for query results. - * - Enables safe execution of raw SQL queries with type mapping. - * - Offers helper methods for executing Common Table Expression (CTE) queries. - * - Allows counting rows with optional filtering and distinct column selection. - * - Supports additional custom functions such as fetching paginated raw SQL results. - * - * The class can be extended to define repositories specific to individual models - * or business entities, facilitating DRY principles by reusing the provided abstraction. - */ -abstract class BaseRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit protected val ec: ExecutionContext) - extends HasDatabaseConfigProvider[MyPostgresProfile] { - - // Required for Slick operators - protected val api = models.dal.MyPostgresProfile.api - - import api.* // This brings === and other operators into scope - - // ============================================================================ - // Query Extension Methods - // ============================================================================ - - /** - * Extension method for optional filtering on Slick queries. - * Applies a filter only when the option has a value. 
- * - * Usage: - * {{{ - * val filtered = baseQuery - * .filterOpt(maybeType)((row, t) => row.typeColumn === t) - * .filterOpt(maybeStatus)((row, s) => row.status === s) - * }}} - */ - implicit class QueryFilterOps[E, U, C[_]](val query: Query[E, U, C]) { - def filterOpt[V](opt: Option[V])(f: (E, V) => Rep[Boolean]): Query[E, U, C] = - opt match { - case Some(v) => query.filter(e => f(e, v)) - case None => query - } - } - - // Common schema access - protected val schema = DatabaseSchema - - // Common query helpers - protected def runQuery[T](query: DBIO[T]): Future[T] = db.run(query) - - protected def runTransactionally[T](actions: DBIO[T]): Future[T] = { - db.run(actions.transactionally) - } - - // Common pagination helper - protected def paginate[E, U](query: Query[E, U, Seq], page: Int, pageSize: Int): Query[E, U, Seq] = { - query.drop((page - 1) * pageSize).take(pageSize) - } - - // Common raw SQL helper with type safety - protected def rawSQL[T](sql: String)(implicit rconv: GetResult[T]): Future[Seq[T]] = { - runQuery(sql"#$sql".as[T]) - } - - // Common single result helper - protected def single[T](query: Query[_, T, Seq]): Future[Option[T]] = { - runQuery(query.result.headOption) - } - - // New helper methods - protected def paginatedQuery[T]( - baseSQL: String, - page: Int, - pageSize: Int, - params: (String, Any)* - )(implicit rconv: GetResult[T]): Future[Seq[T]] = { - val offset = (page - 1) * pageSize - val paginatedSQL = - s""" - ${baseSQL} - LIMIT $pageSize OFFSET $offset - """ - rawSQL(paginatedSQL) - } - - protected def countQuery( - tableName: String, - whereClause: String = "", - distinctColumn: String = "*" - ): Future[Long] = { - val sql = - s""" - SELECT COUNT(DISTINCT $distinctColumn) - FROM $tableName - ${if (whereClause.nonEmpty) s"WHERE $whereClause" else ""} - """ - rawSQL[Long](sql).map(_.head) - } - - // Helper for WITH queries - protected def withCTE[T]( - cteDefinition: String, - mainQuery: String - )(implicit rconv: GetResult[T]): 
Future[Seq[T]] = { - val sql = - s""" - WITH $cteDefinition - $mainQuery - """ - rawSQL(sql) - } - - // ============================================================================ - // SimpleDBIO Helpers for PostgreSQL enum/jsonb type handling - // ============================================================================ - - /** - * Execute an UPDATE statement with proper JDBC handling for PostgreSQL enums. - * Returns true if at least one row was affected. - * - * Usage: - * {{{ - * executeUpdate("UPDATE table SET status = CAST(? AS my_enum) WHERE id = ?") { ps => - * ps.setString(1, "VALUE") - * ps.setInt(2, id) - * } - * }}} - */ - protected def executeUpdate(sql: String)(setParams: PreparedStatement => Unit): Future[Boolean] = { - import api.SimpleDBIO - val action = SimpleDBIO[Int] { session => - val ps = session.connection.prepareStatement(sql) - try { - setParams(ps) - ps.executeUpdate() - } finally { - ps.close() - } - } - db.run(action).map(_ > 0) - } - - /** - * Execute an UPDATE statement returning the count of affected rows. - */ - protected def executeUpdateCount(sql: String)(setParams: PreparedStatement => Unit): Future[Int] = { - import api.SimpleDBIO - val action = SimpleDBIO[Int] { session => - val ps = session.connection.prepareStatement(sql) - try { - setParams(ps) - ps.executeUpdate() - } finally { - ps.close() - } - } - db.run(action) - } - - /** - * Execute an INSERT statement with RETURNING clause for PostgreSQL. - * Returns the generated ID. - * - * Usage: - * {{{ - * executeInsertReturningId( - * "INSERT INTO table (col1, col2) VALUES (?, CAST(? 
AS my_enum)) RETURNING id" - * ) { ps => - * ps.setString(1, "value1") - * ps.setString(2, "ENUM_VALUE") - * } - * }}} - */ - protected def executeInsertReturningId(sql: String)(setParams: PreparedStatement => Unit): Future[Int] = { - import api.SimpleDBIO - val action = SimpleDBIO[Int] { session => - val ps = session.connection.prepareStatement(sql) - try { - setParams(ps) - val rs = ps.executeQuery() - rs.next() - rs.getInt(1) - } finally { - ps.close() - } - } - db.run(action) - } -} diff --git a/app/repositories/BiosampleCallableLociRepository.scala b/app/repositories/BiosampleCallableLociRepository.scala deleted file mode 100644 index 4e6dc2f9..00000000 --- a/app/repositories/BiosampleCallableLociRepository.scala +++ /dev/null @@ -1,45 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.domain.genomics.BiosampleCallableLoci -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait BiosampleCallableLociRepository { - def findBySample(sampleType: String, sampleId: Int, chromosome: String): Future[Option[BiosampleCallableLoci]] - def findBySampleGuid(sampleGuid: UUID, chromosome: String): Future[Option[BiosampleCallableLoci]] - def upsert(loci: BiosampleCallableLoci): Future[Int] - def findAllForSample(sampleType: String, sampleId: Int): Future[Seq[BiosampleCallableLoci]] -} - -class BiosampleCallableLociRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with BiosampleCallableLociRepository - with Logging { - - import models.dal.DatabaseSchema.domain.genomics.biosampleCallableLoci - import models.dal.MyPostgresProfile.api.* - - override def findBySample(sampleType: String, sampleId: Int, chromosome: String): Future[Option[BiosampleCallableLoci]] = - runQuery(biosampleCallableLoci - .filter(r => r.sampleType === sampleType && r.sampleId === 
sampleId && r.chromosome === chromosome) - .result.headOption) - - override def findBySampleGuid(sampleGuid: UUID, chromosome: String): Future[Option[BiosampleCallableLoci]] = - runQuery(biosampleCallableLoci - .filter(r => r.sampleGuid === sampleGuid && r.chromosome === chromosome) - .result.headOption) - - override def upsert(loci: BiosampleCallableLoci): Future[Int] = - runQuery(biosampleCallableLoci.insertOrUpdate(loci)) - - override def findAllForSample(sampleType: String, sampleId: Int): Future[Seq[BiosampleCallableLoci]] = - runQuery(biosampleCallableLoci - .filter(r => r.sampleType === sampleType && r.sampleId === sampleId) - .result) -} diff --git a/app/repositories/BiosampleHaplogroupRepository.scala b/app/repositories/BiosampleHaplogroupRepository.scala deleted file mode 100644 index 1a19433f..00000000 --- a/app/repositories/BiosampleHaplogroupRepository.scala +++ /dev/null @@ -1,58 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.genomics.BiosampleHaplogroup -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait BiosampleHaplogroupRepository { - def findBySampleGuid(sampleGuid: UUID): Future[Option[BiosampleHaplogroup]] - def upsert(bh: BiosampleHaplogroup): Future[Int] - def updateYHaplogroup(sampleGuid: UUID, haplogroupId: Int): Future[Boolean] - def updateMtHaplogroup(sampleGuid: UUID, haplogroupId: Int): Future[Boolean] - def findByHaplogroupId(haplogroupId: Int, haplogroupType: HaplogroupType): Future[Seq[BiosampleHaplogroup]] -} - -class BiosampleHaplogroupRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with BiosampleHaplogroupRepository - with Logging { - - import models.dal.DatabaseSchema.domain.genomics.biosampleHaplogroups - import models.dal.MyPostgresProfile.api.* - - override 
def findBySampleGuid(sampleGuid: UUID): Future[Option[BiosampleHaplogroup]] = - runQuery(biosampleHaplogroups.filter(_.sampleGuid === sampleGuid).result.headOption) - - override def upsert(bh: BiosampleHaplogroup): Future[Int] = - runQuery(biosampleHaplogroups.insertOrUpdate(bh)) - - override def updateYHaplogroup(sampleGuid: UUID, haplogroupId: Int): Future[Boolean] = - runQuery(biosampleHaplogroups - .filter(_.sampleGuid === sampleGuid) - .map(_.yHaplogroupId) - .update(Some(haplogroupId)) - .map(_ > 0)) - - override def updateMtHaplogroup(sampleGuid: UUID, haplogroupId: Int): Future[Boolean] = - runQuery(biosampleHaplogroups - .filter(_.sampleGuid === sampleGuid) - .map(_.mtHaplogroupId) - .update(Some(haplogroupId)) - .map(_ > 0)) - - override def findByHaplogroupId(haplogroupId: Int, haplogroupType: HaplogroupType): Future[Seq[BiosampleHaplogroup]] = { - haplogroupType match { - case HaplogroupType.Y => - runQuery(biosampleHaplogroups.filter(_.yHaplogroupId === haplogroupId).result) - case HaplogroupType.MT => - runQuery(biosampleHaplogroups.filter(_.mtHaplogroupId === haplogroupId).result) - } - } -} diff --git a/app/repositories/BiosampleOriginalHaplogroupRepository.scala b/app/repositories/BiosampleOriginalHaplogroupRepository.scala deleted file mode 100644 index 7cb16320..00000000 --- a/app/repositories/BiosampleOriginalHaplogroupRepository.scala +++ /dev/null @@ -1,73 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.genomics.{Biosample, OriginalHaplogroupEntry} -import play.api.db.slick.DatabaseConfigProvider -import play.api.libs.json.Json - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository for managing original haplogroup data embedded as JSONB on biosample. 
- */ -trait BiosampleOriginalHaplogroupRepository { - def findByBiosampleId(biosampleId: Int): Future[Seq[OriginalHaplogroupEntry]] - def findByBiosampleAndPublication(biosampleId: Int, publicationId: Int): Future[Option[OriginalHaplogroupEntry]] - def upsert(biosampleId: Int, entry: OriginalHaplogroupEntry): Future[Boolean] - def delete(biosampleId: Int, publicationId: Int): Future[Boolean] - def deleteAllByBiosampleId(biosampleId: Int): Future[Boolean] -} - -@Singleton -class BiosampleOriginalHaplogroupRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with BiosampleOriginalHaplogroupRepository { - - import models.dal.MyPostgresProfile.api.* - - private val biosamples = DatabaseSchema.domain.genomics.biosamples - - override def findByBiosampleId(biosampleId: Int): Future[Seq[OriginalHaplogroupEntry]] = { - db.run(biosamples.filter(_.id === biosampleId).map(_.originalHaplogroups).result.headOption).map { - case Some(Some(json)) => json.asOpt[Seq[OriginalHaplogroupEntry]].getOrElse(Seq.empty) - case _ => Seq.empty - } - } - - override def findByBiosampleAndPublication(biosampleId: Int, publicationId: Int): Future[Option[OriginalHaplogroupEntry]] = { - findByBiosampleId(biosampleId).map(_.find(_.publicationId == publicationId)) - } - - override def upsert(biosampleId: Int, entry: OriginalHaplogroupEntry): Future[Boolean] = { - findByBiosampleId(biosampleId).flatMap { existing => - val updated = existing.filterNot(_.publicationId == entry.publicationId) :+ entry - db.run( - biosamples.filter(_.id === biosampleId) - .map(_.originalHaplogroups) - .update(Some(Json.toJson(updated))) - ).map(_ > 0) - } - } - - override def delete(biosampleId: Int, publicationId: Int): Future[Boolean] = { - findByBiosampleId(biosampleId).flatMap { existing => - val updated = existing.filterNot(_.publicationId == publicationId) - db.run( - 
biosamples.filter(_.id === biosampleId) - .map(_.originalHaplogroups) - .update(Some(Json.toJson(updated))) - ).map(_ > 0) - } - } - - override def deleteAllByBiosampleId(biosampleId: Int): Future[Boolean] = { - db.run( - biosamples.filter(_.id === biosampleId) - .map(_.originalHaplogroups) - .update(Some(Json.toJson(Seq.empty[OriginalHaplogroupEntry]))) - ).map(_ > 0) - } -} diff --git a/app/repositories/BiosampleRepository.scala b/app/repositories/BiosampleRepository.scala deleted file mode 100644 index 5e6602ea..00000000 --- a/app/repositories/BiosampleRepository.scala +++ /dev/null @@ -1,455 +0,0 @@ -package repositories - -import com.vividsolutions.jts.geom.Point -import com.vividsolutions.jts.io.WKBReader -import jakarta.inject.{Inject, Singleton} -import models.api.* -import models.dal.domain.genomics.BiosamplesTable -import models.dal.{DatabaseSchema, MyPostgresProfile} -import models.domain.genomics.{Biosample, BiosampleType, SpecimenDonor} -import play.api.db.slick.DatabaseConfigProvider -import play.api.libs.json.Json -import slick.jdbc.{GetResult, SQLActionBuilder, SetParameter} - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Represents a repository interface for managing biosample data. This trait provides methods to interact - * with biosamples and their related data, including fetching, pagination, and counting operations for publications. - */ -trait BiosampleRepository { - /** - * Retrieves a biosample and its associated specimen donor information by the given identifier. - * - * @param id the unique identifier of the biosample to retrieve - * @return a future containing an optional tuple, where the first element is the biosample and the second element - * is an optional specimen donor associated with the biosample - */ - def findById(id: Int): Future[Option[(Biosample, Option[SpecimenDonor])]] - - /** - * Creates a new biosample record. 
- * - * @param biosample the biosample to create - * @return a future containing the created biosample with its assigned ID - */ - def create(biosample: Biosample): Future[Biosample] - - /** - * Updates specific fields of a biosample - * - * @param biosample The biosample with updated fields - * @return Future[Boolean] indicating success - */ - def update(biosample: Biosample): Future[Boolean] - - /** - * Sets the lock status for a biosample - * - * @param id The ID of the biosample to lock/unlock - * @param locked The desired lock status - * @return Future[Boolean] indicating success - */ - def setLocked(id: Int, locked: Boolean): Future[Boolean] - - /** - * Retrieves a biosample by its accession number. - * - * @param accession the accession number of the biosample - * @return a future containing an optional biosample if found - */ - def findByAccession(accession: String): Future[Option[(Biosample, Option[SpecimenDonor])]] - - - /** - * Retrieves all biosamples associated with a specific publication, including their origin metadata. - * - * @param publicationId the unique identifier of the publication for which biosamples are being queried - * @return a future containing a sequence of biosamples with their origin information - */ - def findBiosamplesWithOriginForPublication(publicationId: Int): Future[Seq[BiosampleWithOrigin]] - - /** - * Retrieves a paginated list of biosamples, including their origin metadata, associated with a specific publication. 
- * - * @param publicationId the unique identifier of the publication for which biosamples are being queried - * @param page the page number to retrieve, starting from 1 - * @param pageSize the number of items to include on each page - * @return a future containing a sequence of biosamples with their origin information for the specified page - */ - def findPaginatedBiosamplesWithOriginForPublication(publicationId: Int, page: Int, pageSize: Int): Future[Seq[BiosampleWithOrigin]] - - /** - * Counts the number of biosamples associated with a specific publication. - * - * @param publicationId the unique identifier of the publication for which biosamples are being counted - * @return a future containing the count of biosamples linked to the specified publication - */ - def countBiosamplesForPublication(publicationId: Int): Future[Long] - - /** - * Upserts (updates or inserts) multiple biosamples in a single transaction. - * - * @param biosamples sequence of biosamples to upsert - * @return future sequence of the upserted biosamples with their IDs - */ - def upsertMany(biosamples: Seq[Biosample]): Future[Seq[Biosample]] - - /** - * Retrieves all samples with their associated studies and original haplogroup assignments - * - * @return a future containing a sequence of samples with their studies and assignments - */ - def findAllWithStudies(): Future[Seq[SampleWithStudies]] - - def findByAliasOrAccession(query: String): Future[Option[(Biosample, Option[SpecimenDonor])]] - - /** - * Retrieves a biosample by its GUID (Globally Unique Identifier). 
- * - * @param guid the UUID of the biosample to be retrieved - * @return a future containing an optional biosample if found - */ - def findByGuid(guid: UUID): Future[Option[(Biosample, Option[SpecimenDonor])]] - - def getAllGeoLocations: Future[Seq[(Point, Int)]] - - def delete(id: Int): Future[Boolean] -} - -@Singleton -class BiosampleRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with BiosampleRepository { - - import models.dal.MyPostgresProfile.api.* - - private val biosamplesTable = DatabaseSchema.domain.genomics.biosamples - private val specimenDonorsTable = DatabaseSchema.domain.genomics.specimenDonors - - private def readPoint(pgObj: AnyRef): Option[GeoCoord] = pgObj match { - case null => None - case _ => - try { - val wkbReader = new WKBReader() - val point = wkbReader.read(WKBReader.hexToBytes(pgObj.toString)).asInstanceOf[Point] - Some(GeoCoord(point.getY, point.getX)) - } catch { - case e: Exception => - println(s"Error reading WKB: ${pgObj.toString} - ${e.getMessage}") - None - } - } - - // Helper method to get biosample with its donor - private def getBiosampleWithDonor(query: Query[BiosamplesTable, Biosample, Seq]): Future[Option[(Biosample, Option[SpecimenDonor])]] = { - db.run( - query - .joinLeft(specimenDonorsTable) - .on(_.specimenDonorId === _.id) - .result - .headOption - .map(_.map { case (biosample, donor) => (biosample, donor) }) - ) - } - - protected implicit val getBiosampleWithOriginResult: GetResult[BiosampleWithOrigin] = GetResult(r => - BiosampleWithOrigin( - sampleName = r.nextStringOption(), - accession = r.nextString(), - sampleType = BiosampleType.valueOf(r.nextString()), - sex = r.nextStringOption(), - geoCoord = r.nextObjectOption().flatMap(readPoint), - yDnaHaplogroup = r.nextStringOption(), - mtDnaHaplogroup = r.nextStringOption(), - reads = r.nextIntOption(), - readLen = 
r.nextIntOption(), - bestFitPopulation = (r.nextStringOption(), r.nextBigDecimalOption(), r.nextStringOption()) match { - case (Some(popName), Some(prob), Some(methodName)) => - Some(PopulationInfo(popName, prob, methodName)) - case _ => None - }, - dateRangeStart = r.nextIntOption(), - dateRangeEnd = r.nextIntOption() - ) - ) - - private def makeBaseQuery(publicationId: Int) = - s""" - WITH best_population AS ( - SELECT aa.sample_guid, - p.population_name, - aa.probability, - am.method_name, - ROW_NUMBER() OVER (PARTITION BY aa.sample_guid ORDER BY aa.probability DESC) as rn - FROM ancestry_analysis aa - JOIN population p ON p.population_id = aa.population_id - JOIN analysis_method am ON am.analysis_method_id = aa.analysis_method_id - ) - SELECT b.alias, - b.sample_accession, - sd.donor_type::text, - sd.sex::text, - sd.geocoord, - boh.original_y_haplogroup AS y_haplogroup_name, - boh.original_mt_haplogroup AS mt_haplogroup_name, - sl.reads, - sl.read_length, - bp.population_name, - bp.probability, - bp.method_name, - sd.date_range_start, - sd.date_range_end - FROM publication_biosample pb - INNER JOIN public.biosample b ON b.id = pb.biosample_id - LEFT JOIN specimen_donor sd ON sd.id = b.specimen_donor_id - LEFT JOIN biosample_original_haplogroup boh ON boh.biosample_id = b.id - AND boh.publication_id = $publicationId - LEFT JOIN sequence_library sl on sl.sample_guid = b.sample_guid - LEFT JOIN best_population bp ON bp.sample_guid = b.sample_guid AND bp.rn = 1 - WHERE pb.publication_id = $publicationId - ORDER BY b.alias - """ - - - override def findById(id: Int): Future[Option[(Biosample, Option[SpecimenDonor])]] = { - getBiosampleWithDonor(biosamplesTable.filter(_.id === id)) - } - - def create(biosample: Biosample): Future[Biosample] = { - val insertQuery = (biosamplesTable returning biosamplesTable.map(_.id) - into ((bs, id) => bs.copy(id = Some(id)))) - .+=(biosample) - - db.run(insertQuery.transactionally) - } - - override def update(biosample: Biosample): 
Future[Boolean] = { - biosample.id match { - case None => Future.successful(false) - case Some(id) => - db.run( - biosamplesTable - .filter(_.id === id) - .map(b => ( - b.alias, - b.description, - b.centerName, - b.specimenDonorId, - b.sourcePlatform, - b.locked - )) - .update(( - biosample.alias, - biosample.description, - biosample.centerName, - biosample.specimenDonorId, - biosample.sourcePlatform, - biosample.locked - )) - .map(_ > 0) - ) - } - } - - - override def findByAccession(accession: String): Future[Option[(Biosample, Option[SpecimenDonor])]] = { - getBiosampleWithDonor(biosamplesTable.filter(_.sampleAccession === accession)) - } - - override def findBiosamplesWithOriginForPublication(publicationId: Int): Future[Seq[BiosampleWithOrigin]] = { - db.run( - sql"""#${makeBaseQuery(publicationId)}""" - .as[BiosampleWithOrigin] - ) - } - - override def findPaginatedBiosamplesWithOriginForPublication(publicationId: Int, page: Int, pageSize: Int): Future[Seq[BiosampleWithOrigin]] = { - val offset = (page - 1) * pageSize - val paginatedQuery = s"""${makeBaseQuery(publicationId)} LIMIT $pageSize OFFSET $offset""" - db.run( - SQLActionBuilder(paginatedQuery, SetParameter.SetUnit) - .as[BiosampleWithOrigin] - ) - } - - override def countBiosamplesForPublication(publicationId: Int): Future[Long] = { - val query = - s""" - SELECT COUNT(DISTINCT b.id) - FROM publication_biosample pb - INNER JOIN public.biosample b ON b.id = pb.biosample_id - WHERE pb.publication_id = $publicationId - """ - rawSQL[Long](query).map(_.head) - } - - override def setLocked(id: Int, locked: Boolean): Future[Boolean] = { - db.run( - biosamplesTable - .filter(_.id === id) - .map(_.locked) - .update(locked) - .map(_ > 0) - ) - } - - /** - * Upserts multiple biosamples, respecting any existing locks - * Locked samples will not have their metadata updated - * - * @param biosamples sequence of biosamples to upsert - * @return future sequence of the upserted biosamples with their IDs - */ - def 
upsertMany(biosamples: Seq[Biosample]): Future[Seq[Biosample]] = { - if (biosamples.isEmpty) { - Future.successful(Seq.empty) - } else { - val actions = biosamples.map { biosample => - val query = biosamplesTable.filter(_.sampleAccession === biosample.sampleAccession) - - (for { - existingBiosampleOpt <- query.result.headOption - - result <- existingBiosampleOpt match { - case Some(existing) if existing.locked => - // If locked, return existing record without updates - DBIO.successful(existing) - - case Some(existing) => - // For existing unlocked record, we only update biosample fields - query - .map(b => (b.alias, b.locked)) - .update((biosample.alias, biosample.locked)) - .map(_ => biosample.copy(id = existing.id)) - - case None => - // For new records, insert both biosample and donor if needed - for { - // First handle the specimen donor if it exists - donorId <- biosample.specimenDonorId match { - case Some(id) => DBIO.successful(Some(id)) - case None => DBIO.successful(None) - } - - // Then insert the biosample with the donor reference - biosampleWithId <- (biosamplesTable returning biosamplesTable.map(_.id) - into ((bs, id) => bs.copy(id = Some(id)))) - .+=(biosample.copy(specimenDonorId = donorId)) - } yield biosampleWithId - } - } yield result).transactionally - } - - db.run(DBIO.sequence(actions).transactionally) - } - } - - override def findAllWithStudies(): Future[Seq[SampleWithStudies]] = { - val query = - """ - WITH sample_studies AS ( - SELECT - b.id as biosample_id, - b.alias as sample_name, - b.sample_accession, - sd.sex::text as sex, - sd.geocoord, - COALESCE( - jsonb_agg( - DISTINCT jsonb_build_object( - 'accession', gs.accession, - 'title', gs.title, - 'centerName', gs.center_name, - 'source', gs.source, - 'yHaplogroup', boh.original_y_haplogroup, - 'mtHaplogroup', boh.original_mt_haplogroup, - 'notes', boh.notes - ) - ) FILTER (WHERE gs.accession IS NOT NULL), - '[]'::jsonb - ) as studies - FROM biosample b - LEFT JOIN specimen_donor sd ON 
sd.id = b.specimen_donor_id - JOIN publication_biosample pb ON pb.biosample_id = b.id - JOIN publication p ON p.id = pb.publication_id - JOIN publication_ena_study pgs ON pgs.publication_id = pb.publication_id - JOIN genomic_studies gs ON gs.id = pgs.genomic_study_id - LEFT JOIN biosample_original_haplogroup boh ON - boh.biosample_id = b.id AND - boh.publication_id = p.id - GROUP BY b.id, b.alias, b.sample_accession, sd.sex, sd.geocoord - ) - SELECT - sample_name, - sample_accession, - sex, - geocoord, - studies::text - FROM sample_studies - ORDER BY sample_accession - """ - - db.run(sql"""#$query""".as[( - Option[String], String, Option[String], - Option[AnyRef], - String - )]).map { results => - results.map { case ( - sampleName, accession, sex, - point, - studiesJson - ) => - val studies = Json.parse(studiesJson).as[List[StudyWithHaplogroups]] - - SampleWithStudies( - sampleName = sampleName, - accession = accession, - sex = sex, - geoCoord = readPoint(point), - studies = studies - ) - } - } - } - - override def findByAliasOrAccession(query: String): Future[Option[(Biosample, Option[SpecimenDonor])]] = { - val byAlias = biosamplesTable.filter(_.alias === query) - val byAccession = biosamplesTable.filter(_.sampleAccession === query) - getBiosampleWithDonor(byAlias union byAccession) - } - - override def findByGuid(guid: UUID): Future[Option[(Biosample, Option[SpecimenDonor])]] = { - getBiosampleWithDonor(biosamplesTable.filter(_.sampleGuid === guid)) - } - - def getAllGeoLocations: Future[Seq[(Point, Int)]] = { - - val query = specimenDonorsTable - - .filter(_.geocoord.isDefined) - - .groupBy(_.geocoord) - - .map { case (point, group) => - - (point.asColumnOf[Point], group.length) - - } - - - db.run(query.result) - - } - - - override def delete(id: Int): Future[Boolean] = { - - db.run(biosamplesTable.filter(_.id === id).delete.map(_ > 0)) - - } - -} \ No newline at end of file diff --git a/app/repositories/BiosampleVariantCallRepository.scala 
b/app/repositories/BiosampleVariantCallRepository.scala deleted file mode 100644 index bf92525f..00000000 --- a/app/repositories/BiosampleVariantCallRepository.scala +++ /dev/null @@ -1,36 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.domain.genomics.BiosampleVariantCall -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait BiosampleVariantCallRepository { - def findByBiosample(biosampleId: Int): Future[Seq[BiosampleVariantCall]] - def findByBiosampleAndVariants(biosampleId: Int, variantIds: Seq[Int]): Future[Seq[BiosampleVariantCall]] - def findByVariant(variantId: Int): Future[Seq[BiosampleVariantCall]] -} - -class BiosampleVariantCallRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with BiosampleVariantCallRepository - with Logging { - - import models.dal.DatabaseSchema.domain.genomics.biosampleVariantCalls - import models.dal.MyPostgresProfile.api.* - - override def findByBiosample(biosampleId: Int): Future[Seq[BiosampleVariantCall]] = - runQuery(biosampleVariantCalls.filter(_.biosampleId === biosampleId).result) - - override def findByBiosampleAndVariants(biosampleId: Int, variantIds: Seq[Int]): Future[Seq[BiosampleVariantCall]] = - runQuery(biosampleVariantCalls - .filter(r => r.biosampleId === biosampleId && r.variantId.inSet(variantIds)) - .result) - - override def findByVariant(variantId: Int): Future[Seq[BiosampleVariantCall]] = - runQuery(biosampleVariantCalls.filter(_.variantId === variantId).result) -} diff --git a/app/repositories/CitizenBiosampleRepository.scala b/app/repositories/CitizenBiosampleRepository.scala deleted file mode 100644 index 168dcd9b..00000000 --- a/app/repositories/CitizenBiosampleRepository.scala +++ /dev/null @@ -1,113 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import 
models.dal.MyPostgresProfile.api.* -import models.dal.{DatabaseSchema, MyPostgresProfile} -import models.domain.genomics.CitizenBiosample -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait CitizenBiosampleRepository { - def create(biosample: CitizenBiosample): Future[CitizenBiosample] - - def findByGuid(guid: UUID): Future[Option[CitizenBiosample]] - - def findByAtUri(atUri: String): Future[Option[CitizenBiosample]] - - def findByAccession(accession: String): Future[Option[CitizenBiosample]] - - /** - * Updates the biosample. - * - * @param biosample The biosample with new values. - * @param expectedAtCid The atCid expected to be currently in the database for this record. - * @return Future[Boolean] true if update succeeded, false otherwise (e.g. record not found or atCid mismatch). - */ - def update(biosample: CitizenBiosample, expectedAtCid: Option[String]): Future[Boolean] - - def softDelete(guid: UUID): Future[Boolean] - - def softDeleteByAtUri(atUri: String): Future[Boolean] -} - -@Singleton -class CitizenBiosampleRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) extends CitizenBiosampleRepository with HasDatabaseConfigProvider[MyPostgresProfile] { - - private val citizenBiosamples = DatabaseSchema.domain.genomics.citizenBiosamples - - override def create(biosample: CitizenBiosample): Future[CitizenBiosample] = { - val insertQuery = (citizenBiosamples returning citizenBiosamples.map(_.id) - into ((bs, id) => bs.copy(id = Some(id)))) += biosample - db.run(insertQuery) - } - - override def findByGuid(guid: UUID): Future[Option[CitizenBiosample]] = { - db.run(citizenBiosamples.filter(b => b.sampleGuid === guid && !b.deleted).result.headOption) - } - - override def findByAtUri(atUri: String): Future[Option[CitizenBiosample]] = { - 
db.run(citizenBiosamples.filter(b => b.atUri === atUri && !b.deleted).result.headOption) - } - - override def findByAccession(accession: String): Future[Option[CitizenBiosample]] = { - db.run(citizenBiosamples.filter(b => b.accession === accession && !b.deleted).result.headOption) - } - - override def update(biosample: CitizenBiosample, expectedAtCid: Option[String]): Future[Boolean] = { - val query = citizenBiosamples.filter { b => - b.sampleGuid === biosample.sampleGuid && - b.atCid === expectedAtCid - } - - val updateAction = query.map(b => ( - b.atUri, - b.accession, - b.alias, - b.sourcePlatform, - b.collectionDate, - b.sex, - b.geocoord, - b.description, - b.yHaplogroup, - b.mtHaplogroup, - b.atCid, - b.updatedAt, - b.deleted - )).update(( - biosample.atUri, - biosample.accession, - biosample.alias, - biosample.sourcePlatform, - biosample.collectionDate, - biosample.sex, - biosample.geocoord, - biosample.description, - biosample.yHaplogroup, - biosample.mtHaplogroup, - biosample.atCid, - LocalDateTime.now(), - biosample.deleted - )) - - db.run(updateAction.map(_ > 0)) - } - - override def softDelete(guid: UUID): Future[Boolean] = { - val q = citizenBiosamples.filter(_.sampleGuid === guid) - .map(b => (b.deleted, b.updatedAt)) - .update((true, LocalDateTime.now())) - db.run(q.map(_ > 0)) - } - - override def softDeleteByAtUri(atUri: String): Future[Boolean] = { - val q = citizenBiosamples.filter(_.atUri === atUri) - .map(b => (b.deleted, b.updatedAt)) - .update((true, LocalDateTime.now())) - db.run(q.map(_ > 0)) - } -} diff --git a/app/repositories/CitizenSequenceRepository.scala b/app/repositories/CitizenSequenceRepository.scala deleted file mode 100644 index e5065a85..00000000 --- a/app/repositories/CitizenSequenceRepository.scala +++ /dev/null @@ -1,30 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import play.api.db.slick.{DatabaseConfigProvider, 
HasDatabaseConfigProvider} - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository specifically for fetching the next value from the citizen biosample sequence. - * Extracted to allow for easier testing of dependent services. - */ -trait CitizenSequenceRepository { - def getNextSequence(): Future[Long] -} - -@Singleton -class SlickCitizenSequenceRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends CitizenSequenceRepository - with HasDatabaseConfigProvider[MyPostgresProfile] { - - override def getNextSequence(): Future[Long] = { - // Note: 'citizen_biosample_seq' must exist in the Postgres DB - val query = sql"SELECT nextval('citizen_biosample_seq')".as[Long] - db.run(query).map(_.head) - } -} diff --git a/app/repositories/ContactMessageRepository.scala b/app/repositories/ContactMessageRepository.scala deleted file mode 100644 index 645d6661..00000000 --- a/app/repositories/ContactMessageRepository.scala +++ /dev/null @@ -1,164 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.support.{ContactMessage, MessageReply, MessageStatus} -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class ContactMessageRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[JdbcProfile] { - - private val contactMessages = DatabaseSchema.support.contactMessages - private val messageReplies = DatabaseSchema.support.messageReplies - - // Import the MessageStatus mapper from the table - import models.dal.support.ContactMessagesTable - private val tableForMapper = new ContactMessagesTable(null) - implicit val 
messageStatusMapper: BaseColumnType[MessageStatus] = tableForMapper.messageStatusMapper - - // ===== Contact Messages ===== - - def create(message: ContactMessage): Future[ContactMessage] = { - val messageWithId = message.copy(id = Some(message.id.getOrElse(UUID.randomUUID()))) - db.run((contactMessages returning contactMessages) += messageWithId) - } - - def findById(id: UUID): Future[Option[ContactMessage]] = { - db.run(contactMessages.filter(_.id === id).result.headOption) - } - - def findByUserId(userId: UUID): Future[Seq[ContactMessage]] = { - db.run( - contactMessages - .filter(_.userId === userId) - .sortBy(_.createdAt.desc) - .result - ) - } - - def findAll(status: Option[MessageStatus] = None, limit: Int = 50, offset: Int = 0): Future[Seq[ContactMessage]] = { - val baseQuery = contactMessages.sortBy(_.createdAt.desc) - val filteredQuery = status match { - case Some(s) => baseQuery.filter(_.status === s) - case None => baseQuery - } - db.run(filteredQuery.drop(offset).take(limit).result) - } - - def countByStatus(status: Option[MessageStatus] = None): Future[Int] = { - val baseQuery = contactMessages - val filteredQuery = status match { - case Some(s) => baseQuery.filter(_.status === s) - case None => baseQuery - } - db.run(filteredQuery.length.result) - } - - def updateStatus(id: UUID, status: MessageStatus): Future[Int] = { - db.run( - contactMessages - .filter(_.id === id) - .map(m => (m.status, m.updatedAt)) - .update((status, LocalDateTime.now())) - ) - } - - // ===== Message Replies ===== - - def createReply(reply: MessageReply): Future[MessageReply] = { - val replyWithId = reply.copy(id = Some(reply.id.getOrElse(UUID.randomUUID()))) - db.run((messageReplies returning messageReplies) += replyWithId) - } - - def findRepliesByMessageId(messageId: UUID): Future[Seq[MessageReply]] = { - db.run( - messageReplies - .filter(_.messageId === messageId) - .sortBy(_.createdAt.asc) - .result - ) - } - - def markEmailSent(replyId: UUID): Future[Int] = { - db.run( 
- messageReplies - .filter(_.id === replyId) - .map(r => (r.emailSent, r.emailSentAt)) - .update((true, Some(LocalDateTime.now()))) - ) - } - - /** - * Gets a message with all its replies. - */ - def findWithReplies(messageId: UUID): Future[Option[(ContactMessage, Seq[MessageReply])]] = { - for { - messageOpt <- findById(messageId) - replies <- findRepliesByMessageId(messageId) - } yield messageOpt.map(m => (m, replies)) - } - - /** - * Update the user's last viewed timestamp for a message. - */ - def updateUserLastViewed(messageId: UUID): Future[Int] = { - db.run( - contactMessages - .filter(_.id === messageId) - .map(_.userLastViewedAt) - .update(Some(LocalDateTime.now())) - ) - } - - /** - * Update the user's last viewed timestamp for all their messages. - */ - def updateUserLastViewedAll(userId: UUID): Future[Int] = { - db.run( - contactMessages - .filter(_.userId === userId) - .map(_.userLastViewedAt) - .update(Some(LocalDateTime.now())) - ) - } - - // ===== Badge Count Methods ===== - - /** - * Count new/unread messages for admin badge. - * Returns count of messages with status 'new' or 'read' (not yet replied/closed). - */ - def countUnreadForAdmin: Future[Int] = { - db.run( - contactMessages - .filter(m => m.status === MessageStatus.New || m.status === MessageStatus.Read) - .length - .result - ) - } - - /** - * Count messages with unread replies for a specific user. 
- * A message has unread replies if: - * - It has at least one reply - * - The latest reply was created after userLastViewedAt (or userLastViewedAt is null) - */ - def countUnreadRepliesForUser(userId: UUID): Future[Int] = { - // Use Slick query with subquery for this logic - val messagesWithNewReplies = for { - msg <- contactMessages if msg.userId === userId - reply <- messageReplies if reply.messageId === msg.id - if msg.userLastViewedAt.isEmpty || reply.createdAt > msg.userLastViewedAt - } yield msg.id - - db.run(messagesWithNewReplies.distinct.length.result) - } -} diff --git a/app/repositories/CookieConsentRepository.scala b/app/repositories/CookieConsentRepository.scala deleted file mode 100644 index 2623ace6..00000000 --- a/app/repositories/CookieConsentRepository.scala +++ /dev/null @@ -1,90 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.auth.CookieConsent -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class CookieConsentRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[JdbcProfile] { - - private val cookieConsents = DatabaseSchema.auth.cookieConsents - - /** - * Records a new cookie consent. - */ - def create(consent: CookieConsent): Future[CookieConsent] = { - val consentWithId = consent.copy(id = Some(consent.id.getOrElse(UUID.randomUUID()))) - db.run((cookieConsents returning cookieConsents) += consentWithId) - } - - /** - * Finds the most recent consent for a logged-in user. 
- */ - def findByUserId(userId: UUID): Future[Option[CookieConsent]] = { - db.run( - cookieConsents - .filter(_.userId === userId) - .sortBy(_.consentTimestamp.desc) - .result - .headOption - ) - } - - /** - * Finds the most recent consent for a session (anonymous user). - */ - def findBySessionId(sessionId: String): Future[Option[CookieConsent]] = { - db.run( - cookieConsents - .filter(_.sessionId === sessionId) - .sortBy(_.consentTimestamp.desc) - .result - .headOption - ) - } - - /** - * Checks if a user has given consent for the current policy version. - */ - def hasValidConsent(userId: UUID, policyVersion: String): Future[Boolean] = { - db.run( - cookieConsents - .filter(c => c.userId === userId && c.policyVersion === policyVersion && c.consentGiven) - .exists - .result - ) - } - - /** - * Checks if a session has given consent for the current policy version. - */ - def hasValidConsentBySession(sessionId: String, policyVersion: String): Future[Boolean] = { - db.run( - cookieConsents - .filter(c => c.sessionId === sessionId && c.policyVersion === policyVersion && c.consentGiven) - .exists - .result - ) - } - - /** - * Links anonymous session consent to a user after login. 
- */ - def linkSessionToUser(sessionId: String, userId: UUID): Future[Int] = { - db.run( - cookieConsents - .filter(c => c.sessionId === sessionId && c.userId.isEmpty) - .map(_.userId) - .update(Some(userId)) - ) - } -} diff --git a/app/repositories/CoverageExpectationProfileRepository.scala b/app/repositories/CoverageExpectationProfileRepository.scala deleted file mode 100644 index 5f3d6db7..00000000 --- a/app/repositories/CoverageExpectationProfileRepository.scala +++ /dev/null @@ -1,59 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.DatabaseSchema -import models.domain.genomics.CoverageExpectationProfile -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait CoverageExpectationProfileRepository { - def findByTestTypeId(testTypeId: Int): Future[Seq[CoverageExpectationProfile]] - def findByTestTypeAndContig(testTypeId: Int, contigName: String): Future[Seq[CoverageExpectationProfile]] - def findByTestTypeContigAndClass(testTypeId: Int, contigName: String, variantClass: String): Future[Option[CoverageExpectationProfile]] - def create(profile: CoverageExpectationProfile): Future[CoverageExpectationProfile] - def update(profile: CoverageExpectationProfile): Future[Boolean] - def delete(id: Int): Future[Boolean] -} - -class CoverageExpectationProfileRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with CoverageExpectationProfileRepository - with Logging { - - import models.dal.MyPostgresProfile.api.* - - private val profiles = DatabaseSchema.domain.genomics.coverageExpectationProfiles - - override def findByTestTypeId(testTypeId: Int): Future[Seq[CoverageExpectationProfile]] = - runQuery(profiles.filter(_.testTypeId === testTypeId).result) - - override def findByTestTypeAndContig(testTypeId: Int, contigName: String): Future[Seq[CoverageExpectationProfile]] = 
- runQuery(profiles.filter(p => p.testTypeId === testTypeId && p.contigName === contigName).result) - - override def findByTestTypeContigAndClass(testTypeId: Int, contigName: String, variantClass: String): Future[Option[CoverageExpectationProfile]] = - runQuery(profiles.filter(p => - p.testTypeId === testTypeId && p.contigName === contigName && p.variantClass === variantClass - ).result.headOption) - - override def create(profile: CoverageExpectationProfile): Future[CoverageExpectationProfile] = - runQuery( - (profiles returning profiles.map(_.id) into ((p, id) => p.copy(id = Some(id)))) += profile - ) - - override def update(profile: CoverageExpectationProfile): Future[Boolean] = profile.id match { - case None => Future.successful(false) - case Some(id) => - runQuery( - profiles.filter(_.id === id) - .map(p => (p.minDepthHigh, p.minDepthMedium, p.minDepthLow, p.minCoveragePct, p.minMappingQuality, p.minCallablePct, p.notes)) - .update((profile.minDepthHigh, profile.minDepthMedium, profile.minDepthLow, profile.minCoveragePct, profile.minMappingQuality, profile.minCallablePct, profile.notes)) - ).map(_ > 0) - } - - override def delete(id: Int): Future[Boolean] = - runQuery(profiles.filter(_.id === id).delete).map(_ > 0) -} diff --git a/app/repositories/CoverageRepository.scala b/app/repositories/CoverageRepository.scala deleted file mode 100644 index 9cddfece..00000000 --- a/app/repositories/CoverageRepository.scala +++ /dev/null @@ -1,143 +0,0 @@ -package repositories - -import models.dal.DatabaseSchema -import models.domain.genomics.{CoverageBenchmark, SequencingLab} -import play.api.db.slick.DatabaseConfigProvider -import models.dal.MyPostgresProfile.api.* - -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository for coverage-related database operations. - * Coverage data is now embedded as JSONB on alignment_metadata. 
- */ -@Singleton -class CoverageRepository @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(using ec: ExecutionContext) extends BaseRepository(dbConfigProvider) { - - def getBenchmarkStatistics: Future[Seq[CoverageBenchmark]] = { - val query = sql""" - SELECT lab, - ttd.code as test_type, - gc.common_name as contig, - avg(read_length) as mean_read_len, - min(read_length) as min_read_len, - max(read_length) as max_read_len, - avg(insert_size) as mean_insert_len, - min(insert_size) as min_insert_len, - max(insert_size) as max_insert_len, - avg((am.coverage->>'meanDepth')::double precision) as mean_depth_avg, - stddev((am.coverage->>'meanDepth')::double precision) as mean_depth_stddev, - avg((am.coverage->>'basesNoCoverage')::double precision) as bases_no_coverage_avg, - stddev((am.coverage->>'basesNoCoverage')::double precision) as bases_no_coverage_stddev, - avg((am.coverage->>'basesLowQualityMapping')::double precision) as bases_low_qual_mapping_avg, - stddev((am.coverage->>'basesLowQualityMapping')::double precision) as bases_low_qual_mapping_stddev, - avg((am.coverage->>'basesCallable')::double precision) as bases_callable_avg, - stddev((am.coverage->>'basesCallable')::double precision) as bases_callable_stddev, - avg((am.coverage->>'meanMappingQuality')::double precision) as mean_mapping_quality, - count(1) as num_samples - FROM alignment_metadata am - JOIN public.genbank_contig gc ON am.genbank_contig_id = gc.genbank_contig_id - JOIN public.sequence_file sf ON am.sequence_file_id = sf.id - JOIN public.sequence_library sl ON sl.id = sf.library_id - JOIN public.test_type_definition ttd ON sl.test_type_id = ttd.id - WHERE am.coverage IS NOT NULL - GROUP BY lab, ttd.code, gc.common_name - ORDER BY lab, ttd.code, gc.common_name - """.as[(String, String, String, Option[Double], Option[Int], Option[Int], Option[Double], - Option[Int], Option[Int], Option[Double], Option[Double], Option[Double], - Option[Double], Option[Double], 
Option[Double], Option[Double], Option[Double], - Option[Double], Int)] - - db.run(query).map { results => - mapStatistics(results) - } - } - - def getAllLabs: Future[Seq[SequencingLab]] = db.run { - DatabaseSchema.domain.genomics.sequencingLabs - .sortBy(_.name) - .result - .map(_.map { row => - SequencingLab( - id = row.id, - name = row.name, - isD2c = row.isD2c, - websiteUrl = row.websiteUrl, - descriptionMarkdown = row.descriptionMarkdown - ) - }) - } - - def getBenchmarksByLab(labId: Int): Future[Seq[CoverageBenchmark]] = { - val query = sql""" - SELECT sl.name as lab, - ttd.code as test_type, - gc.common_name as contig, - AVG(sl2.read_length) as mean_read_len, - MIN(sl2.read_length) as min_read_len, - MAX(sl2.read_length) as max_read_len, - AVG(sl2.insert_size) as mean_insert_len, - MIN(sl2.insert_size) as min_insert_len, - MAX(sl2.insert_size) as max_insert_len, - AVG((am.coverage->>'meanDepth')::double precision) as mean_depth_avg, - STDDEV_POP((am.coverage->>'meanDepth')::double precision) as mean_depth_stddev, - AVG((am.coverage->>'basesNoCoverage')::double precision) as bases_no_coverage_avg, - STDDEV_POP((am.coverage->>'basesNoCoverage')::double precision) as bases_no_coverage_stddev, - AVG((am.coverage->>'basesLowQualityMapping')::double precision) as bases_low_qual_mapping_avg, - STDDEV_POP((am.coverage->>'basesLowQualityMapping')::double precision) as bases_low_qual_mapping_stddev, - AVG((am.coverage->>'basesCallable')::double precision) as bases_callable_avg, - STDDEV_POP((am.coverage->>'basesCallable')::double precision) as bases_callable_stddev, - AVG((am.coverage->>'meanMappingQuality')::double precision) as mean_mapping_quality, - COUNT(DISTINCT sl.id) as num_samples - FROM alignment_metadata am - JOIN sequence_file sf ON sf.id = am.sequence_file_id - JOIN sequence_library sl2 ON sl2.id = sf.library_id - JOIN test_type_definition ttd ON sl2.test_type_id = ttd.id - JOIN sequencing_lab sl ON sl2.lab = sl.name - JOIN genbank_contig gc ON 
am.genbank_contig_id = gc.genbank_contig_id - WHERE sl.id = $labId AND am.coverage IS NOT NULL - GROUP BY sl.name, ttd.code, gc.common_name, gc.genbank_contig_id - ORDER BY sl.name, ttd.code, gc.genbank_contig_id - """.as[(String, String, String, Option[Double], Option[Int], Option[Int], Option[Double], - Option[Int], Option[Int], Option[Double], Option[Double], Option[Double], - Option[Double], Option[Double], Option[Double], Option[Double], Option[Double], - Option[Double], Int)] - - db.run(query).map { results => - mapStatistics(results) - } - } - - private def mapStatistics(results: Vector[(String, String, String, Option[Double], Option[Int], Option[Int], Option[Double], Option[Int], Option[Int], Option[Double], Option[Double], Option[Double], Option[Double], Option[Double], Option[Double], Option[Double], Option[Double], Option[Double], Int)]) = { - results.map { case (lab, testType, contig, meanReadLen, minReadLen, maxReadLen, - meanInsertLen, minInsertLen, maxInsertLen, meanDepthAvg, meanDepthStddev, - basesNoCoverageAvg, basesNoCoverageStddev, basesLowQualMappingAvg, - basesLowQualMappingStddev, basesCallableAvg, basesCallableStddev, - meanMappingQuality, numSamples) => - CoverageBenchmark( - lab = lab, - testType = testType, - contig = contig, - meanReadLen = meanReadLen, - minReadLen = minReadLen, - maxReadLen = maxReadLen, - meanInsertLen = meanInsertLen, - minInsertLen = minInsertLen, - maxInsertLen = maxInsertLen, - meanDepthAvg = meanDepthAvg, - meanDepthStddev = meanDepthStddev, - basesNoCoverageAvg = basesNoCoverageAvg, - basesNoCoverageStddev = basesNoCoverageStddev, - basesLowQualMappingAvg = basesLowQualMappingAvg, - basesLowQualMappingStddev = basesLowQualMappingStddev, - basesCallableAvg = basesCallableAvg, - basesCallableStddev = basesCallableStddev, - meanMappingQuality = meanMappingQuality, - numSamples = numSamples - ) - } - } -} diff --git a/app/repositories/CuratorActionRepository.scala b/app/repositories/CuratorActionRepository.scala 
deleted file mode 100644 index 811199fd..00000000 --- a/app/repositories/CuratorActionRepository.scala +++ /dev/null @@ -1,53 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.domain.discovery.* -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait CuratorActionRepository { - def create(action: CuratorAction): Future[CuratorAction] - def findByTarget(targetType: CuratorTargetType, targetId: Int): Future[Seq[CuratorAction]] - def findByCurator(curatorId: String, limit: Int = 50, offset: Int = 0): Future[Seq[CuratorAction]] - def findRecent(limit: Int = 50, offset: Int = 0): Future[Seq[CuratorAction]] -} - -class CuratorActionRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with CuratorActionRepository - with Logging { - - import models.dal.DatabaseSchema.domain.haplogroups.curatorActions - import models.dal.MyPostgresProfile.api.* - - override def create(action: CuratorAction): Future[CuratorAction] = { - val dbAction = (curatorActions returning curatorActions.map(_.id) - into ((row, id) => row.copy(id = Some(id)))) += action - runQuery(dbAction) - } - - override def findByTarget(targetType: CuratorTargetType, targetId: Int): Future[Seq[CuratorAction]] = - runQuery(curatorActions - .filter(a => a.targetType === targetType && a.targetId === targetId) - .sortBy(_.createdAt.desc) - .result) - - override def findByCurator(curatorId: String, limit: Int, offset: Int): Future[Seq[CuratorAction]] = - runQuery(curatorActions - .filter(_.curatorId === curatorId) - .sortBy(_.createdAt.desc) - .drop(offset) - .take(limit) - .result) - - override def findRecent(limit: Int, offset: Int): Future[Seq[CuratorAction]] = - runQuery(curatorActions - .sortBy(_.createdAt.desc) - .drop(offset) - .take(limit) - .result) -} diff --git a/app/repositories/CuratorAuditRepository.scala 
b/app/repositories/CuratorAuditRepository.scala deleted file mode 100644 index fdfa4384..00000000 --- a/app/repositories/CuratorAuditRepository.scala +++ /dev/null @@ -1,85 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.curator.AuditLogEntry -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class CuratorAuditRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[JdbcProfile] { - - private val auditLog = DatabaseSchema.curator.auditLog - - /** - * Log an audit entry. - */ - def logAction(entry: AuditLogEntry): Future[AuditLogEntry] = { - val entryWithId = entry.copy(id = Some(entry.id.getOrElse(UUID.randomUUID()))) - db.run((auditLog returning auditLog) += entryWithId) - } - - /** - * Get audit history for a specific entity. - */ - def getEntityHistory(entityType: String, entityId: Int): Future[Seq[AuditLogEntry]] = { - db.run( - auditLog - .filter(e => e.entityType === entityType && e.entityId === entityId) - .sortBy(_.createdAt.desc) - .result - ) - } - - /** - * Get recent audit actions with pagination. - */ - def getRecentActions(limit: Int = 50, offset: Int = 0): Future[Seq[AuditLogEntry]] = { - db.run( - auditLog - .sortBy(_.createdAt.desc) - .drop(offset) - .take(limit) - .result - ) - } - - /** - * Get actions by a specific user. - */ - def getActionsByUser(userId: UUID, limit: Int = 50, offset: Int = 0): Future[Seq[AuditLogEntry]] = { - db.run( - auditLog - .filter(_.userId === userId) - .sortBy(_.createdAt.desc) - .drop(offset) - .take(limit) - .result - ) - } - - /** - * Count total audit entries for pagination. 
- */ - def countAll(): Future[Int] = { - db.run(auditLog.length.result) - } - - /** - * Count audit entries for a specific entity. - */ - def countByEntity(entityType: String, entityId: Int): Future[Int] = { - db.run( - auditLog - .filter(e => e.entityType === entityType && e.entityId === entityId) - .length - .result - ) - } -} diff --git a/app/repositories/GenbankContigRepository.scala b/app/repositories/GenbankContigRepository.scala deleted file mode 100644 index 88f649cd..00000000 --- a/app/repositories/GenbankContigRepository.scala +++ /dev/null @@ -1,110 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.GenbankContig -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository interface for interacting with Genbank contigs. - * Provides methods to fetch contigs using different querying criteria. - */ -trait GenbankContigRepository { - /** - * Finds a GenbankContig by its accession number. - * - * @param accession The accession number of the desired GenbankContig. - * @return A Future containing an Option of GenbankContig. The Option will - * contain the GenbankContig if found, or None if not found. - */ - def findByAccession(accession: String): Future[Option[GenbankContig]] - - /** - * Finds a GenbankContig by its unique identifier. - * - * @param id The unique identifier of the GenbankContig to retrieve. - * @return A Future containing an Option of GenbankContig. The Option will - * contain the GenbankContig if found, or None if not found. - */ - def findById(id: Int): Future[Option[GenbankContig]] - - /** - * Retrieves a sequence of GenbankContig objects corresponding to the provided accession numbers. - * - * @param accessions A sequence of accession numbers for which GenbankContigs need to be fetched. 
- * @return A Future containing a sequence of GenbankContig objects corresponding to the provided accession numbers. - * The sequence may be empty if no matching GenbankContigs are found. - */ - def getByAccessions(accessions: Seq[String]): Future[Seq[GenbankContig]] - - /** - * Retrieves a sequence of GenbankContig objects corresponding to the provided common names. - * - * @param commonNames A sequence of common names for which GenbankContigs need to be fetched. - * @return A Future containing a sequence of GenbankContig objects. - */ - def findByCommonNames(commonNames: Seq[String]): Future[Seq[GenbankContig]] - - /** - * Retrieves all GenbankContig objects. - * - * @return A Future containing a sequence of all GenbankContig objects. - */ - def getAll: Future[Seq[GenbankContig]] - - /** - * Retrieves Y-DNA and mtDNA contigs (chrY and chrM). - * - * @return A Future containing a sequence of Y and MT GenbankContig objects. - */ - def getYAndMtContigs: Future[Seq[GenbankContig]] -} - -class GenbankContigRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends GenbankContigRepository - with HasDatabaseConfigProvider[MyPostgresProfile] { - - import models.dal.DatabaseSchema.domain.genomics.genbankContigs - - def findByAccession(accession: String): Future[Option[GenbankContig]] = { - val query = genbankContigs.filter(_.accession === accession).result.headOption - db.run(query) - } - - def findById(id: Int): Future[Option[GenbankContig]] = { - val query = genbankContigs.filter(_.genbankContigId === id).result.headOption - db.run(query) - } - - def getByAccessions(accessions: Seq[String]): Future[Seq[GenbankContig]] = { - val query = genbankContigs.filter(_.accession.inSet(accessions)).result - db.run(query) - } - - def findByCommonNames(commonNames: Seq[String]): Future[Seq[GenbankContig]] = { - val query = genbankContigs.filter(_.commonName.inSet(commonNames)).result - db.run(query) - } - - def 
getAll: Future[Seq[GenbankContig]] = { - val query = genbankContigs.sortBy(c => (c.referenceGenome, c.commonName)).result - db.run(query) - } - - def getYAndMtContigs: Future[Seq[GenbankContig]] = { - // Include both "chrY"/"chrM" (GRCh38) and "Y"/"M" (GRCh37) naming conventions - val query = genbankContigs - .filter(c => - c.commonName.like("chrY%") || c.commonName.like("chrM%") || - c.commonName === "Y" || c.commonName === "M" - ) - .sortBy(c => (c.referenceGenome, c.commonName)) - .result - db.run(query) - } -} \ No newline at end of file diff --git a/app/repositories/GenealogicalAnchorRepository.scala b/app/repositories/GenealogicalAnchorRepository.scala deleted file mode 100644 index 4d34d162..00000000 --- a/app/repositories/GenealogicalAnchorRepository.scala +++ /dev/null @@ -1,56 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.domain.haplogroups.{AnchorType, GenealogicalAnchor} -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait GenealogicalAnchorRepository { - def findById(id: Int): Future[Option[GenealogicalAnchor]] - def findByHaplogroup(haplogroupId: Int): Future[Seq[GenealogicalAnchor]] - def findByType(anchorType: AnchorType): Future[Seq[GenealogicalAnchor]] - def create(anchor: GenealogicalAnchor): Future[GenealogicalAnchor] - def update(anchor: GenealogicalAnchor): Future[Boolean] - def delete(id: Int): Future[Boolean] -} - -class GenealogicalAnchorRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with GenealogicalAnchorRepository - with Logging { - - import models.dal.DatabaseSchema.domain.haplogroups.genealogicalAnchors - import models.dal.MyPostgresProfile.api.* - - override def findById(id: Int): Future[Option[GenealogicalAnchor]] = - runQuery(genealogicalAnchors.filter(_.id === id).result.headOption) - - override def 
findByHaplogroup(haplogroupId: Int): Future[Seq[GenealogicalAnchor]] = - runQuery(genealogicalAnchors.filter(_.haplogroupId === haplogroupId).result) - - implicit private val anchorTypeMapper: BaseColumnType[AnchorType] = - MappedColumnType.base[AnchorType, String](_.dbValue, AnchorType.fromString) - - override def findByType(anchorType: AnchorType): Future[Seq[GenealogicalAnchor]] = - runQuery(genealogicalAnchors.filter(_.anchorType === anchorType).result) - - override def create(anchor: GenealogicalAnchor): Future[GenealogicalAnchor] = - runQuery( - (genealogicalAnchors returning genealogicalAnchors.map(_.id) - into ((a, id) => a.copy(id = Some(id)))) += anchor - ) - - override def update(anchor: GenealogicalAnchor): Future[Boolean] = - anchor.id match { - case Some(id) => - runQuery(genealogicalAnchors.filter(_.id === id).update(anchor).map(_ > 0)) - case None => Future.successful(false) - } - - override def delete(id: Int): Future[Boolean] = - runQuery(genealogicalAnchors.filter(_.id === id).delete.map(_ > 0)) -} diff --git a/app/repositories/GenomeRegionsRepository.scala b/app/repositories/GenomeRegionsRepository.scala deleted file mode 100644 index a1a3aeae..00000000 --- a/app/repositories/GenomeRegionsRepository.scala +++ /dev/null @@ -1,163 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{GenbankContig, GenomeRegion, GenomeRegionVersion} -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import play.api.libs.json.Json - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository interface for genome region data. - * Provides access to structural annotations and cytobands (now unified). - */ -trait GenomeRegionsRepository { - - /** - * Get the data version for a reference genome build. 
- */ - def getVersion(referenceGenome: String): Future[Option[GenomeRegionVersion]] - - /** - * Get all contigs (chromosomes) for a reference genome build. - */ - def getContigsForBuild(referenceGenome: String): Future[Seq[GenbankContig]] - - /** - * Get all regions (including cytobands) that have coordinates for the specified build. - */ - def getRegionsForBuild(referenceGenome: String): Future[Seq[GenomeRegion]] - - /** - * Get all data for a build in a single composed query. - */ - def getFullBuildData(referenceGenome: String): Future[FullBuildData] - - // ============================================================================ - // GenomeRegion CRUD operations - // ============================================================================ - - def findRegionById(id: Int): Future[Option[GenomeRegion]] - - // Note: Pagination with JSONB filtering can be slow without specific indices. - // For the management API, we might iterate all or allow filtering by type. - def findRegions(regionType: Option[String], build: Option[String], offset: Int, limit: Int): Future[Seq[GenomeRegion]] - - def countRegions(regionType: Option[String], build: Option[String]): Future[Int] - - def createRegion(region: GenomeRegion): Future[Int] - def updateRegion(id: Int, region: GenomeRegion): Future[Boolean] - def deleteRegion(id: Int): Future[Boolean] - def bulkCreateRegions(regions: Seq[GenomeRegion]): Future[Seq[Int]] -} - -/** - * Aggregated build data from the repository. 
- */ -case class FullBuildData( - version: Option[GenomeRegionVersion], - contigs: Seq[GenbankContig], - regions: Seq[GenomeRegion] -) - -class GenomeRegionsRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends GenomeRegionsRepository - with HasDatabaseConfigProvider[MyPostgresProfile] { - - import models.dal.DatabaseSchema.domain.genomics.* - - override def getVersion(referenceGenome: String): Future[Option[GenomeRegionVersion]] = { - val query = genomeRegionVersions - .filter(_.referenceGenome === referenceGenome) - .result - .headOption - db.run(query) - } - - override def getContigsForBuild(referenceGenome: String): Future[Seq[GenbankContig]] = { - val query = genbankContigs - .filter(_.referenceGenome === referenceGenome) - .sortBy(_.commonName) - .result - db.run(query) - } - - override def getRegionsForBuild(referenceGenome: String): Future[Seq[GenomeRegion]] = { - // Select regions where coordinates -> buildName exists - val query = genomeRegions - .filter(r => r.coordinates ?? 
referenceGenome) - .result - db.run(query) - } - - override def getFullBuildData(referenceGenome: String): Future[FullBuildData] = { - for { - version <- getVersion(referenceGenome) - contigs <- getContigsForBuild(referenceGenome) - regions <- getRegionsForBuild(referenceGenome) - } yield FullBuildData( - version = version, - contigs = contigs, - regions = regions - ) - } - - // ============================================================================ - // GenomeRegion CRUD implementations - // ============================================================================ - - override def findRegionById(id: Int): Future[Option[GenomeRegion]] = { - db.run(genomeRegions.filter(_.id === id).result.headOption) - } - - override def findRegions(regionType: Option[String], build: Option[String], offset: Int, limit: Int): Future[Seq[GenomeRegion]] = { - var query = genomeRegions.sortBy(_.id) - - if (regionType.isDefined) { - query = query.filter(_.regionType === regionType.get) - } - - if (build.isDefined) { - query = query.filter(r => r.coordinates ?? build.get) - } - - db.run(query.drop(offset).take(limit).result) - } - - override def countRegions(regionType: Option[String], build: Option[String]): Future[Int] = { - var query = genomeRegions.sortBy(_.id) // Sort irrelevant for count but type checks - - if (regionType.isDefined) { - query = query.filter(_.regionType === regionType.get) - } - - if (build.isDefined) { - query = query.filter(r => r.coordinates ?? 
build.get) - } - - db.run(query.length.result) - } - - override def createRegion(region: GenomeRegion): Future[Int] = { - db.run((genomeRegions returning genomeRegions.map(_.id)) += region) - } - - override def updateRegion(id: Int, region: GenomeRegion): Future[Boolean] = { - val query = genomeRegions.filter(_.id === id).map(r => - (r.regionType, r.name, r.coordinates, r.properties) - ).update((region.regionType, region.name, Json.toJson(region.coordinates), region.properties)) - db.run(query).map(_ > 0) - } - - override def deleteRegion(id: Int): Future[Boolean] = { - db.run(genomeRegions.filter(_.id === id).delete).map(_ > 0) - } - - override def bulkCreateRegions(regions: Seq[GenomeRegion]): Future[Seq[Int]] = { - db.run((genomeRegions returning genomeRegions.map(_.id)) ++= regions) - } -} \ No newline at end of file diff --git a/app/repositories/GenomicStudyRepository.scala b/app/repositories/GenomicStudyRepository.scala deleted file mode 100644 index a2449bb5..00000000 --- a/app/repositories/GenomicStudyRepository.scala +++ /dev/null @@ -1,109 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.publications.GenomicStudy -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import scala.concurrent.{ExecutionContext, Future} - -/** - * A repository trait for genomic study-related data operations, providing a contract for implementing - * persistence and retrieval logic specific to genomic studies. - * - * This trait defines methods for interacting with genomic study data, such as fetching studies - * by their ENA accession number, retrieving all study accessions, saving a study, or finding - * the database identifier (ID) from an accession number. The actual implementation of these - * methods can interact with various data sources, including relational databases or APIs. 
- * - * Methods: - * - * - `findByAccession`: Retrieves a genomic study by its accession number. - * - * - `getAllAccessions`: Fetches all available accession numbers of genomic studies. - * - * - `saveStudy`: Persists a genomic study entity to the data source and returns the saved entity. - * - * - `findIdByAccession`: Retrieves the unique identifier of a study based on its accession number. - */ -trait GenomicStudyRepository { - /** - * Retrieves a genomic study matching the given accession number. - * - * @param accession The accession number of the genomic study to be retrieved. - * @return A Future containing an Option of the genomic study. The Option will be: - * - Some(GenomicStudy) if a study matching the accession number is found. - * - None if no study is found with the given accession number. - */ - def findByAccession(accession: String): Future[Option[GenomicStudy]] - - /** - * Retrieves all available accession numbers of genomic studies. - * - * @return A Future containing a sequence of strings, where each string represents - * an accession number of a genomic study. - */ - def getAllAccessions: Future[Seq[String]] - - /** - * Persists the provided genomic study to the data source. - * - * The method saves the given `GenomicStudy` instance, including its metadata and details, - * to the underlying data source (e.g., a database or API). If the operation is successful, - * the saved study entity is returned as part of the `Future`. - * - * @param study The instance of `GenomicStudy` to be saved. It contains metadata such as - * accession, title, center name, study name, and other relevant information. - * @return A `Future` containing the saved `GenomicStudy` instance. The returned instance may - * include additional information such as a generated unique identifier if applicable. - */ - def saveStudy(study: GenomicStudy): Future[GenomicStudy] - - /** - * Retrieves the unique identifier associated with the given genomic study accession number. 
- * - * @param accession The accession number of the genomic study whose identifier is to be retrieved. - * @return A Future containing an Option of the unique identifier (Int). - * The Option will be: - * - Some(Int) if an identifier matching the given accession number is found. - * - None if no identifier is found for the given accession number. - */ - def findIdByAccession(accession: String): Future[Option[Int]] -} - -class GenomicStudyRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends GenomicStudyRepository - with HasDatabaseConfigProvider[JdbcProfile] { - - private val genomicStudies = DatabaseSchema.domain.publications.genomicStudies - - override def findByAccession(accession: String): Future[Option[GenomicStudy]] = { - db.run(genomicStudies.filter(_.accession === accession).result.headOption) - } - - override def findIdByAccession(accession: String): Future[Option[Int]] = { - db.run(genomicStudies.filter(_.accession === accession).map(_.id).result.headOption) - } - - override def getAllAccessions: Future[Seq[String]] = { - db.run(genomicStudies.map(_.accession).result) - } - - override def saveStudy(study: GenomicStudy): Future[GenomicStudy] = { - val query = genomicStudies.filter(_.accession === study.accession) - - db.run(query.result.headOption).flatMap { - case Some(existingStudy) => - val studyToUpdate = study.copy(id = existingStudy.id) - db.run(genomicStudies.filter(_.id === existingStudy.id).update(studyToUpdate)) - .map(_ => studyToUpdate) - case None => - db.run((genomicStudies returning genomicStudies.map(_.id) - into ((study, id) => study.copy(id = Some(id)))) += study) - } - } -} \ No newline at end of file diff --git a/app/repositories/GenotypeDataRepository.scala b/app/repositories/GenotypeDataRepository.scala deleted file mode 100644 index 62f1c2ea..00000000 --- a/app/repositories/GenotypeDataRepository.scala +++ /dev/null @@ -1,18 +0,0 @@ -package repositories - 
-import models.domain.genomics.GenotypeData - -import java.util.UUID -import scala.concurrent.Future - -trait GenotypeDataRepository { - def findById(id: Int): Future[Option[GenotypeData]] - def findByAtUri(atUri: String): Future[Option[GenotypeData]] - def findBySampleGuid(sampleGuid: UUID): Future[Seq[GenotypeData]] - def findByProvider(provider: String): Future[Seq[GenotypeData]] - def create(genotypeData: GenotypeData): Future[GenotypeData] - def upsertByAtUri(genotypeData: GenotypeData): Future[GenotypeData] - def update(genotypeData: GenotypeData): Future[Boolean] - def softDelete(id: Int): Future[Boolean] - def findBySourceFileHash(hash: String): Future[Option[GenotypeData]] -} diff --git a/app/repositories/GenotypeDataRepositoryImpl.scala b/app/repositories/GenotypeDataRepositoryImpl.scala deleted file mode 100644 index 93671f0b..00000000 --- a/app/repositories/GenotypeDataRepositoryImpl.scala +++ /dev/null @@ -1,82 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.GenotypeData -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class GenotypeDataRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends GenotypeDataRepository with HasDatabaseConfigProvider[JdbcProfile] { - - private val genotypeData = DatabaseSchema.domain.genomics.genotypeData - - override def findById(id: Int): Future[Option[GenotypeData]] = { - db.run(genotypeData.filter(g => g.id === id && !g.deleted).result.headOption) - } - - override def findByAtUri(atUri: String): Future[Option[GenotypeData]] = { - db.run(genotypeData.filter(g => g.atUri === atUri && !g.deleted).result.headOption) - } - - override def 
findBySampleGuid(sampleGuid: UUID): Future[Seq[GenotypeData]] = { - db.run(genotypeData.filter(g => g.sampleGuid === sampleGuid && !g.deleted).result) - } - - override def findByProvider(provider: String): Future[Seq[GenotypeData]] = { - db.run(genotypeData.filter(g => g.provider === provider && !g.deleted).result) - } - - override def create(data: GenotypeData): Future[GenotypeData] = { - db.run( - (genotypeData returning genotypeData.map(_.id) - into ((g, id) => g.copy(id = Some(id)))) += data - ) - } - - override def upsertByAtUri(data: GenotypeData): Future[GenotypeData] = { - data.atUri match { - case None => create(data) - case Some(uri) => - findByAtUri(uri).flatMap { - case Some(existing) => - val updated = data.copy( - id = existing.id, - createdAt = existing.createdAt, - updatedAt = LocalDateTime.now() - ) - update(updated).map(_ => updated) - case None => create(data) - } - } - } - - override def update(data: GenotypeData): Future[Boolean] = { - data.id match { - case None => Future.successful(false) - case Some(id) => - val updated = data.copy(updatedAt = LocalDateTime.now()) - db.run(genotypeData.filter(_.id === id).update(updated)).map(_ > 0) - } - } - - override def softDelete(id: Int): Future[Boolean] = { - db.run( - genotypeData.filter(_.id === id) - .map(g => (g.deleted, g.updatedAt)) - .update((true, LocalDateTime.now())) - ).map(_ > 0) - } - - override def findBySourceFileHash(hash: String): Future[Option[GenotypeData]] = { - db.run(genotypeData.filter(g => g.sourceFileHash === hash && !g.deleted).result.headOption) - } -} diff --git a/app/repositories/GroupProjectRepository.scala b/app/repositories/GroupProjectRepository.scala deleted file mode 100644 index 6984fd97..00000000 --- a/app/repositories/GroupProjectRepository.scala +++ /dev/null @@ -1,174 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.DatabaseSchema -import models.domain.{GroupProject, GroupProjectMember} -import play.api.Logging -import 
play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait GroupProjectRepository { - def create(project: GroupProject): Future[GroupProject] - def findById(id: Int): Future[Option[GroupProject]] - def findByGuid(guid: UUID): Future[Option[GroupProject]] - def findByAtUri(atUri: String): Future[Option[GroupProject]] - def findByOwner(ownerDid: String): Future[Seq[GroupProject]] - def findByType(projectType: String): Future[Seq[GroupProject]] - def findByTargetHaplogroup(haplogroup: String): Future[Seq[GroupProject]] - def update(project: GroupProject): Future[Boolean] - def softDelete(id: Int): Future[Boolean] - def softDeleteByAtUri(atUri: String): Future[Boolean] -} - -class GroupProjectRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with GroupProjectRepository - with Logging { - - import models.dal.MyPostgresProfile.api.* - - private val projects = DatabaseSchema.domain.project.groupProjects - - override def create(project: GroupProject): Future[GroupProject] = - runQuery( - (projects returning projects.map(_.id) into ((p, id) => p.copy(id = Some(id)))) += project - ) - - override def findById(id: Int): Future[Option[GroupProject]] = - runQuery(projects.filter(p => p.id === id && !p.deleted).result.headOption) - - override def findByGuid(guid: UUID): Future[Option[GroupProject]] = - runQuery(projects.filter(p => p.projectGuid === guid && !p.deleted).result.headOption) - - override def findByAtUri(atUri: String): Future[Option[GroupProject]] = - runQuery(projects.filter(p => p.atUri === atUri && !p.deleted).result.headOption) - - override def findByOwner(ownerDid: String): Future[Seq[GroupProject]] = - runQuery(projects.filter(p => p.ownerDid === ownerDid && !p.deleted).result) - - override def findByType(projectType: String): Future[Seq[GroupProject]] = - 
runQuery(projects.filter(p => p.projectType === projectType && !p.deleted).result) - - override def findByTargetHaplogroup(haplogroup: String): Future[Seq[GroupProject]] = - runQuery(projects.filter(p => p.targetHaplogroup === haplogroup && !p.deleted).result) - - override def update(project: GroupProject): Future[Boolean] = project.id match { - case None => Future.successful(false) - case Some(id) => - runQuery( - projects.filter(_.id === id) - .map(p => (p.projectName, p.description, p.backgroundInfo, p.joinPolicy, - p.haplogroupRequirement, p.memberListVisibility, p.strPolicy, p.snpPolicy, - p.publicTreeView, p.successionPolicy, p.atCid, p.updatedAt)) - .update((project.projectName, project.description, project.backgroundInfo, project.joinPolicy, - project.haplogroupRequirement, project.memberListVisibility, project.strPolicy, project.snpPolicy, - project.publicTreeView, project.successionPolicy, project.atCid, LocalDateTime.now())) - ).map(_ > 0) - } - - override def softDelete(id: Int): Future[Boolean] = - runQuery( - projects.filter(_.id === id) - .map(p => (p.deleted, p.updatedAt)) - .update((true, LocalDateTime.now())) - ).map(_ > 0) - - override def softDeleteByAtUri(atUri: String): Future[Boolean] = - runQuery( - projects.filter(_.atUri === atUri) - .map(p => (p.deleted, p.updatedAt)) - .update((true, LocalDateTime.now())) - ).map(_ > 0) -} - -trait GroupProjectMemberRepository { - def create(member: GroupProjectMember): Future[GroupProjectMember] - def findById(id: Int): Future[Option[GroupProjectMember]] - def findByProjectId(projectId: Int): Future[Seq[GroupProjectMember]] - def findByProjectAndCitizen(projectId: Int, citizenDid: String): Future[Option[GroupProjectMember]] - def findByCitizen(citizenDid: String): Future[Seq[GroupProjectMember]] - def findByProjectAndStatus(projectId: Int, status: String): Future[Seq[GroupProjectMember]] - def findByProjectAndRole(projectId: Int, role: String): Future[Seq[GroupProjectMember]] - def findByAtUri(atUri: 
String): Future[Option[GroupProjectMember]] - def update(member: GroupProjectMember): Future[Boolean] - def updateStatus(id: Int, status: String): Future[Boolean] - def updateRole(id: Int, role: String): Future[Boolean] - def countByProjectAndStatus(projectId: Int, status: String): Future[Int] - def countActiveByProject(projectId: Int): Future[Int] -} - -class GroupProjectMemberRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with GroupProjectMemberRepository - with Logging { - - import models.dal.MyPostgresProfile.api.* - - private val members = DatabaseSchema.domain.project.groupProjectMembers - - override def create(member: GroupProjectMember): Future[GroupProjectMember] = - runQuery( - (members returning members.map(_.id) into ((m, id) => m.copy(id = Some(id)))) += member - ) - - override def findById(id: Int): Future[Option[GroupProjectMember]] = - runQuery(members.filter(_.id === id).result.headOption) - - override def findByProjectId(projectId: Int): Future[Seq[GroupProjectMember]] = - runQuery(members.filter(_.groupProjectId === projectId).result) - - override def findByProjectAndCitizen(projectId: Int, citizenDid: String): Future[Option[GroupProjectMember]] = - runQuery(members.filter(m => m.groupProjectId === projectId && m.citizenDid === citizenDid).result.headOption) - - override def findByCitizen(citizenDid: String): Future[Seq[GroupProjectMember]] = - runQuery(members.filter(_.citizenDid === citizenDid).result) - - override def findByProjectAndStatus(projectId: Int, status: String): Future[Seq[GroupProjectMember]] = - runQuery(members.filter(m => m.groupProjectId === projectId && m.status === status).result) - - override def findByProjectAndRole(projectId: Int, role: String): Future[Seq[GroupProjectMember]] = - runQuery(members.filter(m => m.groupProjectId === projectId && m.role === role).result) - - override def findByAtUri(atUri: String): 
Future[Option[GroupProjectMember]] = - runQuery(members.filter(_.atUri === atUri).result.headOption) - - override def update(member: GroupProjectMember): Future[Boolean] = member.id match { - case None => Future.successful(false) - case Some(id) => - runQuery( - members.filter(_.id === id) - .map(m => (m.role, m.status, m.displayName, m.kitId, m.visibility, m.subgroupIds, - m.contributionLevel, m.atCid, m.updatedAt)) - .update((member.role, member.status, member.displayName, member.kitId, - play.api.libs.json.Json.toJson(member.visibility), member.subgroupIds, - member.contributionLevel, member.atCid, LocalDateTime.now())) - ).map(_ > 0) - } - - override def updateStatus(id: Int, status: String): Future[Boolean] = - runQuery( - members.filter(_.id === id) - .map(m => (m.status, m.updatedAt)) - .update((status, LocalDateTime.now())) - ).map(_ > 0) - - override def updateRole(id: Int, role: String): Future[Boolean] = - runQuery( - members.filter(_.id === id) - .map(m => (m.role, m.updatedAt)) - .update((role, LocalDateTime.now())) - ).map(_ > 0) - - override def countByProjectAndStatus(projectId: Int, status: String): Future[Int] = - runQuery(members.filter(m => m.groupProjectId === projectId && m.status === status).length.result) - - override def countActiveByProject(projectId: Int): Future[Int] = - runQuery(members.filter(m => m.groupProjectId === projectId && m.status === "ACTIVE").length.result) -} diff --git a/app/repositories/HaplogroupAncestralStrRepository.scala b/app/repositories/HaplogroupAncestralStrRepository.scala deleted file mode 100644 index 50dc3a0f..00000000 --- a/app/repositories/HaplogroupAncestralStrRepository.scala +++ /dev/null @@ -1,50 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.domain.haplogroups.HaplogroupAncestralStr -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait HaplogroupAncestralStrRepository { - def 
findByHaplogroup(haplogroupId: Int): Future[Seq[HaplogroupAncestralStr]] - def findByHaplogroupAndMarker(haplogroupId: Int, markerName: String): Future[Option[HaplogroupAncestralStr]] - def findByHaplogroupAndMarkers(haplogroupId: Int, markerNames: Seq[String]): Future[Seq[HaplogroupAncestralStr]] - def upsert(motif: HaplogroupAncestralStr): Future[Int] - def upsertBatch(motifs: Seq[HaplogroupAncestralStr]): Future[Seq[Int]] - def deleteByHaplogroup(haplogroupId: Int): Future[Int] -} - -class HaplogroupAncestralStrRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with HaplogroupAncestralStrRepository - with Logging { - - import models.dal.DatabaseSchema.domain.haplogroups.haplogroupAncestralStrs - import models.dal.MyPostgresProfile.api.* - - override def findByHaplogroup(haplogroupId: Int): Future[Seq[HaplogroupAncestralStr]] = - runQuery(haplogroupAncestralStrs.filter(_.haplogroupId === haplogroupId).result) - - override def findByHaplogroupAndMarker(haplogroupId: Int, markerName: String): Future[Option[HaplogroupAncestralStr]] = - runQuery(haplogroupAncestralStrs - .filter(r => r.haplogroupId === haplogroupId && r.markerName === markerName) - .result.headOption) - - override def findByHaplogroupAndMarkers(haplogroupId: Int, markerNames: Seq[String]): Future[Seq[HaplogroupAncestralStr]] = - runQuery(haplogroupAncestralStrs - .filter(r => r.haplogroupId === haplogroupId && r.markerName.inSet(markerNames)) - .result) - - override def upsert(motif: HaplogroupAncestralStr): Future[Int] = - runQuery(haplogroupAncestralStrs.insertOrUpdate(motif)) - - override def upsertBatch(motifs: Seq[HaplogroupAncestralStr]): Future[Seq[Int]] = - Future.sequence(motifs.map(upsert)) - - override def deleteByHaplogroup(haplogroupId: Int): Future[Int] = - runQuery(haplogroupAncestralStrs.filter(_.haplogroupId === haplogroupId).delete) -} diff --git 
a/app/repositories/HaplogroupCharacterStateRepository.scala b/app/repositories/HaplogroupCharacterStateRepository.scala deleted file mode 100644 index 044e3944..00000000 --- a/app/repositories/HaplogroupCharacterStateRepository.scala +++ /dev/null @@ -1,38 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.domain.genomics.HaplogroupCharacterState -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait HaplogroupCharacterStateRepository { - def findByHaplogroup(haplogroupId: Int): Future[Seq[HaplogroupCharacterState]] - def findByHaplogroupAndVariants(haplogroupId: Int, variantIds: Seq[Int]): Future[Seq[HaplogroupCharacterState]] - def findStrStatesForHaplogroup(haplogroupId: Int, strVariantIds: Seq[Int]): Future[Seq[HaplogroupCharacterState]] -} - -class HaplogroupCharacterStateRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with HaplogroupCharacterStateRepository - with Logging { - - import models.dal.DatabaseSchema.domain.genomics.haplogroupCharacterStates - import models.dal.MyPostgresProfile.api.* - - override def findByHaplogroup(haplogroupId: Int): Future[Seq[HaplogroupCharacterState]] = - runQuery(haplogroupCharacterStates.filter(_.haplogroupId === haplogroupId).result) - - override def findByHaplogroupAndVariants(haplogroupId: Int, variantIds: Seq[Int]): Future[Seq[HaplogroupCharacterState]] = - runQuery(haplogroupCharacterStates - .filter(r => r.haplogroupId === haplogroupId && r.variantId.inSet(variantIds)) - .result) - - override def findStrStatesForHaplogroup(haplogroupId: Int, strVariantIds: Seq[Int]): Future[Seq[HaplogroupCharacterState]] = - runQuery(haplogroupCharacterStates - .filter(r => r.haplogroupId === haplogroupId && r.variantId.inSet(strVariantIds)) - .result) -} diff --git a/app/repositories/HaplogroupCoreRepository.scala 
b/app/repositories/HaplogroupCoreRepository.scala deleted file mode 100644 index ed876831..00000000 --- a/app/repositories/HaplogroupCoreRepository.scala +++ /dev/null @@ -1,654 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance} -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider -import slick.jdbc.GetResult - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository interface for accessing and managing haplogroup data. - */ -trait HaplogroupCoreRepository { - /** - * Retrieves a haplogroup by its name and type. - * - * @param name the name of the haplogroup to retrieve - * @param haplogroupType the type of haplogroup (e.g., Y or MT) - * @return a Future containing an Option wrapping the Haplogroup object if found, or None otherwise - */ - def getHaplogroupByName(name: String, haplogroupType: HaplogroupType): Future[Option[Haplogroup]] - - /** - * Retrieves the ancestor haplogroups of the specified haplogroup. - * - * @param haplogroupId the unique identifier of the haplogroup for which ancestors are to be retrieved - * @return a Future containing a sequence of ancestor Haplogroup objects - */ - def getAncestors(haplogroupId: Int): Future[Seq[Haplogroup]] - - /** - * Retrieves all descendant haplogroups of the specified haplogroup. - * - * @param haplogroupId the unique identifier of the haplogroup for which descendants are to be retrieved - * @return a Future containing a sequence of descendant Haplogroup objects - */ - def getDescendants(haplogroupId: Int): Future[Seq[Haplogroup]] - - /** - * Retrieves the direct children of a given haplogroup by its unique identifier. 
- * - * @param haplogroupId the unique identifier of the haplogroup whose direct children are to be retrieved - * @return a Future containing a sequence of Haplogroup objects representing the direct children - */ - def getDirectChildren(haplogroupId: Int): Future[Seq[Haplogroup]] - - /** - * Gets the parent haplogroup of the specified haplogroup. - * - * @param haplogroupId the unique identifier of the haplogroup - * @return a Future containing an Option of the parent Haplogroup if one exists - */ - def getParent(haplogroupId: Int): Future[Option[Haplogroup]] - - // === Curator CRUD Methods === - - /** - * Find a haplogroup by ID (active only - not soft-deleted). - */ - def findById(id: Int): Future[Option[Haplogroup]] - - /** - * Search haplogroups by name with optional type filter (active only). - */ - def search(query: String, haplogroupType: Option[HaplogroupType], limit: Int, offset: Int): Future[Seq[Haplogroup]] - - /** - * Count haplogroups matching search criteria (active only). - */ - def count(query: Option[String], haplogroupType: Option[HaplogroupType]): Future[Int] - - /** - * Count haplogroups by type (active only). - */ - def countByType(haplogroupType: HaplogroupType): Future[Int] - - /** - * Create a new haplogroup. - */ - def create(haplogroup: Haplogroup): Future[Int] - - /** - * Update an existing haplogroup. - */ - def update(haplogroup: Haplogroup): Future[Boolean] - - /** - * Soft-delete a haplogroup by setting valid_until to now. - * Also reassigns all children to the deleted haplogroup's parent. - * - * @param id the haplogroup ID to soft-delete - * @param source the source attribution for the relationship changes - * @return true if successful, false if haplogroup not found - */ - def softDelete(id: Int, source: String): Future[Boolean] - - // === Tree Restructuring Methods === - - /** - * Update a haplogroup's parent by soft-deleting the old relationship and creating a new one. 
- * - * @param childId the ID of the child haplogroup to re-parent - * @param newParentId the ID of the new parent haplogroup - * @param source the source attribution for the relationship change - * @return true if successful - */ - def updateParent(childId: Int, newParentId: Int, source: String): Future[Int] - - /** - * Create a new haplogroup with an optional parent relationship. - * - * @param haplogroup the haplogroup to create - * @param parentId optional parent haplogroup ID - * @param source the source attribution for the relationship - * @return the ID of the newly created haplogroup - */ - def createWithParent(haplogroup: Haplogroup, parentId: Option[Int], source: String): Future[(Int, Option[Int])] - /** - * Find root haplogroups (those with no parent) for a given type. - * - * @param haplogroupType the type of haplogroup (Y or MT) - * @return a sequence of root haplogroups for that type - */ - def findRoots(haplogroupType: HaplogroupType): Future[Seq[Haplogroup]] - - // === Tree Merge Methods === - - /** - * Update the provenance field for a haplogroup. - * - * @param id the haplogroup ID - * @param provenance the new provenance data - * @return true if updated successfully - */ - def updateProvenance(id: Int, provenance: HaplogroupProvenance): Future[Boolean] - - /** - * Get all haplogroups of a type with their associated variant names. - * Used for building variant-based lookup index for merge operations. - * - * @param haplogroupType the type of haplogroup (Y or MT) - * @return sequence of tuples: (haplogroup, list of variant names) - */ - def getAllWithVariantNames(haplogroupType: HaplogroupType): Future[Seq[(Haplogroup, Seq[String])]] - - /** - * Retrieves the latest revision ID for a given haplogroup relationship identified by its child haplogroup ID. - * - * @param childHaplogroupId The unique identifier of the child haplogroup. - * @return A Future containing an Option with the latest revision ID, or None if no relationships exist for the child. 
- */ - def getLatestRelationshipRevisionId(childHaplogroupId: Int): Future[Option[Int]] - - // === Bulk Operations for Merge === - - /** - * Create multiple haplogroups in a single batch operation. - * - * @param haplogroups The haplogroups to create - * @return A Future containing a map from haplogroup name to newly assigned ID - */ - def createBatch(haplogroups: Seq[Haplogroup]): Future[Map[String, Int]] - - /** - * Create multiple parent-child relationships in a single batch operation. - * - * @param relationships Sequence of (childId, parentId, source) tuples - * @return A Future containing the sequence of created relationship IDs - */ - def createRelationshipsBatch(relationships: Seq[(Int, Int, String)]): Future[Seq[Int]] - - /** - * Update provenance for multiple haplogroups in a single batch. - * - * @param updates Sequence of (haplogroupId, provenance) tuples - * @return A Future containing the count of updated records - */ - def updateProvenanceBatch(updates: Seq[(Int, HaplogroupProvenance)]): Future[Int] - - /** - * Get all active parent-child relationships for a haplogroup type. - * Returns (childId, parentId) tuples for building in-memory tree. 
- */ - def getAllRelationships(haplogroupType: HaplogroupType): Future[Seq[(Int, Int)]] -} - -class HaplogroupCoreRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with HaplogroupCoreRepository - with Logging { - - import models.dal.DatabaseSchema.domain.haplogroups.{haplogroupRelationships, haplogroups} - import models.dal.MyPostgresProfile.api.* - - import java.time.LocalDateTime - - /** Filter for active (non-soft-deleted) haplogroups */ - private def activeHaplogroups = haplogroups.filter(h => - h.validUntil.isEmpty || h.validUntil > LocalDateTime.now() - ) - - /** Filter for active relationships */ - private def activeRelationships = haplogroupRelationships.filter(r => - r.validUntil.isEmpty || r.validUntil > LocalDateTime.now() - ) - - override def getHaplogroupByName(name: String, haplogroupType: HaplogroupType): Future[Option[Haplogroup]] = { - val query = activeHaplogroups - .filter(h => h.name === name && h.haplogroupType === haplogroupType) - .result - .headOption - - runQuery(query) - } - - override def getAncestors(haplogroupId: Int): Future[Seq[Haplogroup]] = { - implicit val getHaplogroupResult: GetResult[Haplogroup] = GetResult(r => - Haplogroup( - id = r.nextIntOption(), - name = r.nextString(), - lineage = r.nextStringOption(), - description = r.nextStringOption(), - haplogroupType = HaplogroupType.fromString(r.nextString()).getOrElse(throw new IllegalArgumentException("Invalid haplogroup type")), - revisionId = r.nextInt(), - source = r.nextString(), - confidenceLevel = r.nextString(), - validFrom = r.nextTimestampOption().map(_.toLocalDateTime).orNull, - validUntil = r.nextTimestampOption().map(_.toLocalDateTime) - ) - ) - - // Define the recursive CTE query - val recursiveQuery = sql""" - WITH RECURSIVE ancestor_tree AS ( - -- Base case: immediate parent - SELECT h.*, 1 as level - FROM tree.haplogroup_relationship hr - JOIN tree.haplogroup h ON 
h.haplogroup_id = hr.parent_haplogroup_id - WHERE hr.child_haplogroup_id = $haplogroupId - - UNION - - -- Recursive case: parents of parents - SELECT h.*, at.level + 1 - FROM tree.haplogroup_relationship hr - JOIN tree.haplogroup h ON h.haplogroup_id = hr.parent_haplogroup_id - JOIN ancestor_tree at ON hr.child_haplogroup_id = at.haplogroup_id - ) - SELECT - haplogroup_id, name, lineage, description, haplogroup_type, - revision_id, source, confidence_level, valid_from, valid_until - FROM ancestor_tree - ORDER BY level DESC - """.as[Haplogroup] - - db.run(recursiveQuery) - - } - - override def getDescendants(haplogroupId: Int): Future[Seq[Haplogroup]] = { - implicit val getHaplogroupResult: GetResult[Haplogroup] = GetResult(r => - Haplogroup( - id = r.nextIntOption(), - name = r.nextString(), - lineage = r.nextStringOption(), - description = r.nextStringOption(), - haplogroupType = HaplogroupType.fromString(r.nextString()).getOrElse(throw new IllegalArgumentException("Invalid haplogroup type")), - revisionId = r.nextInt(), - source = r.nextString(), - confidenceLevel = r.nextString(), - validFrom = r.nextTimestampOption().map(_.toLocalDateTime).orNull, - validUntil = r.nextTimestampOption().map(_.toLocalDateTime) - ) - ) - - // Define the recursive CTE query for descendants - val recursiveQuery = sql""" - WITH RECURSIVE descendant_tree AS ( - -- Base case: immediate children - SELECT h.*, 1 as level - FROM tree.haplogroup_relationship hr - JOIN tree.haplogroup h ON h.haplogroup_id = hr.child_haplogroup_id - WHERE hr.parent_haplogroup_id = $haplogroupId - AND (hr.valid_until IS NULL OR hr.valid_until > NOW()) - AND (h.valid_until IS NULL OR h.valid_until > NOW()) - - UNION - - -- Recursive case: children of children - SELECT h.*, dt.level + 1 - FROM tree.haplogroup_relationship hr - JOIN tree.haplogroup h ON h.haplogroup_id = hr.child_haplogroup_id - JOIN descendant_tree dt ON hr.parent_haplogroup_id = dt.haplogroup_id - WHERE (hr.valid_until IS NULL OR 
hr.valid_until > NOW()) - AND (h.valid_until IS NULL OR h.valid_until > NOW()) - ) - SELECT - haplogroup_id, name, lineage, description, haplogroup_type, - revision_id, source, confidence_level, valid_from, valid_until - FROM descendant_tree - """.as[Haplogroup] - - db.run(recursiveQuery) - } - - override def getDirectChildren(haplogroupId: Int): Future[Seq[Haplogroup]] = { - val query = for { - rel <- activeRelationships if rel.parentHaplogroupId === haplogroupId - child <- activeHaplogroups if child.haplogroupId === rel.childHaplogroupId - } yield child - - runQuery(query.result) - } - - override def getParent(haplogroupId: Int): Future[Option[Haplogroup]] = { - val query = for { - rel <- activeRelationships if rel.childHaplogroupId === haplogroupId - parent <- activeHaplogroups if parent.haplogroupId === rel.parentHaplogroupId - } yield parent - - runQuery(query.result.headOption) - } - - // === Curator CRUD Methods Implementation === - - override def findById(id: Int): Future[Option[Haplogroup]] = { - runQuery(activeHaplogroups.filter(_.haplogroupId === id).result.headOption) - } - - override def search(query: String, haplogroupType: Option[HaplogroupType], limit: Int, offset: Int): Future[Seq[Haplogroup]] = { - val baseQuery = activeHaplogroups - .filter(h => h.name.toUpperCase.like(s"%${query.toUpperCase}%")) - - val filteredQuery = haplogroupType match { - case Some(hgType) => baseQuery.filter(_.haplogroupType === hgType) - case None => baseQuery - } - - runQuery( - filteredQuery - .sortBy(_.name) - .drop(offset) - .take(limit) - .result - ) - } - - override def count(query: Option[String], haplogroupType: Option[HaplogroupType]): Future[Int] = { - val baseQuery = query match { - case Some(q) => activeHaplogroups.filter(h => h.name.toUpperCase.like(s"%${q.toUpperCase}%")) - case None => activeHaplogroups - } - - val filteredQuery = haplogroupType match { - case Some(hgType) => baseQuery.filter(_.haplogroupType === hgType) - case None => baseQuery - } - - 
runQuery(filteredQuery.length.result) - } - - override def countByType(haplogroupType: HaplogroupType): Future[Int] = { - runQuery(activeHaplogroups.filter(_.haplogroupType === haplogroupType).length.result) - } - - override def create(haplogroup: Haplogroup): Future[Int] = { - runQuery( - (haplogroups returning haplogroups.map(_.haplogroupId)) += haplogroup - ) - } - - override def update(haplogroup: Haplogroup): Future[Boolean] = { - haplogroup.id match { - case Some(id) => - runQuery( - haplogroups - .filter(_.haplogroupId === id) - .map(h => (h.name, h.lineage, h.description, h.source, h.confidenceLevel)) - .update((haplogroup.name, haplogroup.lineage, haplogroup.description, haplogroup.source, haplogroup.confidenceLevel)) - ).map(_ > 0) - case None => Future.successful(false) - } - } - - override def softDelete(id: Int, source: String): Future[Boolean] = { - val now = LocalDateTime.now() - - // Step 1: Find the haplogroup's current parent relationship - val findParentAction = activeRelationships - .filter(_.childHaplogroupId === id) - .map(_.parentHaplogroupId) - .result - .headOption - - // Step 2: Find all children of this haplogroup - val findChildrenAction = activeRelationships - .filter(_.parentHaplogroupId === id) - .result - - val softDeleteAction = for { - maybeParentId <- findParentAction - childRelationships <- findChildrenAction - - // Step 3: Soft-delete the haplogroup by setting valid_until - updated <- haplogroups - .filter(_.haplogroupId === id) - .filter(h => h.validUntil.isEmpty || h.validUntil > now) - .map(_.validUntil) - .update(Some(now)) - - // Step 4: Soft-delete the haplogroup's parent relationship - _ <- haplogroupRelationships - .filter(_.childHaplogroupId === id) - .filter(r => r.validUntil.isEmpty || r.validUntil > now) - .map(_.validUntil) - .update(Some(now)) - - // Step 5: If there's a parent, reassign children to it - _ <- maybeParentId match { - case Some(parentId) => - // End current relationships for children - val 
endCurrentRelationships = haplogroupRelationships - .filter(r => r.parentHaplogroupId === id && (r.validUntil.isEmpty || r.validUntil > now)) - .map(_.validUntil) - .update(Some(now)) - - // Create new relationships pointing to the grandparent - import models.domain.haplogroups.HaplogroupRelationship - val newRelationships = childRelationships.map { childRel => - HaplogroupRelationship( - id = None, - childHaplogroupId = childRel.childHaplogroupId, - parentHaplogroupId = parentId, - revisionId = childRel.revisionId, - validFrom = now, - validUntil = None, - source = source - ) - } - endCurrentRelationships.andThen( - (haplogroupRelationships ++= newRelationships).map(_ => ()) - ) - - case None => - // No parent - just end the children's current relationships (they become roots) - haplogroupRelationships - .filter(r => r.parentHaplogroupId === id && (r.validUntil.isEmpty || r.validUntil > now)) - .map(_.validUntil) - .update(Some(now)) - .map(_ => ()) - } - } yield updated > 0 - - runTransactionally(softDeleteAction) - } - - // === Tree Restructuring Methods Implementation === - - override def updateParent(childId: Int, newParentId: Int, source: String): Future[Int] = { - import models.domain.haplogroups.HaplogroupRelationship - val now = LocalDateTime.now() - - for { - // Get the latest revision ID for this child haplogroup - latestRevisionIdOpt <- getLatestRelationshipRevisionId(childId) - newRevisionId = latestRevisionIdOpt.map(_ + 1).getOrElse(1) - - newRelationshipId <- runTransactionally { - for { - // Soft-delete the existing parent relationship - _ <- haplogroupRelationships - .filter(r => r.childHaplogroupId === childId && (r.validUntil.isEmpty || r.validUntil > now)) - .map(_.validUntil) - .update(Some(now)) - - // Create new relationship to new parent and return its ID - relId <- (haplogroupRelationships returning haplogroupRelationships.map(_.haplogroupRelationshipId)) += HaplogroupRelationship( - id = None, - childHaplogroupId = childId, - 
parentHaplogroupId = newParentId, - revisionId = newRevisionId, - validFrom = now, - validUntil = None, - source = source - ) - } yield relId - } - } yield newRelationshipId - } - - override def createWithParent(haplogroup: Haplogroup, parentId: Option[Int], source: String): Future[(Int, Option[Int])] = { - import models.domain.haplogroups.HaplogroupRelationship - val now = LocalDateTime.now() - - for { - // Create the haplogroup first - newId <- runQuery((haplogroups returning haplogroups.map(_.haplogroupId)) += haplogroup) - - // Create parent relationship if parentId provided - relationshipIdValue <- parentId match { - case Some(pid) => - for { - // Get the latest revision ID for this child haplogroup - latestRevisionIdOpt <- getLatestRelationshipRevisionId(newId) - newRevisionId = latestRevisionIdOpt.map(_ + 1).getOrElse(1) - newRelationshipId <- runQuery { - (haplogroupRelationships returning haplogroupRelationships.map(_.haplogroupRelationshipId)) += HaplogroupRelationship( - id = None, - childHaplogroupId = newId, - parentHaplogroupId = pid, - revisionId = newRevisionId, - validFrom = now, - validUntil = None, - source = source - ) - } - } yield Some(newRelationshipId) - case None => - Future.successful(None) - } - } yield (newId, relationshipIdValue) - } - - override def findRoots(haplogroupType: HaplogroupType): Future[Seq[Haplogroup]] = { - // Find haplogroups of the given type that have no active parent relationship - val query = activeHaplogroups - .filter(_.haplogroupType === haplogroupType) - .filterNot(h => - activeRelationships.filter(_.childHaplogroupId === h.haplogroupId).exists - ) - .sortBy(_.name) - .result - - runQuery(query) - } - - // === Tree Merge Methods Implementation === - - override def updateProvenance(id: Int, provenance: HaplogroupProvenance): Future[Boolean] = { - runQuery( - haplogroups - .filter(_.haplogroupId === id) - .map(_.provenance) - .update(Some(provenance)) - ).map(_ > 0) - } - - override def 
getAllWithVariantNames(haplogroupType: HaplogroupType): Future[Seq[(Haplogroup, Seq[String])]] = { - import models.dal.DatabaseSchema.domain.haplogroups.haplogroupVariants - import models.dal.DatabaseSchema.domain.genomics.variantsV2 - - // Single query: left join haplogroups with their variant names - val query = for { - (hg, hv) <- activeHaplogroups.filter(_.haplogroupType === haplogroupType) - .joinLeft(haplogroupVariants).on(_.haplogroupId === _.haplogroupId) - } yield (hg, hv.map(_.variantId)) - - runQuery(query.result).flatMap { hgWithVariantIds => - // Collect all variant IDs needed, fetch canonical names in one query - val allVariantIds = hgWithVariantIds.flatMap(_._2).distinct - val variantNamesFuture = if (allVariantIds.isEmpty) { - Future.successful(Map.empty[Int, String]) - } else { - runQuery( - variantsV2.filter(_.variantId.inSet(allVariantIds)) - .map(v => (v.variantId, v.canonicalName)) - .result - ).map { pairs => - pairs.collect { case (id, Some(name)) => id -> name }.toMap - } - } - - variantNamesFuture.map { variantNamesMap => - // Group by haplogroup and resolve variant names - hgWithVariantIds - .groupBy(_._1) - .map { case (hg, rows) => - val names = rows.flatMap(_._2).flatMap(variantNamesMap.get) - (hg, names) - } - .toSeq - } - } - } - - override def getLatestRelationshipRevisionId(childHaplogroupId: Int): Future[Option[Int]] = { - val query = haplogroupRelationships - .filter(_.childHaplogroupId === childHaplogroupId) - .map(_.revisionId) - .max - .result - runQuery(query) - } - - // === Bulk Operations for Merge === - - override def createBatch(haplogroupsToCreate: Seq[Haplogroup]): Future[Map[String, Int]] = { - if (haplogroupsToCreate.isEmpty) return Future.successful(Map.empty) - - runQuery( - (haplogroups returning haplogroups.map(h => (h.name, h.haplogroupId))) ++= haplogroupsToCreate - ).map(_.toMap) - } - - override def createRelationshipsBatch(relationships: Seq[(Int, Int, String)]): Future[Seq[Int]] = { - if 
(relationships.isEmpty) return Future.successful(Seq.empty) - - import models.domain.haplogroups.HaplogroupRelationship - val now = LocalDateTime.now() - - val relationshipsToInsert = relationships.map { case (childId, parentId, source) => - HaplogroupRelationship( - id = None, - childHaplogroupId = childId, - parentHaplogroupId = parentId, - revisionId = 1, - validFrom = now, - validUntil = None, - source = source - ) - } - - runQuery( - (haplogroupRelationships returning haplogroupRelationships.map(_.haplogroupRelationshipId)) ++= relationshipsToInsert - ) - } - - override def updateProvenanceBatch(updates: Seq[(Int, HaplogroupProvenance)]): Future[Int] = { - if (updates.isEmpty) return Future.successful(0) - - // Use DBIO.sequence for batch updates - val updateActions = updates.map { case (id, provenance) => - haplogroups - .filter(_.haplogroupId === id) - .map(_.provenance) - .update(Some(provenance)) - } - - runTransactionally(DBIO.sequence(updateActions)).map(_.sum) - } - - override def getAllRelationships(haplogroupType: HaplogroupType): Future[Seq[(Int, Int)]] = { - val query = for { - rel <- activeRelationships - child <- activeHaplogroups if child.haplogroupId === rel.childHaplogroupId && child.haplogroupType === haplogroupType - } yield (rel.childHaplogroupId, rel.parentHaplogroupId) - - runQuery(query.result) - } -} diff --git a/app/repositories/HaplogroupReconciliationRepository.scala b/app/repositories/HaplogroupReconciliationRepository.scala deleted file mode 100644 index 7f506389..00000000 --- a/app/repositories/HaplogroupReconciliationRepository.scala +++ /dev/null @@ -1,18 +0,0 @@ -package repositories - -import models.domain.genomics.{DnaType, HaplogroupReconciliation} - -import scala.concurrent.Future - -trait HaplogroupReconciliationRepository { - def findById(id: Int): Future[Option[HaplogroupReconciliation]] - def findByAtUri(atUri: String): Future[Option[HaplogroupReconciliation]] - def findBySpecimenDonorId(specimenDonorId: Int): 
Future[Seq[HaplogroupReconciliation]] - def findBySpecimenDonorIdAndDnaType(specimenDonorId: Int, dnaType: DnaType): Future[Option[HaplogroupReconciliation]] - def findByConsensusHaplogroup(haplogroup: String): Future[Seq[HaplogroupReconciliation]] - def create(reconciliation: HaplogroupReconciliation): Future[HaplogroupReconciliation] - def upsertByAtUri(reconciliation: HaplogroupReconciliation): Future[HaplogroupReconciliation] - def upsertBySpecimenDonorAndDnaType(reconciliation: HaplogroupReconciliation): Future[HaplogroupReconciliation] - def update(reconciliation: HaplogroupReconciliation): Future[Boolean] - def softDelete(id: Int): Future[Boolean] -} diff --git a/app/repositories/HaplogroupReconciliationRepositoryImpl.scala b/app/repositories/HaplogroupReconciliationRepositoryImpl.scala deleted file mode 100644 index 9d6d0e82..00000000 --- a/app/repositories/HaplogroupReconciliationRepositoryImpl.scala +++ /dev/null @@ -1,103 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{DnaType, HaplogroupReconciliation} -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class HaplogroupReconciliationRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HaplogroupReconciliationRepository with HasDatabaseConfigProvider[JdbcProfile] { - - private val reconciliations = DatabaseSchema.domain.genomics.haplogroupReconciliations - - override def findById(id: Int): Future[Option[HaplogroupReconciliation]] = { - db.run(reconciliations.filter(r => r.id === id && !r.deleted).result.headOption) - } - - override def findByAtUri(atUri: String): Future[Option[HaplogroupReconciliation]] = { - db.run(reconciliations.filter(r => 
r.atUri === atUri && !r.deleted).result.headOption) - } - - override def findBySpecimenDonorId(specimenDonorId: Int): Future[Seq[HaplogroupReconciliation]] = { - db.run(reconciliations.filter(r => r.specimenDonorId === specimenDonorId && !r.deleted).result) - } - - override def findBySpecimenDonorIdAndDnaType(specimenDonorId: Int, dnaType: DnaType): Future[Option[HaplogroupReconciliation]] = { - db.run( - reconciliations.filter(r => - r.specimenDonorId === specimenDonorId && - r.dnaType === dnaType && - !r.deleted - ).result.headOption - ) - } - - override def findByConsensusHaplogroup(haplogroup: String): Future[Seq[HaplogroupReconciliation]] = { - // Note: This would be more efficient with a JSONB index, but we're using the status->>'consensusHaplogroup' path - db.run(reconciliations.filter(r => !r.deleted).result).map { results => - results.filter(_.status.consensusHaplogroup.contains(haplogroup)) - } - } - - override def create(reconciliation: HaplogroupReconciliation): Future[HaplogroupReconciliation] = { - db.run( - (reconciliations returning reconciliations.map(_.id) - into ((r, id) => r.copy(id = Some(id)))) += reconciliation - ) - } - - override def upsertByAtUri(reconciliation: HaplogroupReconciliation): Future[HaplogroupReconciliation] = { - reconciliation.atUri match { - case None => create(reconciliation) - case Some(uri) => - findByAtUri(uri).flatMap { - case Some(existing) => - val updated = reconciliation.copy( - id = existing.id, - createdAt = existing.createdAt, - updatedAt = LocalDateTime.now() - ) - update(updated).map(_ => updated) - case None => create(reconciliation) - } - } - } - - override def upsertBySpecimenDonorAndDnaType(reconciliation: HaplogroupReconciliation): Future[HaplogroupReconciliation] = { - findBySpecimenDonorIdAndDnaType(reconciliation.specimenDonorId, reconciliation.dnaType).flatMap { - case Some(existing) => - val updated = reconciliation.copy( - id = existing.id, - createdAt = existing.createdAt, - updatedAt = 
LocalDateTime.now() - ) - update(updated).map(_ => updated) - case None => create(reconciliation) - } - } - - override def update(reconciliation: HaplogroupReconciliation): Future[Boolean] = { - reconciliation.id match { - case None => Future.successful(false) - case Some(id) => - val updated = reconciliation.copy(updatedAt = LocalDateTime.now()) - db.run(reconciliations.filter(_.id === id).update(updated)).map(_ > 0) - } - } - - override def softDelete(id: Int): Future[Boolean] = { - db.run( - reconciliations.filter(_.id === id) - .map(r => (r.deleted, r.updatedAt)) - .update((true, LocalDateTime.now())) - ).map(_ > 0) - } -} diff --git a/app/repositories/HaplogroupRelationshipRepository.scala b/app/repositories/HaplogroupRelationshipRepository.scala deleted file mode 100644 index 0a726c3f..00000000 --- a/app/repositories/HaplogroupRelationshipRepository.scala +++ /dev/null @@ -1,119 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.domain.haplogroups.{Haplogroup, HaplogroupRelationship} -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository for managing haplogroup relationships and their revisions. - */ -trait HaplogroupRelationshipRepository { - /** - * Retrieves haplogroup subtree relationships starting from the specified root haplogroup ID. - * - * @param rootId The ID of the haplogroup to use as the root for fetching its subtree relationships. - * @return A Future containing a sequence of tuples. Each tuple consists of a haplogroup and its associated relationship. - */ - def getSubtreeRelationships(rootId: Int): Future[Seq[(Haplogroup, HaplogroupRelationship)]] - - /** - * Retrieves the currently valid haplogroup relationships. - * - * This method returns a sequence of tuples, where each tuple consists of a `HaplogroupRelationship` - * and its associated `Haplogroup`. 
Only relationships that are currently valid, based on their - * validity period (`validFrom` and `validUntil`), are included in the results. - * - * @return A Future containing a sequence of tuples with a currently valid `HaplogroupRelationship` - * and its corresponding `Haplogroup`. - */ - def getCurrentValidRelationships: Future[Seq[(HaplogroupRelationship, Haplogroup)]] - - /** - * Creates a new revision for the provided haplogroup relationship. - * - * The revision allows tracking changes to the relationships over time by associating - * the relationship with a revision ID and setting validity periods. - * - * @param relationship The HaplogroupRelationship object representing the relationship - * to be added as a new revision. - * @return A Future containing the ID of the newly created revision. - */ - def createRelationshipRevision(relationship: HaplogroupRelationship): Future[Int] - - /** - * Retrieves haplogroup relationships for a specific revision. - * - * This method returns a sequence of tuples representing the relationships at a particular revision. - * Each tuple consists of a `HaplogroupRelationship`, the child `Haplogroup`, and the parent `Haplogroup`. - * - * @param revisionId The ID of the revision for which relationships are to be retrieved. - * @return A `Future` containing a sequence of tuples, where each tuple includes a `HaplogroupRelationship`, - * the corresponding child `Haplogroup`, and the parent `Haplogroup`. 
- */ - def getRelationshipsAtRevision(revisionId: Int): Future[Seq[(HaplogroupRelationship, Haplogroup, Haplogroup)]] -} - -class HaplogroupRelationshipRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with HaplogroupRelationshipRepository { - - import models.dal.DatabaseSchema.* - import models.dal.DatabaseSchema.domain.haplogroups.{haplogroupRelationships, haplogroups} - import models.dal.MyPostgresProfile.api.* - - override def getSubtreeRelationships(rootId: Int): Future[Seq[(Haplogroup, HaplogroupRelationship)]] = { - val query = for { - root <- haplogroups if root.haplogroupId === rootId - rel <- haplogroupRelationships if rel.parentHaplogroupId === root.haplogroupId - child <- haplogroups if child.haplogroupId === rel.childHaplogroupId - } yield (child, rel) - - runQuery(query.result) - } - - override def getCurrentValidRelationships: Future[Seq[(HaplogroupRelationship, Haplogroup)]] = { - val now = java.time.LocalDateTime.now() - val query = for { - rel <- haplogroupRelationships if - rel.validFrom <= now && - (rel.validUntil.isEmpty || rel.validUntil > now) - haplogroup <- haplogroups if haplogroup.haplogroupId === rel.childHaplogroupId - } yield (rel, haplogroup) - - runQuery(query.result) - } - - override def createRelationshipRevision(relationship: HaplogroupRelationship): Future[Int] = { - val nextRevisionQuery = haplogroupRelationships - .filter(r => - r.parentHaplogroupId === relationship.parentHaplogroupId && - r.childHaplogroupId === relationship.childHaplogroupId - ) - .map(_.revisionId) - .max - .getOrElse(1) - .result - .map(_ + 1) - - runQuery(nextRevisionQuery.flatMap { nextRev => - (haplogroupRelationships returning haplogroupRelationships.map(_.haplogroupRelationshipId)) += - relationship.copy(revisionId = nextRev) - }) - } - - override def getRelationshipsAtRevision(revisionId: Int): Future[Seq[(HaplogroupRelationship, Haplogroup, 
Haplogroup)]] = { - val query = for { - rel <- haplogroupRelationships if rel.revisionId === revisionId - parent <- haplogroups if parent.haplogroupId === rel.parentHaplogroupId && - parent.revisionId === revisionId - child <- haplogroups if child.haplogroupId === rel.childHaplogroupId && - child.revisionId === revisionId - } yield (rel, parent, child) - - runQuery(query.result) - } -} diff --git a/app/repositories/HaplogroupRevisionMetadataRepository.scala b/app/repositories/HaplogroupRevisionMetadataRepository.scala deleted file mode 100644 index 6ec5721a..00000000 --- a/app/repositories/HaplogroupRevisionMetadataRepository.scala +++ /dev/null @@ -1,260 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.domain.haplogroups.{HaplogroupRelationship, RelationshipRevisionMetadata} -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository trait responsible for managing haplogroup relationship revision metadata. - * This provides methods for adding, retrieving, updating, and querying metadata related to revisions - * of haplogroup relationships. - */ -trait HaplogroupRevisionMetadataRepository { - /** - * Adds a new relationship revision metadata entry to the repository. - * - * @param metadata The relationship revision metadata containing details such as - * the haplogroup relationship ID, revision ID, author, timestamp, - * comment, change type, and an optional previous revision ID. - * @return A future containing the ID of the newly added metadata entry. - */ - def addRelationshipRevisionMetadata(metadata: RelationshipRevisionMetadata): Future[Int] - - /** - * Retrieves the revision metadata for a specific haplogroup relationship revision. - * - * @param relationshipId The unique identifier of the haplogroup relationship. - * @param revisionId The unique identifier of the revision for which metadata is to be retrieved. 
- * @return A future containing an optional `RelationshipRevisionMetadata` object. If the specified - * relationship and revision IDs are found, the metadata is returned, otherwise `None`. - */ - def getRelationshipRevisionMetadata(relationshipId: Int, revisionId: Int): Future[Option[RelationshipRevisionMetadata]] - - /** - * Retrieves the revision history for a specific haplogroup relationship. - * - * @param relationshipId The unique identifier of the haplogroup relationship whose revision history is to be retrieved. - * @return A future containing a sequence of tuples, where each tuple consists of a `HaplogroupRelationship` object - * and its associated `RelationshipRevisionMetadata`. - */ - def getRelationshipRevisionHistory(relationshipId: Int): Future[Seq[(HaplogroupRelationship, RelationshipRevisionMetadata)]] - - /** - * Retrieves all relationship revisions authored by the specified author. - * - * @param author The name of the author whose revisions are to be retrieved. - * @return A future containing a sequence of `RelationshipRevisionMetadata` objects associated with the specified author. - */ - def getRevisionsByAuthor(author: String): Future[Seq[RelationshipRevisionMetadata]] - - /** - * Retrieves the relationship revision metadata entries that fall within the specified date range. - * - * @param startDate The start date and time of the range (inclusive). - * @param endDate The end date and time of the range (inclusive). - * @return A future containing a sequence of `RelationshipRevisionMetadata` objects that match the specified date range. - */ - def getRevisionsBetweenDates(startDate: LocalDateTime, endDate: LocalDateTime): Future[Seq[RelationshipRevisionMetadata]] - - /** - * Updates the comment associated with a specific haplogroup relationship revision. - * - * @param relationshipId The unique identifier of the haplogroup relationship. - * @param revisionId The unique identifier of the revision. 
- * @param newComment The new comment text to be updated. - * @return A future containing the number of rows updated in the database. - */ - def updateRevisionComment(relationshipId: Int, revisionId: Int, newComment: String): Future[Int] - - /** - * Retrieves the latest relationship revision metadata entries filtered by the specified change type. - * The number of entries returned is determined by the specified limit. - * - * @param changeType The type of change to filter the revision metadata entries by. - * @param limit The maximum number of revision metadata entries to retrieve. - * @return A future containing a sequence of `RelationshipRevisionMetadata` objects that match the specified change type. - */ - def getLatestRevisionsByChangeType(changeType: String, limit: Int): Future[Seq[RelationshipRevisionMetadata]] - - /** - * Retrieves a chain of revisions for a specific haplogroup relationship starting from the specified revision. - * - * @param relationshipId The unique identifier of the haplogroup relationship whose revision chain is to be retrieved. - * @param revisionId The unique identifier of the starting revision for which the chain is to be retrieved. - * @return A future containing a sequence of `RelationshipRevisionMetadata` representing the chain of revisions, - * starting from the specified revision and including all subsequent revisions. - */ - def getRevisionChain(relationshipId: Int, revisionId: Int): Future[Seq[RelationshipRevisionMetadata]] - - /** - * Retrieves the latest revision ID for a given haplogroup relationship ID. - * - * @param haplogroupRelationshipId The unique identifier of the haplogroup relationship. - * @return A Future containing an Option with the latest revision ID, or None if no revisions exist. - */ - def getLatestRevisionId(haplogroupRelationshipId: Int): Future[Option[Int]] - - /** - * Atomically generates the next revision ID for a haplogroup relationship and inserts new metadata. 
- * This method uses a pessimistic lock to prevent race conditions during concurrent updates. - * - * @param haplogroupRelationshipId The unique identifier of the haplogroup relationship. - * @param author The author of the revision. - * @param timestamp The timestamp of the revision. - * @param comment A descriptive comment for the revision. - * @param changeType The type of change (e.g., "CREATE", "UPDATE", "DELETE"). - * @return A Future containing the newly created RelationshipRevisionMetadata object. - */ - def addNextRelationshipRevisionMetadata( - haplogroupRelationshipId: Int, - author: String, - timestamp: LocalDateTime, - comment: String, - changeType: String - ): Future[RelationshipRevisionMetadata] -} - -class HaplogroupRevisionMetadataRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with HaplogroupRevisionMetadataRepository { - - import models.dal.DatabaseSchema.* - import models.dal.DatabaseSchema.domain.haplogroups.{haplogroupRelationships, relationshipRevisionMetadata} - import models.dal.MyPostgresProfile.api.* - - override def addRelationshipRevisionMetadata(metadata: RelationshipRevisionMetadata): Future[Int] = { - runQuery(relationshipRevisionMetadata += metadata) - } - - override def getRelationshipRevisionMetadata(relationshipId: Int, revisionId: Int): Future[Option[RelationshipRevisionMetadata]] = { - val query = relationshipRevisionMetadata - .filter(m => m.haplogroup_relationship_id === relationshipId && m.revisionId === revisionId) - .result - .headOption - - runQuery(query) - } - - override def getRelationshipRevisionHistory(relationshipId: Int): Future[Seq[(HaplogroupRelationship, RelationshipRevisionMetadata)]] = { - val query = for { - rel <- haplogroupRelationships if rel.haplogroupRelationshipId === relationshipId - metadata <- relationshipRevisionMetadata if metadata.haplogroup_relationship_id === rel.haplogroupRelationshipId - } 
yield (rel, metadata) - - runQuery(query.sortBy(_._2.timestamp.desc).result) - } - - override def getRevisionsByAuthor(author: String): Future[Seq[RelationshipRevisionMetadata]] = { - val query = relationshipRevisionMetadata - .filter(_.author === author) - .sortBy(_.timestamp.desc) - .result - - runQuery(query) - } - - override def getRevisionsBetweenDates(startDate: LocalDateTime, endDate: LocalDateTime): Future[Seq[RelationshipRevisionMetadata]] = { - val query = relationshipRevisionMetadata - .filter(m => m.timestamp >= startDate && m.timestamp <= endDate) - .sortBy(_.timestamp.desc) - .result - - runQuery(query) - } - - override def updateRevisionComment(relationshipId: Int, revisionId: Int, newComment: String): Future[Int] = { - val query = relationshipRevisionMetadata - .filter(m => m.haplogroup_relationship_id === relationshipId && m.revisionId === revisionId) - .map(_.comment) - .update(newComment) - - runQuery(query) - } - - override def getLatestRevisionsByChangeType(changeType: String, limit: Int): Future[Seq[RelationshipRevisionMetadata]] = { - { - val query = relationshipRevisionMetadata - .filter(_.changeType === changeType) - .sortBy(_.timestamp.desc) - .take(limit) - .result - - runQuery(query) - } - } - - override def getRevisionChain(relationshipId: Int, revisionId: Int): Future[Seq[RelationshipRevisionMetadata]] = { - def recursiveChain( - currentRevisionId: Int, - chain: Seq[RelationshipRevisionMetadata] = Seq.empty - ): DBIO[Seq[RelationshipRevisionMetadata]] = { - val query = relationshipRevisionMetadata - .filter(m => - m.haplogroup_relationship_id === relationshipId && - m.revisionId === currentRevisionId - ) - .result.headOption - - query.flatMap { - case Some(metadata) => - metadata.previousRevisionId match { - case Some(prevId) => recursiveChain(prevId, chain :+ metadata) - case None => DBIO.successful(chain :+ metadata) - } - case None => DBIO.successful(chain) - } - } - - runQuery(recursiveChain(revisionId)) - } - - override def 
getLatestRevisionId(haplogroupRelationshipId: Int): Future[Option[Int]] = { - val query = relationshipRevisionMetadata - .filter(_.haplogroup_relationship_id === haplogroupRelationshipId) - .map(_.revisionId) - .max - .result - - runQuery(query) - } - - override def addNextRelationshipRevisionMetadata( - haplogroupRelationshipId: Int, - author: String, - timestamp: LocalDateTime, - comment: String, - changeType: String - ): Future[RelationshipRevisionMetadata] = { - val action = (for { - latestRevisionIdOption <- relationshipRevisionMetadata - .filter(_.haplogroup_relationship_id === haplogroupRelationshipId) - .sortBy(_.revisionId.desc) - .take(1) - .forUpdate // Pessimistic lock - .map(_.revisionId) - .result - .headOption - - nextRevisionId = latestRevisionIdOption.map(_ + 1).getOrElse(1) - previousRevisionIdValue = latestRevisionIdOption - - newMetadata = RelationshipRevisionMetadata( - haplogroup_relationship_id = haplogroupRelationshipId, - revisionId = nextRevisionId, - previousRevisionId = previousRevisionIdValue, - author = author, - timestamp = timestamp, - comment = comment, - changeType = changeType - ) - _ <- relationshipRevisionMetadata += newMetadata - } yield newMetadata).transactionally - - runQuery(action) - } -} diff --git a/app/repositories/HaplogroupRevisionRepository.scala b/app/repositories/HaplogroupRevisionRepository.scala deleted file mode 100644 index 01e8e243..00000000 --- a/app/repositories/HaplogroupRevisionRepository.scala +++ /dev/null @@ -1,194 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.haplogroups.Haplogroup -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository interface for managing and querying haplogroup data and revisions. - */ -trait HaplogroupRevisionRepository { - /** - * Retrieves the haplogroup information associated with a specific revision. 
- * - * @param haplogroupId The unique identifier of the haplogroup. - * @param revisionId The unique identifier of the revision. - * @return A Future containing an Option of the Haplogroup if the specified revision exists, or None otherwise. - */ - def getHaplogroupAtRevision(haplogroupId: Int, revisionId: Int): Future[Option[Haplogroup]] - - /** - * Retrieves the latest revision of the specified haplogroup. - * - * @param haplogroupId The unique identifier of the haplogroup for which the latest revision is to be fetched. - * @return A Future containing an Option of Haplogroup. It will be Some(Haplogroup) if a revision exists for the provided haplogroupId, otherwise None. - */ - def getLatestRevision(haplogroupId: Int): Future[Option[Haplogroup]] - - /** - * Retrieves the complete revision history of a haplogroup identified by its unique identifier. - * - * @param haplogroupId The unique identifier of the haplogroup whose revision history is to be fetched. - * @return A Future containing a sequence of Haplogroup instances representing the revision history. - */ - def getRevisionHistory(haplogroupId: Int): Future[Seq[Haplogroup]] - - /** - * Creates a new revision for the specified haplogroup. The provided haplogroup must have all necessary - * information such as its type, source, and validity periods. This method generates a new revision entry - * and persists it in the database, returning the unique identifier of the newly created revision. - * - * @param haplogroup The haplogroup entity containing information required to create a new revision. - * @return A Future containing the unique identifier of the newly created revision. - */ - def createNewRevision(haplogroup: Haplogroup): Future[Int] - - /** - * Retrieves the child haplogroups at a specific revision. - * - * @param haplogroupId The unique identifier of the parent haplogroup. - * @param revisionId The unique identifier of the revision to fetch children for. 
- * @return A Future containing a sequence of child Haplogroup instances at the specified revision. - */ - def getChildrenAtRevision(haplogroupId: Int, revisionId: Int): Future[Seq[Haplogroup]] - - /** - * Retrieves the ancestry of a haplogroup at a specific revision. - * This includes all ancestor haplogroups leading up to the specified revision. - * - * @param haplogroupId The unique identifier of the haplogroup whose ancestry is to be fetched. - * @param revisionId The unique identifier of the revision for which ancestry is to be retrieved. - * @return A Future containing a sequence of Haplogroup instances representing the ancestry at the specified revision. - */ - def getAncestryAtRevision(haplogroupId: Int, revisionId: Int): Future[Seq[Haplogroup]] - - /** - * Counts the number of haplogroups of a specific type. - * - * @param haplogroupType The type of haplogroup to be counted (e.g., paternal or maternal lineage). - * @return A Future containing the count of haplogroups of the specified type as an integer. 
- */ - def countByType(haplogroupType: HaplogroupType): Future[Int] -} - -class HaplogroupRevisionRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with HaplogroupRevisionRepository { - - import models.dal.DatabaseSchema.* - import models.dal.DatabaseSchema.domain.haplogroups.{haplogroupRelationships, haplogroups} - import models.dal.MyPostgresProfile.api.* - - - override def getHaplogroupAtRevision(haplogroupId: Int, revisionId: Int): Future[Option[Haplogroup]] = { - val query = haplogroups - .filter(h => h.haplogroupId === haplogroupId && h.revisionId === revisionId) - .result.headOption - - runQuery(query) - } - - override def getLatestRevision(haplogroupId: Int): Future[Option[Haplogroup]] = { - val query = haplogroups - .filter(_.haplogroupId === haplogroupId) - .sortBy(_.revisionId.desc) - .take(1) - .result.headOption - - runQuery(query) - } - - override def getRevisionHistory(haplogroupId: Int): Future[Seq[Haplogroup]] = { - val query = haplogroups - .filter(_.haplogroupId === haplogroupId) - .sortBy(_.revisionId.desc) - .result - - runQuery(query) - } - - override def createNewRevision(haplogroup: Haplogroup): Future[Int] = { - val nextRevisionQuery = haplogroups - .filter(_.haplogroupId === haplogroup.id) - .map(_.revisionId) - .max - .getOrElse(1) - .result - .map(_ + 1) - - runQuery(nextRevisionQuery.flatMap { nextRev => - (haplogroups returning haplogroups.map(_.haplogroupId)) += haplogroup.copy(revisionId = nextRev) - }) - } - - override def getChildrenAtRevision(haplogroupId: Int, revisionId: Int): Future[Seq[Haplogroup]] = { - val query = for { - rel <- haplogroupRelationships if rel.parentHaplogroupId === haplogroupId && - rel.revisionId === revisionId - child <- haplogroups if child.haplogroupId === rel.childHaplogroupId && - child.revisionId === revisionId - } yield child - - runQuery(query.result) - } - - override def 
getAncestryAtRevision(haplogroupId: Int, revisionId: Int): Future[Seq[Haplogroup]] = { - import slick.jdbc.GetResult - - implicit val getHaplogroupResult: GetResult[Haplogroup] = GetResult(r => - Haplogroup( - id = r.nextIntOption(), - name = r.nextString(), - lineage = r.nextStringOption(), - description = r.nextStringOption(), - haplogroupType = models.HaplogroupType.fromString(r.nextString()) - .getOrElse(throw new IllegalArgumentException("Invalid haplogroup type")), - revisionId = r.nextInt(), - source = r.nextString(), - confidenceLevel = r.nextString(), - validFrom = r.nextTimestampOption().map(_.toLocalDateTime).orNull, - validUntil = r.nextTimestampOption().map(_.toLocalDateTime) - ) - ) - - val query = sql""" - WITH RECURSIVE ancestor_tree AS ( - SELECT h.*, 1 as level - FROM tree.haplogroup_relationship hr - JOIN tree.haplogroup h ON h.haplogroup_id = hr.parent_haplogroup_id - AND h.revision_id = $revisionId - WHERE hr.child_haplogroup_id = $haplogroupId - AND hr.revision_id = $revisionId - - UNION - - SELECT h.*, at.level + 1 - FROM tree.haplogroup_relationship hr - JOIN tree.haplogroup h ON h.haplogroup_id = hr.parent_haplogroup_id - AND h.revision_id = $revisionId - JOIN ancestor_tree at ON hr.child_haplogroup_id = at.haplogroup_id - WHERE hr.revision_id = $revisionId - ) - SELECT haplogroup_id, name, lineage, description, haplogroup_type, - revision_id, source, confidence_level, valid_from, valid_until - FROM ancestor_tree - ORDER BY level DESC - """.as[Haplogroup] - - db.run(query) - } - - override def countByType(haplogroupType: HaplogroupType): Future[Int] = { - val query = haplogroups - .filter(_.haplogroupType === haplogroupType) - .length - .result - - runQuery(query) - } -} \ No newline at end of file diff --git a/app/repositories/HaplogroupVariantMetadataRepository.scala b/app/repositories/HaplogroupVariantMetadataRepository.scala deleted file mode 100644 index 73d7f6cd..00000000 --- 
a/app/repositories/HaplogroupVariantMetadataRepository.scala +++ /dev/null @@ -1,263 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.domain.haplogroups.{HaplogroupVariant, HaplogroupVariantMetadata} -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * A repository trait for managing metadata associated with haplogroup variant revisions. - * Provides methods for creating, retrieving, updating, and querying haplogroup variant metadata. - */ -trait HaplogroupVariantMetadataRepository { - /** - * Adds metadata for a haplogroup variant revision to the repository. - * - * @param metadata An instance of HaplogroupVariantMetadata containing details such as - * variant ID, revision ID, author, timestamp, comment, change type, - * and optional previous revision ID. - * @return A Future containing the number of rows affected by the addition, typically 1 if successful. - */ - def addVariantRevisionMetadata(metadata: HaplogroupVariantMetadata): Future[Int] - - /** - * Retrieves the metadata for a specific revision of a haplogroup variant. - * - * @param variantId The unique identifier of the haplogroup variant. - * @param revisionId The unique identifier of the specific revision of the haplogroup variant. - * @return A Future containing an Option with the metadata for the specified variant revision. - * If no metadata is found, the Option will be None. - */ - def getVariantRevisionMetadata(variantId: Int, revisionId: Int): Future[Option[HaplogroupVariantMetadata]] - - /** - * Retrieves the revision history for a specific haplogroup variant. - * - * @param variantId The unique identifier of the haplogroup variant whose revision history is being requested. - * @return A Future containing a sequence of tuples, where each tuple consists of a HaplogroupVariant and its associated HaplogroupVariantMetadata. 
- * The sequence represents the revision history for the specified haplogroup variant. - */ - def getVariantRevisionHistory(variantId: Int): Future[Seq[(HaplogroupVariant, HaplogroupVariantMetadata)]] - - /** - * Retrieves all haplogroup variant metadata revisions created by a specific author. - * - * @param author The name of the author whose revisions need to be retrieved. - * @return A Future containing a sequence of HaplogroupVariantMetadata instances - * representing the revisions authored by the specified individual. - */ - def getVariantRevisionsByAuthor(author: String): Future[Seq[HaplogroupVariantMetadata]] - - /** - * Retrieves a sequence of haplogroup variant metadata revisions that were created within the specified date range. - * - * @param startDate The start date and time of the range within which revisions should be retrieved. - * @param endDate The end date and time of the range within which revisions should be retrieved. - * @return A Future containing a sequence of HaplogroupVariantMetadata instances representing the revisions - * created within the specified date range. - */ - def getVariantRevisionsBetweenDates(startDate: LocalDateTime, endDate: LocalDateTime): Future[Seq[HaplogroupVariantMetadata]] - - /** - * Updates the comment for a specific revision of a haplogroup variant. - * - * @param variantId The unique identifier of the haplogroup variant. - * @param revisionId The unique identifier of the revision whose comment is to be updated. - * @param newComment The new comment to be associated with the specified revision. - * @return A Future containing the number of rows affected by the update operation, typically 1 if successful. - */ - def updateVariantRevisionComment(variantId: Int, revisionId: Int, newComment: String): Future[Int] - - /** - * Retrieves the latest haplogroup variant metadata revisions for a specific change type. - * - * @param changeType The type of change (e.g., "CREATE", "UPDATE", "DELETE") used to filter the revisions. 
- * @param limit The maximum number of revisions to retrieve. Default is 10. - * @return A Future containing a sequence of HaplogroupVariantMetadata instances representing the latest revisions - * that match the specified change type, up to the specified limit. - */ - def getLatestVariantRevisionsByChangeType(changeType: String, limit: Int = 10): Future[Seq[HaplogroupVariantMetadata]] - - /** - * Retrieves the revision chain for a specific variant revision, starting from the given revision ID - * and following the chain of previous revisions until the first revision is reached. - * - * @param variantId The unique identifier of the haplogroup variant. - * @param revisionId The unique identifier of the revision to start the chain from. - * @return A Future containing a sequence of HaplogroupVariantMetadata instances, representing the - * revision chain of the specified variant starting from the specified revision. - */ - def getVariantRevisionChain(variantId: Int, revisionId: Int): Future[Seq[HaplogroupVariantMetadata]] - - /** - * Retrieves the latest revision ID for a given haplogroup variant ID. - * - * @param haplogroupVariantId The unique identifier of the haplogroup variant. - * @return A Future containing an Option with the latest revision ID, or None if no revisions exist. 
- */ - def getLatestRevisionId(haplogroupVariantId: Int): Future[Option[Int]] - - def addNextVariantRevisionMetadata( - haplogroupVariantId: Int, - author: String, - comment: String, - changeType: String - ): Future[HaplogroupVariantMetadata] -} - -class HaplogroupVariantMetadataRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with HaplogroupVariantMetadataRepository { - - import models.dal.DatabaseSchema.* - import models.dal.DatabaseSchema.domain.haplogroups.{haplogroupVariantMetadata, haplogroupVariants} - import models.dal.MyPostgresProfile.api.* - - override def addVariantRevisionMetadata(metadata: HaplogroupVariantMetadata): Future[Int] = { - val insertion = haplogroupVariantMetadata += metadata - runQuery(insertion) - } - - override def getVariantRevisionMetadata(variantId: Int, revisionId: Int): Future[Option[HaplogroupVariantMetadata]] = { - val query = haplogroupVariantMetadata - .filter(m => m.haplogroup_variant_id === variantId && m.revision_id === revisionId) - .result - .headOption - - runQuery(query) - } - - override def getVariantRevisionHistory(variantId: Int): Future[Seq[(HaplogroupVariant, HaplogroupVariantMetadata)]] = { - val query = for { - variant <- haplogroupVariants if variant.haplogroupVariantId === variantId - metadata <- haplogroupVariantMetadata if metadata.haplogroup_variant_id === variant.haplogroupVariantId - } yield (variant, metadata) - - runQuery(query.sortBy(_._2.timestamp.desc).result) - } - - override def getVariantRevisionsByAuthor(author: String): Future[Seq[HaplogroupVariantMetadata]] = { - val query = haplogroupVariantMetadata - .filter(_.author === author) - .sortBy(_.timestamp.desc) - .result - - runQuery(query) - } - - override def getVariantRevisionsBetweenDates( - startDate: LocalDateTime, - endDate: LocalDateTime - ): Future[Seq[HaplogroupVariantMetadata]] = { - val query = haplogroupVariantMetadata - .filter(m => 
m.timestamp >= startDate && m.timestamp <= endDate) - .sortBy(_.timestamp.desc) - .result - - runQuery(query) - } - - override def updateVariantRevisionComment( - variantId: Int, - revisionId: Int, - newComment: String - ): Future[Int] = { - val query = haplogroupVariantMetadata - .filter(m => m.haplogroup_variant_id === variantId && m.revision_id === revisionId) - .map(_.comment) - .update(newComment) - - runQuery(query) - } - - override def getLatestVariantRevisionsByChangeType( - changeType: String, - limit: Int = 10 - ): Future[Seq[HaplogroupVariantMetadata]] = { - val query = haplogroupVariantMetadata - .filter(_.change_type === changeType) - .sortBy(_.timestamp.desc) - .take(limit) - .result - - runQuery(query) - } - - override def getVariantRevisionChain( - variantId: Int, - revisionId: Int - ): Future[Seq[HaplogroupVariantMetadata]] = { - def recursiveChain( - currentRevisionId: Int, - chain: Seq[HaplogroupVariantMetadata] = Seq.empty - ): DBIO[Seq[HaplogroupVariantMetadata]] = { - val query = haplogroupVariantMetadata - .filter(m => - m.haplogroup_variant_id === variantId && - m.revision_id === currentRevisionId - ) - .result.headOption - - query.flatMap { - case Some(metadata) => - metadata.previous_revision_id match { - case Some(prevId) => recursiveChain(prevId, chain :+ metadata) - case None => DBIO.successful(chain :+ metadata) - } - case None => DBIO.successful(chain) - } - } - - runQuery(recursiveChain(revisionId)) - } - - override def getLatestRevisionId(haplogroupVariantId: Int): Future[Option[Int]] = { - val query = haplogroupVariantMetadata - .filter(_.haplogroup_variant_id === haplogroupVariantId) - .map(_.revision_id) - .max - .result - - runQuery(query) - } - - override def addNextVariantRevisionMetadata( - haplogroupVariantId: Int, - author: String, - comment: String, - changeType: String - ): Future[HaplogroupVariantMetadata] = { - val action = (for { - // Select the maximum revision_id for the given haplogroup_variant_id, and lock the 
table for update - // Using "forUpdate" to prevent race conditions during concurrent merges - latestRevisionIdOption <- haplogroupVariantMetadata - .filter(_.haplogroup_variant_id === haplogroupVariantId) - .sortBy(_.revision_id.desc) // Sort by revision_id descending - .take(1) // Take only the latest one - .forUpdate // Apply forUpdate to this subquery - .map(_.revision_id) // Map to get the revision_id - .result - .headOption // Get the single result - - nextRevisionId = latestRevisionIdOption.map(_ + 1).getOrElse(1) - previousRevisionIdValue = latestRevisionIdOption // This is the actual previous_revision_id - - newMetadata = HaplogroupVariantMetadata( - haplogroup_variant_id = haplogroupVariantId, - revision_id = nextRevisionId, - previous_revision_id = previousRevisionIdValue, // Use the fetched latestRevisionIdOption - author = author, - comment = comment, - timestamp = LocalDateTime.now(), - change_type = changeType - ) - _ <- haplogroupVariantMetadata += newMetadata - } yield newMetadata).transactionally // Ensure the entire block runs as a single transaction - - runQuery(action) - } -} \ No newline at end of file diff --git a/app/repositories/HaplogroupVariantRepository.scala b/app/repositories/HaplogroupVariantRepository.scala deleted file mode 100644 index b1843374..00000000 --- a/app/repositories/HaplogroupVariantRepository.scala +++ /dev/null @@ -1,367 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.* -import models.domain.genomics.{MutationType, NamingStatus, VariantV2} -import models.domain.haplogroups.{Haplogroup, HaplogroupVariant} -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Trait for managing and querying relationships between haplogroups and genetic variants. - */ -trait HaplogroupVariantRepository { - /** - * Finds and retrieves a list of variants based on the given query string. 
- * - * @param query The search query used to filter and retrieve the relevant variants. - * @return A future containing a sequence of variants that match the provided query. - */ - def findVariants(query: String): Future[Seq[VariantV2]] - - /** - * Retrieves the list of variants associated with a given haplogroup. - * - * @param haplogroupId the identifier of the haplogroup for which variants are to be retrieved - * @return a future containing a sequence of VariantV2 objects - */ - def getHaplogroupVariants(haplogroupId: Int): Future[Seq[VariantV2]] - - def countHaplogroupVariants(haplogroupId: Long): Future[Int] - - /** - * Retrieves a list of haplogroups associated with the specified variant. - * - * @param variantId The unique identifier of the variant for which haplogroups are to be retrieved. - * @return A Future containing a sequence of Haplogroup objects associated with the specified variant. - */ - def getHaplogroupsByVariant(variantId: Int): Future[Seq[Haplogroup]] - - /** - * Associates a genetic variant with a specified haplogroup. - * - * @param haplogroupId The unique identifier of the haplogroup to which the variant will be added. - * @param variantId The unique identifier of the genetic variant to add to the haplogroup. - * @return A Future containing the number of records updated or affected (typically 1 if successful, 0 otherwise). - */ - def addVariantToHaplogroup(haplogroupId: Int, variantId: Int): Future[Int] - - /** - * Retrieves the `haplogroup_variant_id`s for a given haplogroup. - * - * @param haplogroupId The unique identifier of the haplogroup. - * @return A Future containing a sequence of `haplogroup_variant_id`s. - */ - def getHaplogroupVariantIds(haplogroupId: Int): Future[Seq[Int]] - - /** - * Removes a specified variant from a given haplogroup. - * - * @param haplogroupId The unique identifier of the haplogroup from which the variant will be removed. 
- * @param variantId The unique identifier of the variant to be removed from the haplogroup. - * @return A future containing the number of records affected by the operation. - */ - def removeVariantFromHaplogroup(haplogroupId: Int, variantId: Int): Future[Int] - - /** - * Finds haplogroups associated with a given defining variant. - * - * @param variantId The identifier of the defining variant used to locate haplogroups. - * @param haplogroupType The type of haplogroup to be returned, indicating the classification context. - * @return A Future containing a sequence of haplogroups that match the given defining variant and type. - */ - def findHaplogroupsByDefiningVariant(variantId: String, haplogroupType: HaplogroupType): Future[Seq[Haplogroup]] - - /** - * Retrieves variants associated with a haplogroup by its name. - * - * @param haplogroupName The name of the haplogroup (e.g., "R-M269") - * @return A Future containing a sequence of VariantV2 for the haplogroup - */ - def getVariantsByHaplogroupName(haplogroupName: String): Future[Seq[VariantV2]] - - /** - * Retrieves variants for multiple haplogroups in a single query. - * - * @param haplogroupIds The sequence of haplogroup IDs to retrieve variants for. - * @return A Future containing a sequence of (haplogroupId, VariantV2) tuples. - */ - def getVariantsForHaplogroups(haplogroupIds: Seq[Int]): Future[Seq[(Int, VariantV2)]] - - /** - * Bulk associate variants with haplogroups in a single operation. - * Uses ON CONFLICT to handle duplicates gracefully. - * - * @param associations Sequence of (haplogroupId, variantId) tuples - * @return A Future containing the sequence of haplogroup_variant_ids created or found - */ - def bulkAddVariantsToHaplogroups(associations: Seq[(Int, Int)]): Future[Seq[Int]] - - /** - * Bulk remove variant associations from a haplogroup. 
- * - * @param haplogroupId The haplogroup to remove variants from - * @param variantIds The variant IDs to remove - * @return A Future containing the number of associations removed - */ - def bulkRemoveVariantsFromHaplogroup(haplogroupId: Int, variantIds: Seq[Int]): Future[Int] - - /** - * Gets the variant IDs (not haplogroup_variant_ids) for a haplogroup. - * - * @param haplogroupId The haplogroup ID - * @return A Future containing the sequence of variant IDs - */ - def getVariantIdsForHaplogroup(haplogroupId: Int): Future[Seq[Int]] - - /** - * Gets variant names by their IDs for display purposes. - * - * @param variantIds The set of variant IDs to look up - * @return A Future containing a map of variant ID -> canonical name - */ - def getVariantNamesByIds(variantIds: Set[Int]): Future[Map[Int, String]] -} - -class HaplogroupVariantRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with HaplogroupVariantRepository { - - import models.dal.DatabaseSchema.domain.haplogroups.{haplogroupVariants, haplogroups} - import models.dal.MyPostgresProfile.api.* - import models.dal.domain.genomics.VariantV2Table - import play.api.libs.json.Json - import slick.jdbc.GetResult - - private val variantsV2 = TableQuery[VariantV2Table] - - // GetResult for raw SQL queries - private implicit val variantV2GetResult: GetResult[VariantV2] = GetResult { r => - VariantV2( - variantId = Some(r.nextInt()), - canonicalName = r.nextStringOption(), - mutationType = MutationType.fromStringOrDefault(r.nextString()), - namingStatus = NamingStatus.fromStringOrDefault(r.nextString()), - aliases = Json.parse(r.nextString()), - coordinates = Json.parse(r.nextString()), - definingHaplogroupId = r.nextIntOption(), - evidence = Json.parse(r.nextString()), - primers = Json.parse(r.nextString()), - notes = r.nextStringOption(), - createdAt = r.nextTimestamp().toInstant, - updatedAt = r.nextTimestamp().toInstant 
- ) - } - - override def findVariants(query: String): Future[Seq[VariantV2]] = { - val normalizedQuery = query.trim.toLowerCase - val upperQuery = normalizedQuery.toUpperCase - val searchPattern = s"%$upperQuery%" - - // Handle different query formats - if (normalizedQuery.startsWith("rs")) { - // Search rs_ids in aliases - val rsQuery = sql""" - SELECT * FROM variant_v2 - WHERE aliases->'rs_ids' ?? $normalizedQuery - """.as[VariantV2] - runQuery(rsQuery) - } else if (normalizedQuery.contains(":")) { - // Coordinate-based search (contig:position or contig:position:ref:alt) - val parts = normalizedQuery.split(":") - parts.length match { - case 2 => - val contig = parts(0) - val position = parts(1).toIntOption.getOrElse(0) - val coordQuery = sql""" - SELECT * FROM variant_v2 - WHERE EXISTS ( - SELECT 1 FROM jsonb_each(coordinates) AS c(ref_genome, coords) - WHERE coords->>'contig' ILIKE $contig - AND (coords->>'position')::int = $position - ) - """.as[VariantV2] - runQuery(coordQuery) - case 4 => - val contig = parts(0) - val position = parts(1).toIntOption.getOrElse(0) - val ref = parts(2).toUpperCase - val alt = parts(3).toUpperCase - val coordQuery = sql""" - SELECT * FROM variant_v2 - WHERE EXISTS ( - SELECT 1 FROM jsonb_each(coordinates) AS c(ref_genome, coords) - WHERE coords->>'contig' ILIKE $contig - AND (coords->>'position')::int = $position - AND UPPER(coords->>'ref') = $ref - AND UPPER(coords->>'alt') = $alt - ) - """.as[VariantV2] - runQuery(coordQuery) - case _ => - Future.successful(Seq.empty) - } - } else { - // Search by canonical name or aliases - val nameQuery = sql""" - SELECT * FROM variant_v2 - WHERE UPPER(canonical_name) LIKE $searchPattern - OR aliases->'common_names' ?? 
$normalizedQuery - OR EXISTS ( - SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name - WHERE UPPER(name) LIKE $searchPattern - ) - LIMIT 100 - """.as[VariantV2] - runQuery(nameQuery) - } - } - - override def getHaplogroupVariants(haplogroupId: Int): Future[Seq[VariantV2]] = { - val query = for { - hv <- haplogroupVariants if hv.haplogroupId === haplogroupId - v <- variantsV2 if v.variantId === hv.variantId - } yield v - - runQuery(query.result) - } - - def countHaplogroupVariants(haplogroupId: Long): Future[Int] = { - val q = for { - hv <- haplogroupVariants if hv.haplogroupId === haplogroupId.toInt - v <- variantsV2 if hv.variantId === v.variantId - } yield v.canonicalName - - runQuery(q.distinct.length.result) - } - - override def getHaplogroupsByVariant(variantId: Int): Future[Seq[Haplogroup]] = { - val query = for { - hv <- haplogroupVariants if hv.variantId === variantId - haplogroup <- haplogroups if haplogroup.haplogroupId === hv.haplogroupId - } yield haplogroup - - runQuery(query.result) - } - - override def addVariantToHaplogroup(haplogroupId: Int, variantId: Int): Future[Int] = { - val insertAction = sql""" - INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) - VALUES ($haplogroupId, $variantId) - ON CONFLICT (haplogroup_id, variant_id) DO UPDATE SET haplogroup_id = EXCLUDED.haplogroup_id -- No actual update needed, just to trigger RETURNING - RETURNING haplogroup_variant_id - """.as[Int].head - - runQuery(insertAction) - } - - override def getHaplogroupVariantIds(haplogroupId: Int): Future[Seq[Int]] = { - val query = haplogroupVariants.filter(_.haplogroupId === haplogroupId).map(_.haplogroupVariantId) - runQuery(query.result) - } - - def removeVariantFromHaplogroup(haplogroupId: Int, variantId: Int): Future[Int] = { - val query = haplogroupVariants - .filter(hv => hv.haplogroupId === haplogroupId && hv.variantId === variantId) - .delete - - runQuery(query) - } - - override def findHaplogroupsByDefiningVariant(variantId: 
String, haplogroupType: HaplogroupType): Future[Seq[Haplogroup]] = { - // Search by canonical name or variant ID - val variantIdOpt = variantId.toIntOption - - val query = variantIdOpt match { - case Some(vid) => - for { - variant <- variantsV2 if variant.variantId === vid || variant.canonicalName === variantId - haplogroupVariant <- haplogroupVariants if haplogroupVariant.variantId === variant.variantId - haplogroup <- haplogroups if - haplogroup.haplogroupId === haplogroupVariant.haplogroupId && - haplogroup.haplogroupType === haplogroupType - } yield haplogroup - case None => - for { - variant <- variantsV2 if variant.canonicalName === variantId - haplogroupVariant <- haplogroupVariants if haplogroupVariant.variantId === variant.variantId - haplogroup <- haplogroups if - haplogroup.haplogroupId === haplogroupVariant.haplogroupId && - haplogroup.haplogroupType === haplogroupType - } yield haplogroup - } - - runQuery(query.result) - } - - override def getVariantsByHaplogroupName(haplogroupName: String): Future[Seq[VariantV2]] = { - val query = for { - hg <- haplogroups if hg.name === haplogroupName - hv <- haplogroupVariants if hv.haplogroupId === hg.haplogroupId - v <- variantsV2 if v.variantId === hv.variantId - } yield v - - runQuery(query.result) - } - - override def getVariantsForHaplogroups(haplogroupIds: Seq[Int]): Future[Seq[(Int, VariantV2)]] = { - val query = for { - hv <- haplogroupVariants if hv.haplogroupId.inSet(haplogroupIds) - v <- variantsV2 if v.variantId === hv.variantId - } yield (hv.haplogroupId, v) - - runQuery(query.result) - } - - override def bulkAddVariantsToHaplogroups(associations: Seq[(Int, Int)]): Future[Seq[Int]] = { - if (associations.isEmpty) return Future.successful(Seq.empty) - - // Build values clause for bulk insert - val valuesClause = associations.map { case (hgId, varId) => - s"($hgId, $varId)" - }.mkString(", ") - - val insertQuery = sql""" - INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) - VALUES 
#$valuesClause - ON CONFLICT (haplogroup_id, variant_id) DO UPDATE - SET haplogroup_id = EXCLUDED.haplogroup_id - RETURNING haplogroup_variant_id - """.as[Int] - - runQuery(insertQuery) - } - - override def bulkRemoveVariantsFromHaplogroup(haplogroupId: Int, variantIds: Seq[Int]): Future[Int] = { - if (variantIds.isEmpty) return Future.successful(0) - - val query = haplogroupVariants - .filter(hv => hv.haplogroupId === haplogroupId && hv.variantId.inSet(variantIds)) - .delete - - runQuery(query) - } - - override def getVariantIdsForHaplogroup(haplogroupId: Int): Future[Seq[Int]] = { - val query = haplogroupVariants.filter(_.haplogroupId === haplogroupId).map(_.variantId) - runQuery(query.result) - } - - override def getVariantNamesByIds(variantIds: Set[Int]): Future[Map[Int, String]] = { - if (variantIds.isEmpty) return Future.successful(Map.empty) - - val query = variantsV2 - .filter(_.variantId.inSet(variantIds)) - .map(v => (v.variantId, v.canonicalName)) - .result - - runQuery(query).map { results => - results.collect { - case (id, Some(name)) => id -> name - }.toMap - } - } -} \ No newline at end of file diff --git a/app/repositories/InstrumentObservationRepository.scala b/app/repositories/InstrumentObservationRepository.scala deleted file mode 100644 index 3922ffc2..00000000 --- a/app/repositories/InstrumentObservationRepository.scala +++ /dev/null @@ -1,74 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.genomics.{InstrumentObservation, ObservationConfidence} -import play.api.db.slick.DatabaseConfigProvider -import slick.ast.BaseTypedType - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -trait InstrumentObservationRepository { - def create(observation: InstrumentObservation): Future[InstrumentObservation] - def findByAtUri(atUri: String): Future[Option[InstrumentObservation]] - def findByInstrumentId(instrumentId: String): 
Future[Seq[InstrumentObservation]] - def findByLabName(labName: String): Future[Seq[InstrumentObservation]] - def findByBiosampleRef(biosampleRef: String): Future[Seq[InstrumentObservation]] - def update(observation: InstrumentObservation): Future[Boolean] - def deleteByAtUri(atUri: String): Future[Boolean] -} - -@Singleton -class InstrumentObservationRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with InstrumentObservationRepository { - - import models.dal.MyPostgresProfile.api.* - - implicit private val confidenceMapper: BaseTypedType[ObservationConfidence] = - MappedColumnType.base[ObservationConfidence, String](_.dbValue, ObservationConfidence.fromString) - - private val observations = DatabaseSchema.domain.genomics.instrumentObservations - - override def create(observation: InstrumentObservation): Future[InstrumentObservation] = { - db.run( - (observations returning observations.map(_.id) - into ((o, id) => o.copy(id = Some(id)))) += observation - ) - } - - override def findByAtUri(atUri: String): Future[Option[InstrumentObservation]] = { - db.run(observations.filter(_.atUri === atUri).result.headOption) - } - - override def findByInstrumentId(instrumentId: String): Future[Seq[InstrumentObservation]] = { - db.run(observations.filter(_.instrumentId === instrumentId).result) - } - - override def findByLabName(labName: String): Future[Seq[InstrumentObservation]] = { - db.run(observations.filter(_.labName === labName).result) - } - - override def findByBiosampleRef(biosampleRef: String): Future[Seq[InstrumentObservation]] = { - db.run(observations.filter(_.biosampleRef === biosampleRef).result) - } - - override def update(observation: InstrumentObservation): Future[Boolean] = { - db.run( - observations.filter(_.atUri === observation.atUri) - .map(o => (o.atCid, o.instrumentId, o.labName, o.biosampleRef, 
o.sequenceRunRef, - o.platform, o.instrumentModel, o.flowcellId, o.runDate, o.confidence, o.updatedAt)) - .update((observation.atCid, observation.instrumentId, observation.labName, - observation.biosampleRef, observation.sequenceRunRef, observation.platform, - observation.instrumentModel, observation.flowcellId, observation.runDate, - observation.confidence, Some(LocalDateTime.now()))) - ).map(_ > 0) - } - - override def deleteByAtUri(atUri: String): Future[Boolean] = { - db.run(observations.filter(_.atUri === atUri).delete.map(_ > 0)) - } -} diff --git a/app/repositories/InstrumentProposalRepository.scala b/app/repositories/InstrumentProposalRepository.scala deleted file mode 100644 index 90dac541..00000000 --- a/app/repositories/InstrumentProposalRepository.scala +++ /dev/null @@ -1,87 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.genomics.{InstrumentAssociationProposal, ProposalStatus} -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -trait InstrumentProposalRepository { - def create(proposal: InstrumentAssociationProposal): Future[InstrumentAssociationProposal] - def findById(id: Int): Future[Option[InstrumentAssociationProposal]] - def findActiveByInstrumentId(instrumentId: String): Future[Option[InstrumentAssociationProposal]] - def findByStatus(status: ProposalStatus): Future[Seq[InstrumentAssociationProposal]] - def findPending(): Future[Seq[InstrumentAssociationProposal]] - def update(proposal: InstrumentAssociationProposal): Future[Boolean] - def updateStatus(id: Int, status: ProposalStatus): Future[Boolean] -} - -@Singleton -class InstrumentProposalRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with InstrumentProposalRepository { - - 
import models.dal.MyPostgresProfile.api.* - - implicit private val statusMapper: BaseColumnType[ProposalStatus] = - MappedColumnType.base[ProposalStatus, String](_.dbValue, ProposalStatus.fromString) - - private val proposals = DatabaseSchema.domain.genomics.instrumentAssociationProposals - - override def create(proposal: InstrumentAssociationProposal): Future[InstrumentAssociationProposal] = { - db.run( - (proposals returning proposals.map(_.id) - into ((p, id) => p.copy(id = Some(id)))) += proposal - ) - } - - override def findById(id: Int): Future[Option[InstrumentAssociationProposal]] = { - db.run(proposals.filter(_.id === id).result.headOption) - } - - override def findActiveByInstrumentId(instrumentId: String): Future[Option[InstrumentAssociationProposal]] = { - val activeStatuses = Seq(ProposalStatus.Pending, ProposalStatus.ReadyForReview, ProposalStatus.UnderReview) - db.run( - proposals - .filter(p => p.instrumentId === instrumentId && p.status.inSet(activeStatuses)) - .result.headOption - ) - } - - override def findByStatus(status: ProposalStatus): Future[Seq[InstrumentAssociationProposal]] = { - db.run(proposals.filter(_.status === status).sortBy(_.updatedAt.desc).result) - } - - override def findPending(): Future[Seq[InstrumentAssociationProposal]] = { - val activeStatuses = Seq(ProposalStatus.Pending, ProposalStatus.ReadyForReview) - db.run(proposals.filter(_.status.inSet(activeStatuses)).sortBy(_.updatedAt.desc).result) - } - - override def update(proposal: InstrumentAssociationProposal): Future[Boolean] = { - db.run( - proposals.filter(_.id === proposal.id) - .map(p => (p.proposedLabName, p.proposedManufacturer, p.proposedModel, - p.observationCount, p.distinctCitizenCount, p.confidenceScore, - p.earliestObservation, p.latestObservation, p.status, - p.reviewedAt, p.reviewedBy, p.reviewNotes, - p.acceptedLabId, p.acceptedInstrumentId, p.updatedAt)) - .update((proposal.proposedLabName, proposal.proposedManufacturer, proposal.proposedModel, - 
proposal.observationCount, proposal.distinctCitizenCount, proposal.confidenceScore, - proposal.earliestObservation, proposal.latestObservation, proposal.status, - proposal.reviewedAt, proposal.reviewedBy, proposal.reviewNotes, - proposal.acceptedLabId, proposal.acceptedInstrumentId, LocalDateTime.now())) - ).map(_ > 0) - } - - override def updateStatus(id: Int, status: ProposalStatus): Future[Boolean] = { - db.run( - proposals.filter(_.id === id) - .map(p => (p.status, p.updatedAt)) - .update((status, LocalDateTime.now())) - ).map(_ > 0) - } -} diff --git a/app/repositories/MatchConsentTrackingRepository.scala b/app/repositories/MatchConsentTrackingRepository.scala deleted file mode 100644 index 6a59d06e..00000000 --- a/app/repositories/MatchConsentTrackingRepository.scala +++ /dev/null @@ -1,78 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.ibd.MatchConsentTracking -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.ZonedDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait MatchConsentTrackingRepository { - def create(consent: MatchConsentTracking): Future[MatchConsentTracking] - def findByAtUri(atUri: String): Future[Option[MatchConsentTracking]] - def findBySampleGuid(sampleGuid: UUID): Future[Option[MatchConsentTracking]] - def findByDid(did: String): Future[Seq[MatchConsentTracking]] - def findActiveConsentForSample(sampleGuid: UUID): Future[Option[MatchConsentTracking]] - def revoke(atUri: String): Future[Boolean] - def upsertFromFirehose(consent: MatchConsentTracking): Future[MatchConsentTracking] - def deleteByAtUri(atUri: String): Future[Boolean] -} - -@Singleton -class MatchConsentTrackingRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends 
HasDatabaseConfigProvider[JdbcProfile] - with MatchConsentTrackingRepository - with Logging { - - import profile.api.* - import models.dal.MyPostgresProfile.api.playJsonTypeMapper - - private val consents = DatabaseSchema.domain.ibd.matchConsentTracking - - override def create(consent: MatchConsentTracking): Future[MatchConsentTracking] = - db.run((consents returning consents.map(_.id) into ((c, id) => c.copy(id = Some(id)))) += consent) - - override def findByAtUri(atUri: String): Future[Option[MatchConsentTracking]] = - db.run(consents.filter(_.atUri === atUri).result.headOption) - - override def findBySampleGuid(sampleGuid: UUID): Future[Option[MatchConsentTracking]] = - db.run(consents.filter(_.sampleGuid === sampleGuid).result.headOption) - - override def findByDid(did: String): Future[Seq[MatchConsentTracking]] = - db.run(consents.filter(_.consentingDid === did).result) - - override def findActiveConsentForSample(sampleGuid: UUID): Future[Option[MatchConsentTracking]] = - db.run( - consents.filter(c => c.sampleGuid === sampleGuid && c.revokedAt.isEmpty) - .result.headOption - ) - - override def revoke(atUri: String): Future[Boolean] = - db.run( - consents.filter(c => c.atUri === atUri && c.revokedAt.isEmpty) - .map(_.revokedAt) - .update(Some(ZonedDateTime.now())) - ).map(_ > 0) - - override def upsertFromFirehose(consent: MatchConsentTracking): Future[MatchConsentTracking] = { - findByAtUri(consent.atUri).flatMap { - case Some(existing) => - db.run( - consents.filter(_.atUri === consent.atUri) - .map(c => (c.consentLevel, c.allowedMatchTypes, c.shareContactInfo, c.expiresAt)) - .update((consent.consentLevel, consent.allowedMatchTypes, consent.shareContactInfo, consent.expiresAt)) - ).map(_ => consent.copy(id = existing.id)) - case None => - create(consent) - } - } - - override def deleteByAtUri(atUri: String): Future[Boolean] = - db.run(consents.filter(_.atUri === atUri).delete).map(_ > 0) -} diff --git a/app/repositories/MatchRequestTrackingRepository.scala 
b/app/repositories/MatchRequestTrackingRepository.scala deleted file mode 100644 index af26c3f8..00000000 --- a/app/repositories/MatchRequestTrackingRepository.scala +++ /dev/null @@ -1,70 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.ibd.MatchRequestTracking -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.ZonedDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait MatchRequestTrackingRepository { - def create(request: MatchRequestTracking): Future[MatchRequestTracking] - def findByAtUri(atUri: String): Future[Option[MatchRequestTracking]] - def findPendingForSample(sampleGuid: UUID): Future[Seq[MatchRequestTracking]] - def findSentByDid(did: String): Future[Seq[MatchRequestTracking]] - def updateStatus(atUri: String, status: String): Future[Boolean] - def upsertFromFirehose(request: MatchRequestTracking): Future[MatchRequestTracking] -} - -@Singleton -class MatchRequestTrackingRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HasDatabaseConfigProvider[JdbcProfile] - with MatchRequestTrackingRepository - with Logging { - - import profile.api.* - import models.dal.MyPostgresProfile.api.playJsonTypeMapper - - private val requests = DatabaseSchema.domain.ibd.matchRequestTracking - - override def create(request: MatchRequestTracking): Future[MatchRequestTracking] = - db.run((requests returning requests.map(_.id) into ((r, id) => r.copy(id = Some(id)))) += request) - - override def findByAtUri(atUri: String): Future[Option[MatchRequestTracking]] = - db.run(requests.filter(_.atUri === atUri).result.headOption) - - override def findPendingForSample(sampleGuid: UUID): Future[Seq[MatchRequestTracking]] = - db.run( - requests.filter(r => r.toSampleGuid === sampleGuid && 
r.status === "PENDING") - .sortBy(_.createdAt.desc).result - ) - - override def findSentByDid(did: String): Future[Seq[MatchRequestTracking]] = - db.run(requests.filter(_.requesterDid === did).sortBy(_.createdAt.desc).result) - - override def updateStatus(atUri: String, status: String): Future[Boolean] = - db.run( - requests.filter(_.atUri === atUri) - .map(r => (r.status, r.updatedAt)) - .update((status, ZonedDateTime.now())) - ).map(_ > 0) - - override def upsertFromFirehose(request: MatchRequestTracking): Future[MatchRequestTracking] = { - findByAtUri(request.atUri).flatMap { - case Some(existing) => - db.run( - requests.filter(_.atUri === request.atUri) - .map(r => (r.status, r.message, r.updatedAt, r.expiresAt)) - .update((request.status, request.message, ZonedDateTime.now(), request.expiresAt)) - ).map(_ => request.copy(id = existing.id)) - case None => - create(request) - } - } -} diff --git a/app/repositories/MatchSuggestionRepository.scala b/app/repositories/MatchSuggestionRepository.scala deleted file mode 100644 index 50e21e44..00000000 --- a/app/repositories/MatchSuggestionRepository.scala +++ /dev/null @@ -1,72 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.ibd.MatchSuggestion -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.ZonedDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait MatchSuggestionRepository { - def create(suggestion: MatchSuggestion): Future[MatchSuggestion] - def createBatch(suggestions: Seq[MatchSuggestion]): Future[Seq[MatchSuggestion]] - def findByTargetSample(sampleGuid: UUID, suggestionType: Option[String], limit: Int): Future[Seq[MatchSuggestion]] - def findById(id: Long): Future[Option[MatchSuggestion]] - def dismiss(id: Long): Future[Boolean] - def expireOld(now: ZonedDateTime): Future[Int] - def 
countByTargetSample(sampleGuid: UUID): Future[Int] -} - -@Singleton -class MatchSuggestionRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HasDatabaseConfigProvider[JdbcProfile] - with MatchSuggestionRepository - with Logging { - - import profile.api.* - import models.dal.MyPostgresProfile.api.playJsonTypeMapper - - private val suggestions = DatabaseSchema.domain.ibd.matchSuggestions - - override def create(suggestion: MatchSuggestion): Future[MatchSuggestion] = - db.run((suggestions returning suggestions.map(_.id) into ((s, id) => s.copy(id = Some(id)))) += suggestion) - - override def createBatch(batch: Seq[MatchSuggestion]): Future[Seq[MatchSuggestion]] = - db.run((suggestions returning suggestions.map(_.id) into ((s, id) => s.copy(id = Some(id)))) ++= batch).map(_.toSeq) - - override def findByTargetSample(sampleGuid: UUID, suggestionType: Option[String], limit: Int): Future[Seq[MatchSuggestion]] = { - val query = suggestions - .filter(s => s.targetSampleGuid === sampleGuid && s.status === "ACTIVE") - val filtered = suggestionType match { - case Some(t) => query.filter(_.suggestionType === t) - case None => query - } - db.run(filtered.sortBy(_.score.desc).take(limit).result) - } - - override def findById(id: Long): Future[Option[MatchSuggestion]] = - db.run(suggestions.filter(_.id === id).result.headOption) - - override def dismiss(id: Long): Future[Boolean] = - db.run( - suggestions.filter(s => s.id === id && s.status === "ACTIVE") - .map(_.status) - .update("DISMISSED") - ).map(_ > 0) - - override def expireOld(now: ZonedDateTime): Future[Int] = - db.run( - suggestions.filter(s => s.status === "ACTIVE" && s.expiresAt <= now) - .map(_.status) - .update("EXPIRED") - ) - - override def countByTargetSample(sampleGuid: UUID): Future[Int] = - db.run(suggestions.filter(s => s.targetSampleGuid === sampleGuid && s.status === "ACTIVE").length.result) -} diff --git 
a/app/repositories/PDSRegistrationRepository.scala b/app/repositories/PDSRegistrationRepository.scala deleted file mode 100644 index 5312af21..00000000 --- a/app/repositories/PDSRegistrationRepository.scala +++ /dev/null @@ -1,45 +0,0 @@ -package repositories - -import models.PDSRegistration -import models.dal.MetadataSchema.pdsRegistrations -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import play.db.NamedDatabase -import slick.jdbc.JdbcProfile - -import java.time.ZonedDateTime -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class PDSRegistrationRepository @Inject()( - @NamedDatabase("metadata") protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[JdbcProfile] { - - import profile.api.* - - def create(pdsRegistration: PDSRegistration): Future[PDSRegistration] = db.run { - pdsRegistrations += pdsRegistration - }.map(_ => pdsRegistration) - - def findByDid(did: String): Future[Option[PDSRegistration]] = db.run { - pdsRegistrations.filter(_.did === did).result.headOption - } - - def findByHandle(handle: String): Future[Option[PDSRegistration]] = db.run { - pdsRegistrations.filter(_.handle === handle).result.headOption - } - - def updateCursor(did: String, lastCommitCid: String, newCursor: Long): Future[Int] = db.run { - pdsRegistrations.filter(_.did === did) - .map(reg => (reg.lastCommitCid, reg.cursor, reg.updatedAt)) - .update((Some(lastCommitCid), newCursor, ZonedDateTime.now())) - } - - def listAll: Future[Seq[PDSRegistration]] = db.run { - pdsRegistrations.result - } - - def delete(did: String): Future[Int] = db.run { - pdsRegistrations.filter(_.did === did).delete - } -} diff --git a/app/repositories/PatronSubscriptionRepository.scala b/app/repositories/PatronSubscriptionRepository.scala deleted file mode 100644 index 5547dffd..00000000 --- a/app/repositories/PatronSubscriptionRepository.scala +++ 
/dev/null @@ -1,95 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.DatabaseSchema -import models.domain.billing.PatronSubscription -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait PatronSubscriptionRepository { - def create(subscription: PatronSubscription): Future[PatronSubscription] - def findById(id: Int): Future[Option[PatronSubscription]] - def findByUserId(userId: UUID): Future[Seq[PatronSubscription]] - def findActiveByUserId(userId: UUID): Future[Option[PatronSubscription]] - def findByProviderSubscriptionId(provider: String, providerSubId: String): Future[Option[PatronSubscription]] - def findByStatus(status: String): Future[Seq[PatronSubscription]] - def updateStatus(id: Int, status: String): Future[Boolean] - def updatePeriod(id: Int, periodStart: LocalDateTime, periodEnd: LocalDateTime): Future[Boolean] - def cancel(id: Int): Future[Boolean] - def countByTier(): Future[Map[String, Int]] - def countActive(): Future[Int] -} - -class PatronSubscriptionRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with PatronSubscriptionRepository - with Logging { - - import models.dal.MyPostgresProfile.api.* - - private val subscriptions = DatabaseSchema.billing.patronSubscriptions - - override def create(subscription: PatronSubscription): Future[PatronSubscription] = - runQuery( - (subscriptions returning subscriptions.map(_.id) into ((s, id) => s.copy(id = Some(id)))) += subscription - ) - - override def findById(id: Int): Future[Option[PatronSubscription]] = - runQuery(subscriptions.filter(_.id === id).result.headOption) - - override def findByUserId(userId: UUID): Future[Seq[PatronSubscription]] = - runQuery(subscriptions.filter(_.userId === userId).sortBy(_.createdAt.desc).result) - - override 
def findActiveByUserId(userId: UUID): Future[Option[PatronSubscription]] = - runQuery( - subscriptions.filter(s => s.userId === userId && s.status === "ACTIVE") - .sortBy(_.createdAt.desc).result.headOption - ) - - override def findByProviderSubscriptionId(provider: String, providerSubId: String): Future[Option[PatronSubscription]] = - runQuery( - subscriptions.filter(s => - s.paymentProvider === provider && s.providerSubscriptionId === providerSubId - ).result.headOption - ) - - override def findByStatus(status: String): Future[Seq[PatronSubscription]] = - runQuery(subscriptions.filter(_.status === status).sortBy(_.createdAt.desc).result) - - override def updateStatus(id: Int, status: String): Future[Boolean] = - runQuery( - subscriptions.filter(_.id === id) - .map(s => (s.status, s.updatedAt)) - .update((status, LocalDateTime.now())) - ).map(_ > 0) - - override def updatePeriod(id: Int, periodStart: LocalDateTime, periodEnd: LocalDateTime): Future[Boolean] = - runQuery( - subscriptions.filter(_.id === id) - .map(s => (s.currentPeriodStart, s.currentPeriodEnd, s.updatedAt)) - .update((Some(periodStart), Some(periodEnd), LocalDateTime.now())) - ).map(_ > 0) - - override def cancel(id: Int): Future[Boolean] = - runQuery( - subscriptions.filter(_.id === id) - .map(s => (s.status, s.cancelledAt, s.updatedAt)) - .update(("CANCELLED", Some(LocalDateTime.now()), LocalDateTime.now())) - ).map(_ > 0) - - override def countByTier(): Future[Map[String, Int]] = - runQuery( - subscriptions.filter(_.status === "ACTIVE") - .groupBy(_.patronTier).map { case (tier, group) => (tier, group.length) } - .result - ).map(_.toMap) - - override def countActive(): Future[Int] = - runQuery(subscriptions.filter(_.status === "ACTIVE").length.result) -} diff --git a/app/repositories/PdsNodeRepository.scala b/app/repositories/PdsNodeRepository.scala deleted file mode 100644 index 466b1669..00000000 --- a/app/repositories/PdsNodeRepository.scala +++ /dev/null @@ -1,169 +0,0 @@ -package 
repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.MetadataSchema -import models.domain.pds.{PdsFleetConfig, PdsHeartbeatLog, PdsNode} -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import play.db.NamedDatabase -import slick.jdbc.JdbcProfile - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -trait PdsNodeRepository { - def create(node: PdsNode): Future[PdsNode] - def findById(id: Int): Future[Option[PdsNode]] - def findByDid(did: String): Future[Option[PdsNode]] - def findByStatus(status: String): Future[Seq[PdsNode]] - def findAll(): Future[Seq[PdsNode]] - def update(node: PdsNode): Future[Boolean] - def updateStatus(id: Int, status: String): Future[Boolean] - def updateHeartbeat(id: Int, status: String, softwareVersion: Option[String], - lastCommitCid: Option[String], lastCommitRev: Option[String]): Future[Boolean] - def countByStatus(): Future[Map[String, Int]] - def findStaleNodes(threshold: LocalDateTime): Future[Seq[PdsNode]] - def delete(id: Int): Future[Boolean] -} - -@Singleton -class PdsNodeRepositoryImpl @Inject()( - @NamedDatabase("metadata") protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HasDatabaseConfigProvider[JdbcProfile] - with PdsNodeRepository - with Logging { - - import profile.api.* - import models.dal.MyPostgresProfile.api.playJsonTypeMapper - - private val nodes = MetadataSchema.pdsNodes - - override def create(node: PdsNode): Future[PdsNode] = - db.run((nodes returning nodes.map(_.id) into ((n, id) => n.copy(id = Some(id)))) += node) - - override def findById(id: Int): Future[Option[PdsNode]] = - db.run(nodes.filter(_.id === id).result.headOption) - - override def findByDid(did: String): Future[Option[PdsNode]] = - db.run(nodes.filter(_.did === did).result.headOption) - - override def findByStatus(status: String): Future[Seq[PdsNode]] = - db.run(nodes.filter(_.status === 
status).result) - - override def findAll(): Future[Seq[PdsNode]] = - db.run(nodes.sortBy(_.did).result) - - override def update(node: PdsNode): Future[Boolean] = node.id match { - case None => Future.successful(false) - case Some(id) => - val now = LocalDateTime.now() - val q1 = nodes.filter(_.id === id) - .map(n => (n.pdsUrl, n.handle, n.nodeName, n.softwareVersion, n.status)) - .update((node.pdsUrl, node.handle, node.nodeName, node.softwareVersion, node.status)) - val q2 = nodes.filter(_.id === id) - .map(n => (n.capabilities, n.ipAddress, n.osInfo, n.updatedAt)) - .update((node.capabilities, node.ipAddress, node.osInfo, now)) - db.run(DBIO.seq(q1, q2).transactionally).map(_ => true) - } - - override def updateStatus(id: Int, status: String): Future[Boolean] = - db.run( - nodes.filter(_.id === id) - .map(n => (n.status, n.updatedAt)) - .update((status, LocalDateTime.now())) - ).map(_ > 0) - - override def updateHeartbeat(id: Int, status: String, softwareVersion: Option[String], - lastCommitCid: Option[String], lastCommitRev: Option[String]): Future[Boolean] = - db.run( - nodes.filter(_.id === id) - .map(n => (n.status, n.softwareVersion, n.lastHeartbeat, n.lastCommitCid, n.lastCommitRev, n.updatedAt)) - .update((status, softwareVersion, Some(LocalDateTime.now()), lastCommitCid, lastCommitRev, LocalDateTime.now())) - ).map(_ > 0) - - override def countByStatus(): Future[Map[String, Int]] = - db.run(nodes.groupBy(_.status).map { case (status, group) => (status, group.length) }.result) - .map(_.toMap) - - override def findStaleNodes(threshold: LocalDateTime): Future[Seq[PdsNode]] = - db.run( - nodes.filter(n => - n.status =!= "OFFLINE" && (n.lastHeartbeat.isEmpty || n.lastHeartbeat < threshold) - ).result - ) - - override def delete(id: Int): Future[Boolean] = - db.run(nodes.filter(_.id === id).delete).map(_ > 0) -} - -trait PdsHeartbeatLogRepository { - def create(log: PdsHeartbeatLog): Future[PdsHeartbeatLog] - def findByNode(nodeId: Int, limit: Int = 100): 
Future[Seq[PdsHeartbeatLog]] - def findByNodeSince(nodeId: Int, since: LocalDateTime): Future[Seq[PdsHeartbeatLog]] - def deleteOlderThan(cutoff: LocalDateTime): Future[Int] -} - -@Singleton -class PdsHeartbeatLogRepositoryImpl @Inject()( - @NamedDatabase("metadata") protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HasDatabaseConfigProvider[JdbcProfile] - with PdsHeartbeatLogRepository - with Logging { - - import profile.api.* - - private val logs = MetadataSchema.pdsHeartbeatLogs - - override def create(log: PdsHeartbeatLog): Future[PdsHeartbeatLog] = - db.run((logs returning logs.map(_.id) into ((l, id) => l.copy(id = Some(id)))) += log) - - override def findByNode(nodeId: Int, limit: Int): Future[Seq[PdsHeartbeatLog]] = - db.run(logs.filter(_.pdsNodeId === nodeId).sortBy(_.recordedAt.desc).take(limit).result) - - override def findByNodeSince(nodeId: Int, since: LocalDateTime): Future[Seq[PdsHeartbeatLog]] = - db.run(logs.filter(l => l.pdsNodeId === nodeId && l.recordedAt >= since).sortBy(_.recordedAt.desc).result) - - override def deleteOlderThan(cutoff: LocalDateTime): Future[Int] = - db.run(logs.filter(_.recordedAt < cutoff).delete) -} - -trait PdsFleetConfigRepository { - def findByKey(key: String): Future[Option[PdsFleetConfig]] - def findAll(): Future[Seq[PdsFleetConfig]] - def upsert(key: String, value: String, updatedBy: Option[String] = None): Future[Boolean] -} - -@Singleton -class PdsFleetConfigRepositoryImpl @Inject()( - @NamedDatabase("metadata") protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HasDatabaseConfigProvider[JdbcProfile] - with PdsFleetConfigRepository - with Logging { - - import profile.api.* - - private val configs = MetadataSchema.pdsFleetConfigs - - override def findByKey(key: String): Future[Option[PdsFleetConfig]] = - db.run(configs.filter(_.configKey === key).result.headOption) - - override def findAll(): Future[Seq[PdsFleetConfig]] 
= - db.run(configs.sortBy(_.configKey).result) - - override def upsert(key: String, value: String, updatedBy: Option[String]): Future[Boolean] = - findByKey(key).flatMap { - case Some(existing) => - db.run( - configs.filter(_.configKey === key) - .map(c => (c.configValue, c.updatedBy, c.updatedAt)) - .update((value, updatedBy, LocalDateTime.now())) - ).map(_ > 0) - case None => - db.run( - configs += PdsFleetConfig(configKey = key, configValue = value, updatedBy = updatedBy) - ).map(_ > 0) - } -} diff --git a/app/repositories/PdsSubmissionRepository.scala b/app/repositories/PdsSubmissionRepository.scala deleted file mode 100644 index 8f2b7e79..00000000 --- a/app/repositories/PdsSubmissionRepository.scala +++ /dev/null @@ -1,86 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.MetadataSchema -import models.domain.pds.PdsSubmission -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import play.db.NamedDatabase -import slick.jdbc.JdbcProfile - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait PdsSubmissionRepository { - def create(submission: PdsSubmission): Future[PdsSubmission] - def findById(id: Int): Future[Option[PdsSubmission]] - def findByNode(nodeId: Int, limit: Int = 100): Future[Seq[PdsSubmission]] - def findByNodeAndType(nodeId: Int, submissionType: String): Future[Seq[PdsSubmission]] - def findByBiosampleId(biosampleId: Int): Future[Seq[PdsSubmission]] - def findByBiosampleGuid(guid: UUID): Future[Seq[PdsSubmission]] - def findByStatus(status: String, limit: Int = 100): Future[Seq[PdsSubmission]] - def findByTypeAndStatus(submissionType: String, status: String, limit: Int = 100): Future[Seq[PdsSubmission]] - def updateStatus(id: Int, status: String, reviewedBy: Option[String], reviewNotes: Option[String]): Future[Boolean] - def countByNodeAndStatus(nodeId: Int): Future[Map[String, Int]] - def 
countByStatus(): Future[Map[String, Int]] -} - -@Singleton -class PdsSubmissionRepositoryImpl @Inject()( - @NamedDatabase("metadata") protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HasDatabaseConfigProvider[JdbcProfile] - with PdsSubmissionRepository - with Logging { - - import profile.api.* - import models.dal.MyPostgresProfile.api.playJsonTypeMapper - - private val submissions = MetadataSchema.pdsSubmissions - - override def create(submission: PdsSubmission): Future[PdsSubmission] = - db.run((submissions returning submissions.map(_.id) into ((s, id) => s.copy(id = Some(id)))) += submission) - - override def findById(id: Int): Future[Option[PdsSubmission]] = - db.run(submissions.filter(_.id === id).result.headOption) - - override def findByNode(nodeId: Int, limit: Int): Future[Seq[PdsSubmission]] = - db.run(submissions.filter(_.pdsNodeId === nodeId).sortBy(_.createdAt.desc).take(limit).result) - - override def findByNodeAndType(nodeId: Int, submissionType: String): Future[Seq[PdsSubmission]] = - db.run(submissions.filter(s => s.pdsNodeId === nodeId && s.submissionType === submissionType) - .sortBy(_.createdAt.desc).result) - - override def findByBiosampleId(biosampleId: Int): Future[Seq[PdsSubmission]] = - db.run(submissions.filter(_.biosampleId === biosampleId).sortBy(_.createdAt.desc).result) - - override def findByBiosampleGuid(guid: UUID): Future[Seq[PdsSubmission]] = - db.run(submissions.filter(_.biosampleGuid === guid).sortBy(_.createdAt.desc).result) - - override def findByStatus(status: String, limit: Int): Future[Seq[PdsSubmission]] = - db.run(submissions.filter(_.status === status).sortBy(_.createdAt.desc).take(limit).result) - - override def findByTypeAndStatus(submissionType: String, status: String, limit: Int): Future[Seq[PdsSubmission]] = - db.run(submissions.filter(s => s.submissionType === submissionType && s.status === status) - .sortBy(_.createdAt.desc).take(limit).result) - - override def 
updateStatus(id: Int, status: String, reviewedBy: Option[String], reviewNotes: Option[String]): Future[Boolean] = - db.run( - submissions.filter(_.id === id) - .map(s => (s.status, s.reviewedBy, s.reviewedAt, s.reviewNotes)) - .update((status, reviewedBy, Some(LocalDateTime.now()), reviewNotes)) - ).map(_ > 0) - - override def countByNodeAndStatus(nodeId: Int): Future[Map[String, Int]] = - db.run( - submissions.filter(_.pdsNodeId === nodeId) - .groupBy(_.status).map { case (status, group) => (status, group.length) } - .result - ).map(_.toMap) - - override def countByStatus(): Future[Map[String, Int]] = - db.run( - submissions.groupBy(_.status).map { case (status, group) => (status, group.length) }.result - ).map(_.toMap) -} diff --git a/app/repositories/PopulationBreakdownCacheRepository.scala b/app/repositories/PopulationBreakdownCacheRepository.scala deleted file mode 100644 index f9c0e971..00000000 --- a/app/repositories/PopulationBreakdownCacheRepository.scala +++ /dev/null @@ -1,61 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.ibd.PopulationBreakdownCache -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.ZonedDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait PopulationBreakdownCacheRepository { - def upsert(cache: PopulationBreakdownCache): Future[PopulationBreakdownCache] - def findBySampleGuid(sampleGuid: UUID): Future[Option[PopulationBreakdownCache]] - def findAll(): Future[Seq[PopulationBreakdownCache]] - def findAllSampleGuids(): Future[Seq[UUID]] - def deleteBySampleGuid(sampleGuid: UUID): Future[Boolean] -} - -@Singleton -class PopulationBreakdownCacheRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HasDatabaseConfigProvider[JdbcProfile] - with 
PopulationBreakdownCacheRepository - with Logging { - - import profile.api.* - import models.dal.MyPostgresProfile.api.playJsonTypeMapper - - private val cache = DatabaseSchema.domain.ibd.populationBreakdownCache - - override def upsert(entry: PopulationBreakdownCache): Future[PopulationBreakdownCache] = { - findBySampleGuid(entry.sampleGuid).flatMap { - case Some(existing) if existing.breakdownHash == entry.breakdownHash => - Future.successful(existing) - case Some(existing) => - db.run( - cache.filter(_.sampleGuid === entry.sampleGuid) - .map(c => (c.breakdown, c.breakdownHash, c.cachedAt, c.sourceAtUri)) - .update((entry.breakdown, entry.breakdownHash, ZonedDateTime.now(), entry.sourceAtUri)) - ).map(_ => entry.copy(id = existing.id)) - case None => - db.run((cache returning cache.map(_.id) into ((c, id) => c.copy(id = Some(id)))) += entry) - } - } - - override def findBySampleGuid(sampleGuid: UUID): Future[Option[PopulationBreakdownCache]] = - db.run(cache.filter(_.sampleGuid === sampleGuid).result.headOption) - - override def findAll(): Future[Seq[PopulationBreakdownCache]] = - db.run(cache.result) - - override def findAllSampleGuids(): Future[Seq[UUID]] = - db.run(cache.map(_.sampleGuid).result) - - override def deleteBySampleGuid(sampleGuid: UUID): Future[Boolean] = - db.run(cache.filter(_.sampleGuid === sampleGuid).delete).map(_ > 0) -} diff --git a/app/repositories/PopulationBreakdownRepository.scala b/app/repositories/PopulationBreakdownRepository.scala deleted file mode 100644 index d1c5a7b9..00000000 --- a/app/repositories/PopulationBreakdownRepository.scala +++ /dev/null @@ -1,26 +0,0 @@ -package repositories - -import models.domain.genomics.{PopulationBreakdown, PopulationComponent, SuperPopulationSummary} - -import java.util.UUID -import scala.concurrent.Future - -trait PopulationBreakdownRepository { - def findById(id: Int): Future[Option[PopulationBreakdown]] - def findByAtUri(atUri: String): Future[Option[PopulationBreakdown]] - def 
findBySampleGuid(sampleGuid: UUID): Future[Option[PopulationBreakdown]] - def create(breakdown: PopulationBreakdown): Future[PopulationBreakdown] - def upsertByAtUri(breakdown: PopulationBreakdown): Future[PopulationBreakdown] - def update(breakdown: PopulationBreakdown): Future[Boolean] - def softDelete(id: Int): Future[Boolean] - - // Population Components - def findComponentsByBreakdownId(breakdownId: Int): Future[Seq[PopulationComponent]] - def createComponent(component: PopulationComponent): Future[PopulationComponent] - def upsertComponentsByBreakdownId(breakdownId: Int, components: Seq[PopulationComponent]): Future[Seq[PopulationComponent]] - - // Super Population Summaries - def findSummariesByBreakdownId(breakdownId: Int): Future[Seq[SuperPopulationSummary]] - def createSummary(summary: SuperPopulationSummary): Future[SuperPopulationSummary] - def upsertSummariesByBreakdownId(breakdownId: Int, summaries: Seq[SuperPopulationSummary]): Future[Seq[SuperPopulationSummary]] -} diff --git a/app/repositories/PopulationBreakdownRepositoryImpl.scala b/app/repositories/PopulationBreakdownRepositoryImpl.scala deleted file mode 100644 index 775033e2..00000000 --- a/app/repositories/PopulationBreakdownRepositoryImpl.scala +++ /dev/null @@ -1,118 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{PopulationBreakdown, PopulationComponent, SuperPopulationSummary} -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class PopulationBreakdownRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends PopulationBreakdownRepository with HasDatabaseConfigProvider[JdbcProfile] { - - private val breakdowns 
= DatabaseSchema.domain.genomics.populationBreakdowns - private val components = DatabaseSchema.domain.genomics.populationComponents - private val summaries = DatabaseSchema.domain.genomics.superPopulationSummaries - - override def findById(id: Int): Future[Option[PopulationBreakdown]] = { - db.run(breakdowns.filter(b => b.id === id && !b.deleted).result.headOption) - } - - override def findByAtUri(atUri: String): Future[Option[PopulationBreakdown]] = { - db.run(breakdowns.filter(b => b.atUri === atUri && !b.deleted).result.headOption) - } - - override def findBySampleGuid(sampleGuid: UUID): Future[Option[PopulationBreakdown]] = { - db.run(breakdowns.filter(b => b.sampleGuid === sampleGuid && !b.deleted).result.headOption) - } - - override def create(breakdown: PopulationBreakdown): Future[PopulationBreakdown] = { - db.run( - (breakdowns returning breakdowns.map(_.id) - into ((b, id) => b.copy(id = Some(id)))) += breakdown - ) - } - - override def upsertByAtUri(breakdown: PopulationBreakdown): Future[PopulationBreakdown] = { - breakdown.atUri match { - case None => create(breakdown) - case Some(uri) => - findByAtUri(uri).flatMap { - case Some(existing) => - val updated = breakdown.copy( - id = existing.id, - createdAt = existing.createdAt, - updatedAt = LocalDateTime.now() - ) - update(updated).map(_ => updated) - case None => create(breakdown) - } - } - } - - override def update(breakdown: PopulationBreakdown): Future[Boolean] = { - breakdown.id match { - case None => Future.successful(false) - case Some(id) => - val updated = breakdown.copy(updatedAt = LocalDateTime.now()) - db.run(breakdowns.filter(_.id === id).update(updated)).map(_ > 0) - } - } - - override def softDelete(id: Int): Future[Boolean] = { - db.run( - breakdowns.filter(_.id === id) - .map(b => (b.deleted, b.updatedAt)) - .update((true, LocalDateTime.now())) - ).map(_ > 0) - } - - // Population Components - override def findComponentsByBreakdownId(breakdownId: Int): Future[Seq[PopulationComponent]] 
= { - db.run(components.filter(_.populationBreakdownId === breakdownId).result) - } - - override def createComponent(component: PopulationComponent): Future[PopulationComponent] = { - db.run( - (components returning components.map(_.id) - into ((c, id) => c.copy(id = Some(id)))) += component - ) - } - - override def upsertComponentsByBreakdownId(breakdownId: Int, newComponents: Seq[PopulationComponent]): Future[Seq[PopulationComponent]] = { - val action = for { - _ <- components.filter(_.populationBreakdownId === breakdownId).delete - result <- (components returning components.map(_.id) - into ((c, id) => c.copy(id = Some(id)))) ++= newComponents.map(_.copy(populationBreakdownId = breakdownId)) - } yield result - db.run(action.transactionally) - } - - // Super Population Summaries - override def findSummariesByBreakdownId(breakdownId: Int): Future[Seq[SuperPopulationSummary]] = { - db.run(summaries.filter(_.populationBreakdownId === breakdownId).result) - } - - override def createSummary(summary: SuperPopulationSummary): Future[SuperPopulationSummary] = { - db.run( - (summaries returning summaries.map(_.id) - into ((s, id) => s.copy(id = Some(id)))) += summary - ) - } - - override def upsertSummariesByBreakdownId(breakdownId: Int, newSummaries: Seq[SuperPopulationSummary]): Future[Seq[SuperPopulationSummary]] = { - val action = for { - _ <- summaries.filter(_.populationBreakdownId === breakdownId).delete - result <- (summaries returning summaries.map(_.id) - into ((s, id) => s.copy(id = Some(id)))) ++= newSummaries.map(_.copy(populationBreakdownId = breakdownId)) - } yield result - db.run(action.transactionally) - } -} diff --git a/app/repositories/PopulationOverlapScoreRepository.scala b/app/repositories/PopulationOverlapScoreRepository.scala deleted file mode 100644 index 9a7ed19e..00000000 --- a/app/repositories/PopulationOverlapScoreRepository.scala +++ /dev/null @@ -1,65 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import 
models.dal.DatabaseSchema -import models.domain.ibd.PopulationOverlapScore -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.time.ZonedDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait PopulationOverlapScoreRepository { - def upsert(score: PopulationOverlapScore): Future[PopulationOverlapScore] - def findByPair(guid1: UUID, guid2: UUID): Future[Option[PopulationOverlapScore]] - def findBySample(sampleGuid: UUID, minScore: Double): Future[Seq[PopulationOverlapScore]] - def deleteAll(): Future[Int] -} - -@Singleton -class PopulationOverlapScoreRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends HasDatabaseConfigProvider[JdbcProfile] - with PopulationOverlapScoreRepository - with Logging { - - import profile.api.* - - private val scores = DatabaseSchema.domain.ibd.populationOverlapScores - - private def ordered(g1: UUID, g2: UUID): (UUID, UUID) = - if (g1.compareTo(g2) < 0) (g1, g2) else (g2, g1) - - override def upsert(score: PopulationOverlapScore): Future[PopulationOverlapScore] = { - val (g1, g2) = ordered(score.sampleGuid1, score.sampleGuid2) - val normalized = score.copy(sampleGuid1 = g1, sampleGuid2 = g2) - findByPair(g1, g2).flatMap { - case Some(existing) => - db.run( - scores.filter(_.id === existing.id.get) - .map(s => (s.overlapScore, s.computedAt)) - .update((normalized.overlapScore, ZonedDateTime.now())) - ).map(_ => normalized.copy(id = existing.id)) - case None => - db.run((scores returning scores.map(_.id) into ((s, id) => s.copy(id = Some(id)))) += normalized) - } - } - - override def findByPair(guid1: UUID, guid2: UUID): Future[Option[PopulationOverlapScore]] = { - val (g1, g2) = ordered(guid1, guid2) - db.run(scores.filter(s => s.sampleGuid1 === g1 && s.sampleGuid2 === g2).result.headOption) - } - - override def 
findBySample(sampleGuid: UUID, minScore: Double): Future[Seq[PopulationOverlapScore]] = - db.run( - scores.filter(s => - (s.sampleGuid1 === sampleGuid || s.sampleGuid2 === sampleGuid) && s.overlapScore >= minScore - ).sortBy(_.overlapScore.desc).result - ) - - override def deleteAll(): Future[Int] = - db.run(scores.delete) -} diff --git a/app/repositories/PrivateVariantRepository.scala b/app/repositories/PrivateVariantRepository.scala deleted file mode 100644 index 41650e85..00000000 --- a/app/repositories/PrivateVariantRepository.scala +++ /dev/null @@ -1,70 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.discovery.* -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait PrivateVariantRepository { - def create(pv: BiosamplePrivateVariant): Future[BiosamplePrivateVariant] - def createAll(pvs: Seq[BiosamplePrivateVariant]): Future[Seq[BiosamplePrivateVariant]] - def findBySample(sampleType: BiosampleSourceType, sampleId: Int): Future[Seq[BiosamplePrivateVariant]] - def findBySampleGuid(sampleGuid: UUID): Future[Seq[BiosamplePrivateVariant]] - def findByVariantId(variantId: Int): Future[Seq[BiosamplePrivateVariant]] - def findByTerminalHaplogroup(haplogroupId: Int): Future[Seq[BiosamplePrivateVariant]] - def findActiveByVariantIds(variantIds: Set[Int], haplogroupType: HaplogroupType): Future[Seq[BiosamplePrivateVariant]] - def updateStatus(id: Int, status: PrivateVariantStatus): Future[Boolean] - def countByVariant(variantId: Int, haplogroupType: HaplogroupType): Future[Int] -} - -class PrivateVariantRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with PrivateVariantRepository - with Logging { - - import models.dal.DatabaseSchema.domain.haplogroups.biosamplePrivateVariants - import 
models.dal.MyPostgresProfile.api.* - - override def create(pv: BiosamplePrivateVariant): Future[BiosamplePrivateVariant] = { - val action = (biosamplePrivateVariants returning biosamplePrivateVariants.map(_.id) - into ((row, id) => row.copy(id = Some(id)))) += pv - runQuery(action) - } - - override def createAll(pvs: Seq[BiosamplePrivateVariant]): Future[Seq[BiosamplePrivateVariant]] = { - val action = (biosamplePrivateVariants returning biosamplePrivateVariants.map(_.id) - into ((row, id) => row.copy(id = Some(id)))) ++= pvs - runQuery(action) - } - - override def findBySample(sampleType: BiosampleSourceType, sampleId: Int): Future[Seq[BiosamplePrivateVariant]] = - runQuery(biosamplePrivateVariants.filter(pv => pv.sampleType === sampleType && pv.sampleId === sampleId).result) - - override def findBySampleGuid(sampleGuid: UUID): Future[Seq[BiosamplePrivateVariant]] = - runQuery(biosamplePrivateVariants.filter(_.sampleGuid === sampleGuid).result) - - override def findByVariantId(variantId: Int): Future[Seq[BiosamplePrivateVariant]] = - runQuery(biosamplePrivateVariants.filter(_.variantId === variantId).result) - - override def findByTerminalHaplogroup(haplogroupId: Int): Future[Seq[BiosamplePrivateVariant]] = - runQuery(biosamplePrivateVariants.filter(_.terminalHaplogroupId === haplogroupId).result) - - override def findActiveByVariantIds(variantIds: Set[Int], haplogroupType: HaplogroupType): Future[Seq[BiosamplePrivateVariant]] = - runQuery(biosamplePrivateVariants - .filter(pv => pv.variantId.inSet(variantIds) && pv.haplogroupType === haplogroupType && pv.status === (PrivateVariantStatus.Active: PrivateVariantStatus)) - .result) - - override def updateStatus(id: Int, status: PrivateVariantStatus): Future[Boolean] = - runQuery(biosamplePrivateVariants.filter(_.id === id).map(_.status).update(status).map(_ > 0)) - - override def countByVariant(variantId: Int, haplogroupType: HaplogroupType): Future[Int] = - runQuery(biosamplePrivateVariants - .filter(pv => 
pv.variantId === variantId && pv.haplogroupType === haplogroupType && pv.status === (PrivateVariantStatus.Active: PrivateVariantStatus)) - .length.result) -} diff --git a/app/repositories/ProjectRepository.scala b/app/repositories/ProjectRepository.scala deleted file mode 100644 index 8cfc9ee0..00000000 --- a/app/repositories/ProjectRepository.scala +++ /dev/null @@ -1,88 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.MyPostgresProfile.api.* -import models.dal.{DatabaseSchema, MyPostgresProfile} -import models.domain.Project -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait ProjectRepository { - def create(project: Project): Future[Project] - - def findByProjectGuid(projectGuid: UUID): Future[Option[Project]] - - def findByAtUri(atUri: String): Future[Option[Project]] - - def update(project: Project, expectedAtCid: Option[String]): Future[Boolean] - - def softDelete(projectGuid: UUID): Future[Boolean] - - def softDeleteByAtUri(atUri: String): Future[Boolean] -} - -@Singleton -class ProjectRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) extends ProjectRepository with HasDatabaseConfigProvider[MyPostgresProfile] { - - private val projects = DatabaseSchema.domain.project.projects - - override def create(project: Project): Future[Project] = { - val insertQuery = (projects returning projects.map(_.id) - into ((p, id) => p.copy(id = Some(id)))) += project - db.run(insertQuery) - } - - override def findByProjectGuid(projectGuid: UUID): Future[Option[Project]] = { - db.run(projects.filter(p => p.projectGuid === projectGuid && !p.deleted).result.headOption) - } - - override def findByAtUri(atUri: String): Future[Option[Project]] = { - db.run(projects.filter(p => p.atUri === atUri && !p.deleted).result.headOption) - 
} - - override def update(project: Project, expectedAtCid: Option[String]): Future[Boolean] = { - val query = projects.filter { p => - p.projectGuid === project.projectGuid && - p.atCid === expectedAtCid - } - - val updateAction = query.map(p => ( - p.name, - p.description, - p.ownerDid, - p.atUri, - p.atCid, - p.updatedAt, - p.deleted - )).update(( - project.name, - project.description, - project.ownerDid, - project.atUri, - project.atCid, - LocalDateTime.now(), - project.deleted - )) - - db.run(updateAction.map(_ > 0)) - } - - override def softDelete(projectGuid: UUID): Future[Boolean] = { - val q = projects.filter(_.projectGuid === projectGuid) - .map(p => (p.deleted, p.updatedAt)) - .update((true, LocalDateTime.now())) - db.run(q.map(_ > 0)) - } - - override def softDeleteByAtUri(atUri: String): Future[Boolean] = { - val q = projects.filter(_.atUri === atUri) - .map(p => (p.deleted, p.updatedAt)) - .update((true, LocalDateTime.now())) - db.run(q.map(_ > 0)) - } -} diff --git a/app/repositories/ProposedBranchRepository.scala b/app/repositories/ProposedBranchRepository.scala deleted file mode 100644 index 3c96673e..00000000 --- a/app/repositories/ProposedBranchRepository.scala +++ /dev/null @@ -1,126 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.discovery.* -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait ProposedBranchRepository { - def create(pb: ProposedBranch): Future[ProposedBranch] - def findById(id: Int): Future[Option[ProposedBranch]] - def findByParentAndType(parentHaplogroupId: Int, haplogroupType: HaplogroupType): Future[Seq[ProposedBranch]] - def findByStatus(status: ProposedBranchStatus, haplogroupType: Option[HaplogroupType] = None): Future[Seq[ProposedBranch]] - def update(pb: ProposedBranch): Future[Boolean] - def updateStatus(id: Int, 
status: ProposedBranchStatus): Future[Boolean] - def updateConsensus(id: Int, consensusCount: Int, confidenceScore: Double): Future[Boolean] - - // Variant operations - def addVariant(pbv: ProposedBranchVariant): Future[ProposedBranchVariant] - def getVariants(proposedBranchId: Int): Future[Seq[ProposedBranchVariant]] - def getVariantIds(proposedBranchId: Int): Future[Set[Int]] - def updateVariantEvidence(proposedBranchId: Int, variantId: Int, evidenceCount: Int): Future[Boolean] - - // Evidence operations - def addEvidence(evidence: ProposedBranchEvidence): Future[ProposedBranchEvidence] - def getEvidence(proposedBranchId: Int): Future[Seq[ProposedBranchEvidence]] - def countEvidence(proposedBranchId: Int): Future[Int] - - // Config - def getConfig(haplogroupType: HaplogroupType, configKey: String): Future[Option[String]] -} - -class ProposedBranchRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with ProposedBranchRepository - with Logging { - - import models.dal.DatabaseSchema.domain.haplogroups.{proposedBranches, proposedBranchVariants, proposedBranchEvidence, discoveryConfig} - import models.dal.MyPostgresProfile.api.* - - private def activeBranches = proposedBranches.filter(pb => - pb.status =!= (ProposedBranchStatus.Rejected: ProposedBranchStatus) && - pb.status =!= (ProposedBranchStatus.Promoted: ProposedBranchStatus) - ) - - override def create(pb: ProposedBranch): Future[ProposedBranch] = { - val action = (proposedBranches returning proposedBranches.map(_.id) - into ((row, id) => row.copy(id = Some(id)))) += pb - runQuery(action) - } - - override def findById(id: Int): Future[Option[ProposedBranch]] = - runQuery(proposedBranches.filter(_.id === id).result.headOption) - - override def findByParentAndType(parentHaplogroupId: Int, haplogroupType: HaplogroupType): Future[Seq[ProposedBranch]] = - runQuery(activeBranches - .filter(pb => pb.parentHaplogroupId === 
parentHaplogroupId && pb.haplogroupType === haplogroupType) - .result) - - override def findByStatus(status: ProposedBranchStatus, haplogroupType: Option[HaplogroupType]): Future[Seq[ProposedBranch]] = { - val base = proposedBranches.filter(_.status === status) - val filtered = haplogroupType.fold(base)(ht => base.filter(_.haplogroupType === ht)) - runQuery(filtered.result) - } - - override def update(pb: ProposedBranch): Future[Boolean] = - runQuery(proposedBranches.filter(_.id === pb.id.get) - .map(r => (r.proposedName, r.status, r.consensusCount, r.confidenceScore, r.updatedAt, r.reviewedAt, r.reviewedBy, r.notes, r.promotedHaplogroupId)) - .update((pb.proposedName, pb.status, pb.consensusCount, pb.confidenceScore, LocalDateTime.now(), pb.reviewedAt, pb.reviewedBy, pb.notes, pb.promotedHaplogroupId)) - .map(_ > 0)) - - override def updateStatus(id: Int, status: ProposedBranchStatus): Future[Boolean] = - runQuery(proposedBranches.filter(_.id === id).map(r => (r.status, r.updatedAt)).update((status, LocalDateTime.now())).map(_ > 0)) - - override def updateConsensus(id: Int, consensusCount: Int, confidenceScore: Double): Future[Boolean] = - runQuery(proposedBranches.filter(_.id === id) - .map(r => (r.consensusCount, r.confidenceScore, r.updatedAt)) - .update((consensusCount, confidenceScore, LocalDateTime.now())) - .map(_ > 0)) - - // Variant operations - override def addVariant(pbv: ProposedBranchVariant): Future[ProposedBranchVariant] = { - val action = (proposedBranchVariants returning proposedBranchVariants.map(_.id) - into ((row, id) => row.copy(id = Some(id)))) += pbv - runQuery(action) - } - - override def getVariants(proposedBranchId: Int): Future[Seq[ProposedBranchVariant]] = - runQuery(proposedBranchVariants.filter(_.proposedBranchId === proposedBranchId).result) - - override def getVariantIds(proposedBranchId: Int): Future[Set[Int]] = - runQuery(proposedBranchVariants.filter(_.proposedBranchId === proposedBranchId).map(_.variantId).result).map(_.toSet) - 
- override def updateVariantEvidence(proposedBranchId: Int, variantId: Int, evidenceCount: Int): Future[Boolean] = - runQuery(proposedBranchVariants - .filter(pbv => pbv.proposedBranchId === proposedBranchId && pbv.variantId === variantId) - .map(r => (r.evidenceCount, r.lastObservedAt)) - .update((evidenceCount, LocalDateTime.now())) - .map(_ > 0)) - - // Evidence operations - override def addEvidence(evidence: ProposedBranchEvidence): Future[ProposedBranchEvidence] = { - val action = (proposedBranchEvidence returning proposedBranchEvidence.map(_.id) - into ((row, id) => row.copy(id = Some(id)))) += evidence - runQuery(action) - } - - override def getEvidence(proposedBranchId: Int): Future[Seq[ProposedBranchEvidence]] = - runQuery(proposedBranchEvidence.filter(_.proposedBranchId === proposedBranchId).result) - - override def countEvidence(proposedBranchId: Int): Future[Int] = - runQuery(proposedBranchEvidence.filter(_.proposedBranchId === proposedBranchId).length.result) - - // Config - override def getConfig(haplogroupType: HaplogroupType, configKey: String): Future[Option[String]] = - runQuery(discoveryConfig - .filter(c => c.haplogroupType === haplogroupType && c.configKey === configKey) - .map(_.configValue) - .result.headOption) -} diff --git a/app/repositories/PublicationBiosampleRepository.scala b/app/repositories/PublicationBiosampleRepository.scala deleted file mode 100644 index ad82aebb..00000000 --- a/app/repositories/PublicationBiosampleRepository.scala +++ /dev/null @@ -1,93 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.DatabaseSchema -import models.domain.publications.PublicationBiosample -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import scala.concurrent.{ExecutionContext, Future} - -/** - * The `PublicationBiosampleRepository` trait provides an abstraction for operations - * related to biosample data associated with a 
specific publication. - */ -trait PublicationBiosampleRepository { - /** - * Counts the number of biosamples associated with the specified publication. - * - * @param publicationId The unique identifier of the publication. - * @return A `Future` containing the count of associated biosamples as an `Int`. - */ - def countSamplesForPublication(publicationId: Int): Future[Int] - - /** - * Creates a new association between a publication and a biosample in the system. - * - * @param link The `PublicationBiosample` object representing the association to be created. - * It contains the publication ID and the biosample ID to be linked. - * @return A `Future` containing the created `PublicationBiosample` object with the details of the new association. - */ - def create(link: PublicationBiosample): Future[PublicationBiosample] - - /** - * Retrieves all `PublicationBiosample` entries associated with the specified biosample ID. - * - * @param biosampleId The unique identifier of the biosample for which associated entries are to be retrieved. - * @return A `Future` containing a sequence of `PublicationBiosample` objects associated with the given biosample ID. - */ - def findByBiosampleId(biosampleId: Int): Future[Seq[PublicationBiosample]] - - /** - * Deletes all `PublicationBiosample` entries associated with the specified biosample ID. - * - * @param biosampleId The unique identifier of the biosample for which associated entries are to be deleted. - * @return A `Future` containing the number of deleted rows. 
- */ - def deleteByBiosampleId(biosampleId: Int): Future[Int] -} - -class PublicationBiosampleRepositoryImpl @Inject()(protected val dbConfigProvider: DatabaseConfigProvider)(implicit ec: ExecutionContext) - extends PublicationBiosampleRepository with HasDatabaseConfigProvider[JdbcProfile] with Logging { - - import profile.api.* - - private val publicationBiosamples = DatabaseSchema.domain.publications.publicationBiosamples - - override def countSamplesForPublication(publicationId: Int): Future[Int] = { - val query = publicationBiosamples.filter(_.publicationId === publicationId).length - db.run(query.result) - } - - override def create(link: PublicationBiosample): Future[PublicationBiosample] = { - logger.info(s"Linking: $link") - - // Create a query to check for existing link - val existingQuery = publicationBiosamples - .filter(pb => - pb.publicationId === link.publicationId && - pb.biosampleId === link.biosampleId - ) - - // Create an upsert action - val upsertAction = existingQuery.result.headOption.flatMap { - case Some(_) => - // Link already exists, no need to update - DBIO.successful(link) - case None => - // Insert new link - publicationBiosamples += link - }.transactionally - - db.run(upsertAction).map(_ => link) - } - - override def findByBiosampleId(biosampleId: Int): Future[Seq[PublicationBiosample]] = { - db.run(publicationBiosamples.filter(_.biosampleId === biosampleId).result) - } - - override def deleteByBiosampleId(biosampleId: Int): Future[Int] = { - db.run(publicationBiosamples.filter(_.biosampleId === biosampleId).delete) - } -} diff --git a/app/repositories/PublicationCandidateRepository.scala b/app/repositories/PublicationCandidateRepository.scala deleted file mode 100644 index 8c3a2457..00000000 --- a/app/repositories/PublicationCandidateRepository.scala +++ /dev/null @@ -1,139 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import 
models.dal.domain.publications.PublicationCandidatesTable -import models.domain.publications.PublicationCandidate -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait PublicationCandidateRepository { - def create(candidate: PublicationCandidate): Future[PublicationCandidate] - def findById(id: Int): Future[Option[PublicationCandidate]] - def findByOpenAlexId(id: String): Future[Option[PublicationCandidate]] - def listPending(page: Int, pageSize: Int): Future[(Seq[PublicationCandidate], Long)] - def listByStatus(status: String, page: Int, pageSize: Int): Future[(Seq[PublicationCandidate], Long)] - def updateStatus(id: Int, status: String, reviewedBy: Option[UUID], reason: Option[String]): Future[Boolean] - def bulkUpdateStatus(ids: Seq[Int], status: String, reviewedBy: UUID, reason: Option[String]): Future[Int] - def bulkReject(ids: Seq[Int], reason: String, reviewedBy: UUID): Future[Int] - def saveCandidates(candidates: Seq[PublicationCandidate]): Future[Seq[PublicationCandidate]] - def countByStatus(): Future[Map[String, Int]] - def listReviewed(): Future[Seq[PublicationCandidate]] -} - -@Singleton -class PublicationCandidateRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider -)(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with PublicationCandidateRepository { - - import models.dal.MyPostgresProfile.api.* - - private val candidatesTable = DatabaseSchema.domain.publications.publicationCandidates - - override def create(candidate: PublicationCandidate): Future[PublicationCandidate] = { - db.run((candidatesTable returning candidatesTable.map(_.id) - into ((c, id) => c.copy(id = Some(id)))) += candidate) - } - - override def findById(id: Int): Future[Option[PublicationCandidate]] = { - db.run(candidatesTable.filter(_.id === id).result.headOption) - } - - override 
def findByOpenAlexId(id: String): Future[Option[PublicationCandidate]] = { - db.run(candidatesTable.filter(_.openAlexId === id).result.headOption) - } - - override def listPending(page: Int, pageSize: Int): Future[(Seq[PublicationCandidate], Long)] = { - val query = candidatesTable.filter(_.status === "pending") - val totalCountQuery = query.length.result - - val pagedQuery = query - .sortBy(_.relevanceScore.desc.nullsLast) - .drop((page - 1) * pageSize) - .take(pageSize) - .result - - for { - total <- db.run(totalCountQuery) - results <- db.run(pagedQuery) - } yield (results, total.toLong) - } - - override def updateStatus(id: Int, status: String, reviewedBy: Option[UUID], reason: Option[String]): Future[Boolean] = { - val updateAction = candidatesTable.filter(_.id === id) - .map(c => (c.status, c.reviewedBy, c.reviewedAt, c.rejectionReason)) - .update((status, reviewedBy, Some(LocalDateTime.now()), reason)) - - db.run(updateAction).map(_ > 0) - } - - override def listByStatus(status: String, page: Int, pageSize: Int): Future[(Seq[PublicationCandidate], Long)] = { - val query = candidatesTable.filter(_.status === status) - val totalCountQuery = query.length.result - - val pagedQuery = query - .sortBy(_.relevanceScore.desc.nullsLast) - .drop((page - 1) * pageSize) - .take(pageSize) - .result - - for { - total <- db.run(totalCountQuery) - results <- db.run(pagedQuery) - } yield (results, total.toLong) - } - - override def bulkUpdateStatus(ids: Seq[Int], status: String, reviewedBy: UUID, reason: Option[String]): Future[Int] = { - if (ids.isEmpty) return Future.successful(0) - val updateAction = candidatesTable.filter(_.id.inSet(ids)) - .map(c => (c.status, c.reviewedBy, c.reviewedAt, c.rejectionReason)) - .update((status, Some(reviewedBy), Some(LocalDateTime.now()), reason)) - db.run(updateAction) - } - - override def bulkReject(ids: Seq[Int], reason: String, reviewedBy: UUID): Future[Int] = { - val updateAction = candidatesTable.filter(_.id.inSet(ids)) - .map(c => 
(c.status, c.reviewedBy, c.reviewedAt, c.rejectionReason)) - .update(("rejected", Some(reviewedBy), Some(LocalDateTime.now()), Some(reason))) - - db.run(updateAction) - } - - override def saveCandidates(candidates: Seq[PublicationCandidate]): Future[Seq[PublicationCandidate]] = { - // Insert or Ignore (ON CONFLICT DO NOTHING) is standard for this. - // But PublicationCandidate.openAlexId is unique. - // Slick doesn't have a built-in "insertOrIgnoreAll" easily. - // We can do it one by one or filter existing first. - // For simplicity, let's filter existing by openAlexId. - - if (candidates.isEmpty) return Future.successful(Seq.empty) - - val openAlexIds = candidates.map(_.openAlexId) - - for { - existingIds <- db.run(candidatesTable.filter(_.openAlexId.inSet(openAlexIds)).map(_.openAlexId).result) - newCandidates = candidates.filterNot(c => existingIds.contains(c.openAlexId)) - saved <- if (newCandidates.nonEmpty) { - db.run((candidatesTable returning candidatesTable.map(_.id) - into ((c, id) => c.copy(id = Some(id)))) ++= newCandidates) - } else { - Future.successful(Seq.empty) - } - } yield saved - } - - override def countByStatus(): Future[Map[String, Int]] = { - db.run(candidatesTable.groupBy(_.status).map { case (status, group) => - (status, group.length) - }.result).map(_.toMap) - } - - override def listReviewed(): Future[Seq[PublicationCandidate]] = { - db.run(candidatesTable.filter(c => c.status === "accepted" || c.status === "rejected").result) - } -} diff --git a/app/repositories/PublicationCitizenBiosampleRepository.scala b/app/repositories/PublicationCitizenBiosampleRepository.scala deleted file mode 100644 index 02d3d207..00000000 --- a/app/repositories/PublicationCitizenBiosampleRepository.scala +++ /dev/null @@ -1,43 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.DatabaseSchema -import models.domain.publications.PublicationCitizenBiosample -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, 
HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import scala.concurrent.{ExecutionContext, Future} - -trait PublicationCitizenBiosampleRepository { - def create(link: PublicationCitizenBiosample): Future[PublicationCitizenBiosample] - - def deleteByCitizenBiosampleId(citizenBiosampleId: Int): Future[Int] -} - -class PublicationCitizenBiosampleRepositoryImpl @Inject()(protected val dbConfigProvider: DatabaseConfigProvider)(implicit ec: ExecutionContext) - extends PublicationCitizenBiosampleRepository with HasDatabaseConfigProvider[JdbcProfile] with Logging { - - import profile.api.* - - private val publicationCitizenBiosamples = DatabaseSchema.domain.publications.publicationCitizenBiosamples - - override def create(link: PublicationCitizenBiosample): Future[PublicationCitizenBiosample] = { - val existingQuery = publicationCitizenBiosamples - .filter(pb => - pb.publicationId === link.publicationId && - pb.citizenBiosampleId === link.citizenBiosampleId - ) - - val upsertAction = existingQuery.result.headOption.flatMap { - case Some(_) => DBIO.successful(link) - case None => publicationCitizenBiosamples += link - }.transactionally - - db.run(upsertAction).map(_ => link) - } - - override def deleteByCitizenBiosampleId(citizenBiosampleId: Int): Future[Int] = { - db.run(publicationCitizenBiosamples.filter(_.citizenBiosampleId === citizenBiosampleId).delete) - } -} diff --git a/app/repositories/PublicationGenomicStudyRepository.scala b/app/repositories/PublicationGenomicStudyRepository.scala deleted file mode 100644 index 10b905d9..00000000 --- a/app/repositories/PublicationGenomicStudyRepository.scala +++ /dev/null @@ -1,184 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.DatabaseSchema -import models.domain.publications.PublicationGenomicStudy -import play.api.db.slick.DatabaseConfigProvider - -import javax.inject.Singleton -import scala.concurrent.{ExecutionContext, Future} - - -/** - * Trait that defines the repository 
interface for managing associations between publications - * and genomic studies (ENA studies). - * - * This repository provides abstraction for common CRUD operations and queries specifically related - * to the `PublicationGenomicStudy` model. - * - * Operations include creating, updating, deleting, and retrieving relationships between publications - * and their associated genomic studies. - */ -trait PublicationGenomicStudyRepository { - /** - * Creates a new association between a publication and a genomic study. - * - * The method persists a `PublicationGenomicStudy` instance in the repository and returns - * the created instance wrapped in a `Future`. This association enables linking a - * specific publication to its corresponding ENA study. - * - * @param link The `PublicationGenomicStudy` instance representing the relationship to be created. - * It contains the unique identifiers for both the publication (`publicationId`) - * and the ENA study (`studyId`). - * @return A `Future` containing the created `PublicationGenomicStudy` instance, as confirmed - * by the repository after a successful operation. - */ - def create(link: PublicationGenomicStudy): Future[PublicationGenomicStudy] - - /** - * Retrieves a sequence of genomic study associations for a specific publication. - * - * This method queries the repository to find all instances of `PublicationGenomicStudy` - * associated with the given publication identifier (`publicationId`). - * - * @param publicationId The unique identifier of the publication for which associated genomic studies are to be retrieved. - * @return A `Future` containing a sequence of `PublicationGenomicStudy` instances, representing the associations found for the specified publication. - */ - def findByPublicationId(publicationId: Int): Future[Seq[PublicationGenomicStudy]] - - /** - * Retrieves a sequence of publication-genomic study associations for a specific ENA study. 
- * - * This method queries the repository to find all `PublicationGenomicStudy` instances - * associated with the given ENA study identifier (`enaStudyId`). This is useful for - * obtaining all publications linked to a specific ENA study. - * - * @param enaStudyId The unique identifier of the ENA study for which associated - * `PublicationGenomicStudy` records are to be retrieved. - * @return A `Future` containing a sequence of `PublicationGenomicStudy` instances - * representing the associations found for the specified ENA study. - */ - def findByEnaStudyId(enaStudyId: Int): Future[Seq[PublicationGenomicStudy]] - - /** - * Updates the given genomic study publication in the system. - * - * @param link the `PublicationGenomicStudy` entity to be updated - * @return a `Future` containing an `Option` of the updated `PublicationGenomicStudy` if successful, or `None` if no updates were made - */ - def update(link: PublicationGenomicStudy): Future[Option[PublicationGenomicStudy]] - - /** - * Deletes the association between a publication and an ENA study. - * - * This method removes the specified `PublicationGenomicStudy` record - * from the repository based on the provided `publicationId` and `enaStudyId`. - * - * @param publicationId The unique identifier of the publication to be unlinked. - * @param enaStudyId The unique identifier of the ENA study to be unlinked. - * @return A `Future` containing the number of rows affected by the deletion. - */ - def delete(publicationId: Int, enaStudyId: Int): Future[Int] - - /** - * Checks if an association between a publication and an ENA study exists. - * - * The method verifies the presence of a `PublicationGenomicStudy` record - * in the repository for the specified publication and ENA study identifiers. - * - * @param publicationId The unique identifier of the publication to check. - * @param enaStudyId The unique identifier of the ENA study to check. 
- * @return A `Future` containing `true` if the association exists, or `false` otherwise. - */ - def exists(publicationId: Int, enaStudyId: Int): Future[Boolean] - - /** - * Retrieves all associations between publications and genomic studies. - * - * This method fetches all `PublicationGenomicStudy` records stored in the repository. - * It is useful for obtaining a complete list of publication-genomic study relationships. - * - * @return A `Future` containing a sequence of `PublicationGenomicStudy` instances, - * representing all the associations present in the repository. - */ - def findAll(): Future[Seq[PublicationGenomicStudy]] -} - -@Singleton -class PublicationGenomicStudyRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with PublicationGenomicStudyRepository { - - import models.dal.MyPostgresProfile.api.* - - val publicationGenomicStudies = DatabaseSchema.domain.publications.publicationGenomicStudies - - override def create(link: PublicationGenomicStudy): Future[PublicationGenomicStudy] = { - runQuery( - publicationGenomicStudies - .returning(publicationGenomicStudies) - .insertOrUpdate(link) - ).map(_ => link) - } - - override def findAll(): Future[Seq[PublicationGenomicStudy]] = { - db.run(publicationGenomicStudies.result) - } - - override def findByPublicationId(publicationId: Int): Future[Seq[PublicationGenomicStudy]] = { - db.run( - publicationGenomicStudies - .filter(_.publicationId === publicationId) - .result - ) - } - - override def findByEnaStudyId(enaStudyId: Int): Future[Seq[PublicationGenomicStudy]] = { - db.run( - publicationGenomicStudies - .filter(_.genomicStudyId === enaStudyId) - .result - ) - } - - override def update(link: PublicationGenomicStudy): Future[Option[PublicationGenomicStudy]] = { - db.run( - publicationGenomicStudies - .filter(r => - r.publicationId === link.publicationId && - r.genomicStudyId === link.studyId - ) - .update(link) 
- ).map { - case 0 => None // No rows were updated - case _ => Some(link) - } - } - - override def delete(publicationId: Int, enaStudyId: Int): Future[Int] = { - db.run( - publicationGenomicStudies - .filter(r => - r.publicationId === publicationId && - r.genomicStudyId === enaStudyId - ) - .delete - ) - } - - override def exists(publicationId: Int, enaStudyId: Int): Future[Boolean] = { - db.run( - publicationGenomicStudies - .filter(r => - r.publicationId === publicationId && - r.genomicStudyId === enaStudyId - ) - .exists - .result - ) - } -} - - diff --git a/app/repositories/PublicationRepository.scala b/app/repositories/PublicationRepository.scala deleted file mode 100644 index 6644c6e0..00000000 --- a/app/repositories/PublicationRepository.scala +++ /dev/null @@ -1,243 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.api.PublicationWithEnaStudiesAndSampleCount -import models.dal.DatabaseSchema -import models.domain.publications.{GenomicStudy, Publication} -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Represents a repository interface for handling operations related to publications and their associated data. - */ -trait PublicationRepository { - /** - * Fetches all publications available in the repository. - * - * @return a Future containing a sequence of Publication objects. - */ - def getAllPublications: Future[Seq[Publication]] - - /** - * Retrieves a sequence of EnaStudy records associated with a specific publication. 
- * - * @param publicationId the unique identifier of the publication for which associated EnaStudy records are to be fetched - * @return a Future containing a sequence of EnaStudy objects related to the specified publication - */ - def getEnaStudiesForPublication(publicationId: Int): Future[Seq[GenomicStudy]] - - /** - * Retrieves a paginated list of publications along with associated ENA studies and their sample counts. - * - * @param page the page number to retrieve (1-based index) - * @param pageSize the number of records to include in each page - * @return a Future containing a sequence of PublicationWithEnaStudiesAndSampleCount objects - */ - def findPublicationsWithDetailsPaginated(page: Int, pageSize: Int): Future[Seq[PublicationWithEnaStudiesAndSampleCount]] - - /** - * Counts the total number of publications available in the repository. - * - * @return a Future containing the total count of publications as a Long - */ - def countAllPublications(): Future[Long] - - /** - * Retrieves all DOIs of existing publications in the repository. - * - * @return A Future containing a sequence of DOIs (Strings). - */ - def getAllDois: Future[Seq[String]] - - /** - * Finds a publication in the repository by its DOI. - * - * @param doi the DOI (Digital Object Identifier) of the publication to search for - * @return a Future containing an Option of Publication, where the Option is: - * - Some(Publication) if a publication with the specified DOI exists - * - None if no publication with the specified DOI is found - */ - def findByDoi(doi: String): Future[Option[Publication]] - - /** - * Saves a publication to the database. If a publication with the same OpenAlex ID or DOI - * already exists, it updates the existing record; otherwise, it inserts a new one. - * - * @param publication The publication to save or update. - * @return A Future containing the saved or updated Publication object (with its database ID). 
- */ - def savePublication(publication: Publication): Future[Publication] - - /** - * Searches publications by a query string, matching against title, authors, and abstract. - * Results are paginated and sorted by relevance (citation percentile, cited by count, date). - * - * @param query The search query string to match against title, authors, and abstract - * @param page The page number to retrieve (1-based index) - * @param pageSize The number of records to include in each page - * @return A Future containing a tuple of (matching publications with details, total count) - */ - def searchPublications(query: String, page: Int, pageSize: Int): Future[(Seq[PublicationWithEnaStudiesAndSampleCount], Long)] -} - -class PublicationRepositoryImpl @Inject()(protected val dbConfigProvider: DatabaseConfigProvider)(implicit ec: ExecutionContext) - extends PublicationRepository with HasDatabaseConfigProvider[JdbcProfile] { - - import profile.api.* - - private val publications = DatabaseSchema.domain.publications.publications - private val publicationEnaStudies = DatabaseSchema.domain.publications.publicationGenomicStudies - private val enaStudies = DatabaseSchema.domain.publications.genomicStudies - private val publicationBiosamples = DatabaseSchema.domain.publications.publicationBiosamples - - override def getAllPublications: Future[Seq[Publication]] = db.run(publications.result) - - override def getEnaStudiesForPublication(publicationId: Int): Future[Seq[GenomicStudy]] = { - val query = for { - pes <- publicationEnaStudies if pes.publicationId === publicationId - es <- enaStudies if es.id === pes.genomicStudyId - } yield es - db.run(query.result) - } - - override def findPublicationsWithDetailsPaginated(page: Int, pageSize: Int): Future[Seq[PublicationWithEnaStudiesAndSampleCount]] = { - val offset = (page - 1) * pageSize - - // Apply sorting first, then pagination - val sortedAndPaginatedQuery = publications - .sortBy { p => - ( - 
p.citationNormalizedPercentile.desc.nullsLast, - p.citedByCount.desc.nullsLast, - p.publicationDate.desc.nullsLast - ) - } - .drop(offset) - .take(pageSize) - - db.run(sortedAndPaginatedQuery.result).flatMap { paginatedPublications => - if (paginatedPublications.isEmpty) { - Future.successful(Seq.empty) - } else { - assemblePublicationsWithDetails(paginatedPublications) - } - } - } - - /** - * Assembles publication details using batch queries instead of N+1 pattern. - * Uses 2 additional queries regardless of the number of publications. - */ - private def assemblePublicationsWithDetails(paginatedPublications: Seq[Publication]): Future[Seq[PublicationWithEnaStudiesAndSampleCount]] = { - val publicationIds = paginatedPublications.flatMap(_.id) - - // Batch query 1: Get all genomic studies for all publication IDs - val studiesQuery = (for { - pes <- publicationEnaStudies if pes.publicationId.inSet(publicationIds) - es <- enaStudies if es.id === pes.genomicStudyId - } yield (pes.publicationId, es)).result - - // Batch query 2: Get all biosample counts for all publication IDs - val countsQuery = publicationBiosamples - .filter(_.publicationId.inSet(publicationIds)) - .groupBy(_.publicationId) - .map { case (pubId, group) => (pubId, group.length) } - .result - - for { - studiesWithPubId <- db.run(studiesQuery) - counts <- db.run(countsQuery) - } yield { - // Group studies by publication ID - val studiesByPubId: Map[Int, Seq[GenomicStudy]] = studiesWithPubId - .groupBy(_._1) - .view - .mapValues(_.map(_._2)) - .toMap - - // Convert counts to map - val countsByPubId: Map[Int, Int] = counts.toMap - - // Assemble results maintaining original order - paginatedPublications.map { publication => - val pubId = publication.id.getOrElse(0) - PublicationWithEnaStudiesAndSampleCount( - publication, - studiesByPubId.getOrElse(pubId, Seq.empty), - countsByPubId.getOrElse(pubId, 0) - ) - } - } - } - - override def countAllPublications(): Future[Long] = { - 
db.run(publications.length.result.map(_.toLong)) - } - - override def getAllDois: Future[Seq[String]] = { - db.run(publications.filter(_.doi.isDefined).map(_.doi.get).result) - } - - override def savePublication(updatedPublication: Publication): Future[Publication] = { - val query = publications.filter { p => - // Combine conditions with OR. If updatedPublication.openAlexId is None, - // p.openAlexId === updatedPublication.openAlexId will resolve to false. - // This is the idiomatic way to compare Option columns in Slick. - (p.openAlexId === updatedPublication.openAlexId) || - (p.doi === updatedPublication.doi) - } - - db.run(query.result.headOption).flatMap { - case Some(existingPublication) => - // Publication exists, update it - // Ensure the ID of the publication passed to update is the existing one - val publicationToUpdate = updatedPublication.copy(id = existingPublication.id) - db.run(publications.filter(_.id === existingPublication.id).update(publicationToUpdate)) - .map(_ => publicationToUpdate) // Return the updated publication - case None => - // Publication does not exist, insert a new one - db.run((publications returning publications.map(_.id) into ((pub, id) => pub.copy(id = Some(id)))) += updatedPublication) - } - } - - override def findByDoi(doi: String): Future[Option[Publication]] = db.run(publications.filter(_.doi === doi).result.headOption) - - override def searchPublications(query: String, page: Int, pageSize: Int): Future[(Seq[PublicationWithEnaStudiesAndSampleCount], Long)] = { - val offset = (page - 1) * pageSize - val searchPattern = s"%${query.toLowerCase}%" - - // Filter by title, authors, or abstract (case-insensitive) - val baseQuery = publications.filter { p => - p.title.toLowerCase.like(searchPattern) || - p.authors.map(_.toLowerCase).like(searchPattern) || - p.abstractSummary.map(_.toLowerCase).like(searchPattern) - } - - // Get total count for pagination - val countQuery = baseQuery.length.result - - // Apply sorting and pagination 
- val sortedAndPaginatedQuery = baseQuery - .sortBy { p => - ( - p.citationNormalizedPercentile.desc.nullsLast, - p.citedByCount.desc.nullsLast, - p.publicationDate.desc.nullsLast - ) - } - .drop(offset) - .take(pageSize) - - for { - totalCount <- db.run(countQuery) - paginatedPublications <- db.run(sortedAndPaginatedQuery.result) - publicationsWithDetails <- if (paginatedPublications.isEmpty) { - Future.successful(Seq.empty) - } else { - assemblePublicationsWithDetails(paginatedPublications) - } - } yield (publicationsWithDetails, totalCount.toLong) - } -} \ No newline at end of file diff --git a/app/repositories/PublicationSearchConfigRepository.scala b/app/repositories/PublicationSearchConfigRepository.scala deleted file mode 100644 index e89dc4bd..00000000 --- a/app/repositories/PublicationSearchConfigRepository.scala +++ /dev/null @@ -1,50 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.publications.PublicationSearchConfig -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -trait PublicationSearchConfigRepository { - def create(config: PublicationSearchConfig): Future[PublicationSearchConfig] - def listAll(): Future[Seq[PublicationSearchConfig]] - def findById(id: Int): Future[Option[PublicationSearchConfig]] - def updateLastRun(id: Int, timestamp: LocalDateTime): Future[Boolean] - def getEnabledConfigs(): Future[Seq[PublicationSearchConfig]] -} - -@Singleton -class PublicationSearchConfigRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider -)(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with PublicationSearchConfigRepository { - - import models.dal.MyPostgresProfile.api.* - - private val configsTable = DatabaseSchema.domain.publications.publicationSearchConfigs - - override def create(config: 
PublicationSearchConfig): Future[PublicationSearchConfig] = { - db.run((configsTable returning configsTable.map(_.id) - into ((c, id) => c.copy(id = Some(id)))) += config) - } - - override def listAll(): Future[Seq[PublicationSearchConfig]] = { - db.run(configsTable.sortBy(_.id.desc).result) - } - - override def findById(id: Int): Future[Option[PublicationSearchConfig]] = { - db.run(configsTable.filter(_.id === id).result.headOption) - } - - override def updateLastRun(id: Int, timestamp: LocalDateTime): Future[Boolean] = { - db.run(configsTable.filter(_.id === id).map(_.lastRun).update(Some(timestamp)).map(_ > 0)) - } - - override def getEnabledConfigs(): Future[Seq[PublicationSearchConfig]] = { - db.run(configsTable.filter(_.enabled === true).result) - } -} diff --git a/app/repositories/PublicationSearchRunRepository.scala b/app/repositories/PublicationSearchRunRepository.scala deleted file mode 100644 index 5eace9a4..00000000 --- a/app/repositories/PublicationSearchRunRepository.scala +++ /dev/null @@ -1,34 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.publications.PublicationSearchRun -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait PublicationSearchRunRepository { - def create(run: PublicationSearchRun): Future[PublicationSearchRun] - def listByConfig(configId: Int, limit: Int): Future[Seq[PublicationSearchRun]] -} - -@Singleton -class PublicationSearchRunRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider -)(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with PublicationSearchRunRepository { - - import models.dal.MyPostgresProfile.api.* - - private val runsTable = DatabaseSchema.domain.publications.publicationSearchRuns - - override def create(run: PublicationSearchRun): Future[PublicationSearchRun] = { - db.run((runsTable 
returning runsTable.map(_.id) - into ((r, id) => r.copy(id = Some(id)))) += run) - } - - override def listByConfig(configId: Int, limit: Int): Future[Seq[PublicationSearchRun]] = { - db.run(runsTable.filter(_.configId === configId).sortBy(_.runAt.desc).take(limit).result) - } -} diff --git a/app/repositories/RoleRepository.scala b/app/repositories/RoleRepository.scala deleted file mode 100644 index df6a0154..00000000 --- a/app/repositories/RoleRepository.scala +++ /dev/null @@ -1,32 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.auth.Role -import models.dal.DatabaseSchema -import play.api.db.slick.DatabaseConfigProvider - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class RoleRepository @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) { - - import models.dal.MyPostgresProfile.api.* - - private val roles = DatabaseSchema.auth.roles - - def findByName(name: String): Future[Option[Role]] = { - db.run(roles.filter(_.name === name).result.headOption) - } - - def create(role: Role): Future[Role] = { - db.run((roles returning roles) += role) - } - - def findAll(): Future[Seq[Role]] = { - db.run(roles.result) - } -} diff --git a/app/repositories/SequenceFileRepository.scala b/app/repositories/SequenceFileRepository.scala deleted file mode 100644 index 9edd1ccb..00000000 --- a/app/repositories/SequenceFileRepository.scala +++ /dev/null @@ -1,130 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.{DatabaseSchema, MyPostgresProfile} -import models.domain.genomics.SequenceFile -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -trait SequenceFileRepository { - /** - * Creates a new sequence file record. 
- * - * @param file the sequence file to create - * @return a future containing the created sequence file with its assigned ID - */ - def create(file: SequenceFile): Future[SequenceFile] - - /** - * Retrieves a sequence file by its ID. - * - * @param id the unique identifier of the sequence file - * @return a future containing an optional sequence file if found - */ - def findById(id: Int): Future[Option[SequenceFile]] - - /** - * Updates an existing sequence file. - * - * @param file the sequence file with updated fields - * @return a future containing true if update was successful - */ - def update(file: SequenceFile): Future[Boolean] - - /** - * Deletes a sequence file. - * - * @param id the ID of the sequence file to delete - * @return a future containing true if deletion was successful - */ - def delete(id: Int): Future[Boolean] - - /** - * Finds all sequence files associated with a library. - * - * @param libraryId the ID of the sequence library - * @return a future containing a sequence of files - */ - def findByLibraryId(libraryId: Int): Future[Seq[SequenceFile]] - - /** - * Deletes all sequence files associated with a library. 
- * - * @param libraryId the ID of the sequence library - * @return a future containing the number of deleted files - */ - def deleteByLibraryId(libraryId: Int): Future[Int] -} - -@Singleton -class SequenceFileRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with SequenceFileRepository { - - import models.dal.MyPostgresProfile.api.* - - private val sequenceFiles = DatabaseSchema.domain.genomics.sequenceFiles - - override def create(file: SequenceFile): Future[SequenceFile] = { - val insertQuery = (sequenceFiles returning sequenceFiles.map(_.id) - into ((f, id) => f.copy(id = Some(id)))) - .+=(file) - - db.run(insertQuery.transactionally) - } - - override def findById(id: Int): Future[Option[SequenceFile]] = { - db.run(sequenceFiles.filter(_.id === id).result.headOption) - } - - override def update(file: SequenceFile): Future[Boolean] = { - file.id match { - case None => Future.successful(false) - case Some(id) => - val updateQuery = sequenceFiles - .filter(_.id === id) - .map(f => ( - f.libraryId, - f.fileName, - f.fileSizeBytes, - f.fileFormat, - f.checksums, - f.httpLocations, - f.atpLocation, - f.aligner, - f.targetReference, - f.updatedAt - )) - .update(( - file.libraryId, - file.fileName, - file.fileSizeBytes, - file.fileFormat, - file.checksums, - file.httpLocations, - file.atpLocation, - file.aligner, - file.targetReference, - Some(LocalDateTime.now()) - )) - - db.run(updateQuery.transactionally.map(_ > 0)) - } - } - - override def delete(id: Int): Future[Boolean] = { - db.run(sequenceFiles.filter(_.id === id).delete.map(_ > 0)) - } - - override def deleteByLibraryId(libraryId: Int): Future[Int] = { - db.run(sequenceFiles.filter(_.libraryId === libraryId).delete) - } - - override def findByLibraryId(libraryId: Int): Future[Seq[SequenceFile]] = { - db.run(sequenceFiles.filter(_.libraryId === libraryId).result) - } 
-} \ No newline at end of file diff --git a/app/repositories/SequenceLibraryRepository.scala b/app/repositories/SequenceLibraryRepository.scala deleted file mode 100644 index c31a083b..00000000 --- a/app/repositories/SequenceLibraryRepository.scala +++ /dev/null @@ -1,153 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.{DatabaseSchema, MyPostgresProfile} -import models.domain.genomics.SequenceLibrary -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait SequenceLibraryRepository { - /** - * Creates a new sequence library record. - * - * @param library the sequence library to create - * @return a future containing the created sequence library with its assigned ID - */ - def create(library: SequenceLibrary): Future[SequenceLibrary] - - /** - * Retrieves a sequence library by its ID. - * - * @param id the unique identifier of the sequence library - * @return a future containing an optional sequence library if found - */ - def findById(id: Int): Future[Option[SequenceLibrary]] - - /** - * Retrieves all sequence libraries for a given sample. - * - * @param sampleGuid the UUID of the sample - * @return a future containing a sequence of sequence libraries - */ - def findBySampleGuid(sampleGuid: UUID): Future[Seq[SequenceLibrary]] - - /** - * Updates an existing sequence library. - * - * @param library the sequence library with updated fields - * @return a future containing true if update was successful - */ - def update(library: SequenceLibrary): Future[Boolean] - - /** - * Deletes a sequence library by its ID. - * - * @param id the ID of the sequence library to delete - * @return a future containing true if deletion was successful - */ - def delete(id: Int): Future[Boolean] - - /** - * Finds all sequence libraries created within a date range. 
- * - * @param start start date-time - * @param end end date-time - * @return a future containing a sequence of matching libraries - */ - def findByDateRange(start: LocalDateTime, end: LocalDateTime): Future[Seq[SequenceLibrary]] - - def findByAtUri(atUri: String): Future[Option[SequenceLibrary]] - - def deleteByAtUri(atUri: String): Future[Boolean] -} - -@Singleton -class SequenceLibraryRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with SequenceLibraryRepository { - - import models.dal.MyPostgresProfile.api.* - - private val sequenceLibraries = DatabaseSchema.domain.genomics.sequenceLibraries - - override def create(library: SequenceLibrary): Future[SequenceLibrary] = { - val insertQuery = (sequenceLibraries returning sequenceLibraries.map(_.id) - into ((lib, id) => lib.copy(id = Some(id)))) - .+=(library) - - db.run(insertQuery.transactionally) - } - - override def findById(id: Int): Future[Option[SequenceLibrary]] = { - db.run(sequenceLibraries.filter(_.id === id).result.headOption) - } - - override def findByAtUri(atUri: String): Future[Option[SequenceLibrary]] = { - db.run(sequenceLibraries.filter(_.atUri === atUri).result.headOption) - } - - override def findBySampleGuid(sampleGuid: UUID): Future[Seq[SequenceLibrary]] = { - db.run(sequenceLibraries.filter(_.sampleGuid === sampleGuid).result) - } - - override def update(library: SequenceLibrary): Future[Boolean] = { - library.id match { - case None => Future.successful(false) - case Some(id) => - val updateQuery = sequenceLibraries - .filter(_.id === id) - .map(l => ( - l.sampleGuid, - l.lab, - l.testTypeId, - l.runDate, - l.instrument, - l.reads, - l.readLength, - l.pairedEnd, - l.insertSize, - l.atUri, - l.atCid, - l.updatedAt - )) - .update(( - library.sampleGuid, - library.lab, - library.testTypeId, - library.runDate, - library.instrument, - library.reads, - 
library.readLength, - library.pairedEnd, - library.insertSize, - library.atUri, - library.atCid, - Some(LocalDateTime.now()) - )) - - db.run(updateQuery.transactionally.map(_ > 0)) - } - } - - override def delete(id: Int): Future[Boolean] = { - db.run(sequenceLibraries.filter(_.id === id).delete.map(_ > 0)) - } - - override def deleteByAtUri(atUri: String): Future[Boolean] = { - db.run(sequenceLibraries.filter(_.atUri === atUri).delete.map(_ > 0)) - } - - override def findByDateRange(start: LocalDateTime, end: LocalDateTime): Future[Seq[SequenceLibrary]] = { - db.run( - sequenceLibraries - .filter(l => l.runDate >= start && l.runDate <= end) - .sortBy(_.runDate.desc) - .result - ) - } -} \ No newline at end of file diff --git a/app/repositories/SequenceLibrarySketchRepository.scala b/app/repositories/SequenceLibrarySketchRepository.scala deleted file mode 100644 index b9240985..00000000 --- a/app/repositories/SequenceLibrarySketchRepository.scala +++ /dev/null @@ -1,75 +0,0 @@ -package repositories - -import models.domain.genomics.{MinHashSketch, SequenceLibrary, SequenceLibrarySketch} - -import java.util.UUID -import scala.concurrent.Future - -enum SketchType { - case Autosomal, YChromosome, MtDna -} - - -trait SequenceLibrarySketchRepository { - /** - * Creates or updates a sketch for a sequence library - */ - def upsert(sketch: SequenceLibrarySketch): Future[SequenceLibrarySketch] - - /** - * Creates or updates multiple sketches in batch - */ - def upsertBatch(sketches: Seq[SequenceLibrarySketch]): Future[Seq[SequenceLibrarySketch]] - - /** - * Finds sequence libraries with similar autosomal sketches - */ - def findSimilarAutosomal(sketchHash: String, threshold: Double): Future[Seq[SequenceLibrary]] - - /** - * Finds sequence libraries with matching Y chromosome sketches - */ - def findMatchingYChromosome(sketchHash: String): Future[Seq[SequenceLibrary]] - - /** - * Finds sequence libraries with matching mtDNA sketches - */ - def findMatchingMtDna(sketchHash: 
String): Future[Seq[SequenceLibrary]] - - /** - * Retrieves the sketch for a specific sequence library - */ - def findBySequenceLibraryId(sequenceLibraryId: Int): Future[Option[SequenceLibrarySketch]] - - /** - * Finds all sketches for a given sample GUID - */ - def findBySampleGuid(sampleGuid: UUID): Future[Seq[SequenceLibrarySketch]] - - /** - * Finds sequence libraries with similar autosomal sketches using actual Jaccard similarity - */ - def findSimilarAutosomal( - sketch: MinHashSketch, - threshold: Double - ): Future[Seq[(SequenceLibrary, Double)]] // Returns libraries with their similarity scores - - /** - * Validates consistency between sequence libraries claiming to be from the same sample - * Now using full MinHash comparison for accurate Jaccard similarities - */ - def validateSampleConsistency( - sampleGuid: UUID, - autosomalThreshold: Double - ): Future[Seq[(SequenceLibrary, MinHashSketch, Double)]] - - /** - * Batch comparison of sketches across a set of libraries - * Useful for finding all pairs of similar libraries in a dataset - */ - def findAllSimilarPairs( - threshold: Double, - sketchType: SketchType = SketchType.Autosomal - ): Future[Seq[(SequenceLibrary, SequenceLibrary, Double)]] - -} diff --git a/app/repositories/SequencerInstrumentRepository.scala b/app/repositories/SequencerInstrumentRepository.scala deleted file mode 100644 index 8510730e..00000000 --- a/app/repositories/SequencerInstrumentRepository.scala +++ /dev/null @@ -1,165 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.api.SequencerLabInfo -import models.api.genomics.AssociateLabWithInstrumentResponse -import models.dal.DatabaseSchema -import models.domain.genomics.SequencingLab -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository interface for managing sequencer instrument data. 
- */ -trait SequencerInstrumentRepository { - /** - * Retrieves lab information for a given instrument ID. - * - * @param instrumentId the unique instrument ID from BAM/CRAM headers - * @return a future containing optional lab information if the instrument is found - */ - def findLabByInstrumentId(instrumentId: String): Future[Option[SequencerLabInfo]] - - /** - * Retrieves all lab-instrument associations. - * - * @return a future containing a list of all lab-instrument associations - */ - def findAllLabInstrumentAssociations(): Future[Seq[SequencerLabInfo]] - - /** - * Associates a lab with an instrument ID. - * If the lab doesn't exist, creates a placeholder record. - * - * @param instrumentId the unique instrument ID from BAM/CRAM headers - * @param labName the name of the lab to associate - * @param manufacturer optional manufacturer name - * @param model optional model name - * @return a future containing the association response - */ - def associateLabWithInstrument( - instrumentId: String, - labName: String, - manufacturer: Option[String] = None, - model: Option[String] = None - ): Future[AssociateLabWithInstrumentResponse] -} - -@Singleton -class SequencerInstrumentRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with SequencerInstrumentRepository { - - import models.dal.MyPostgresProfile.api.* - - private val instrumentsTable = DatabaseSchema.domain.genomics.sequencerInstruments - private val labsTable = DatabaseSchema.domain.genomics.sequencingLabs - - override def findLabByInstrumentId(instrumentId: String): Future[Option[SequencerLabInfo]] = { - val query = instrumentsTable - .filter(_.instrumentId === instrumentId) - .join(labsTable).on(_.labId === _.id) - .map { case (instrument, lab) => - ( - instrument.instrumentId, - lab.name, - lab.isD2c, - instrument.manufacturer, - instrument.model, - lab.websiteUrl - 
) - } - - db.run(query.result.headOption).map { - case Some((instId, labName, isD2c, manufacturer, model, websiteUrl)) => - Some(SequencerLabInfo( - instrumentId = instId, - labName = labName, - isD2c = isD2c, - manufacturer = manufacturer, - model = model, - websiteUrl = websiteUrl - )) - case None => None - } - } - - override def findAllLabInstrumentAssociations(): Future[Seq[SequencerLabInfo]] = { - val query = instrumentsTable - .join(labsTable).on(_.labId === _.id) - .map { case (instrument, lab) => - ( - instrument.instrumentId, - lab.name, - lab.isD2c, - instrument.manufacturer, - instrument.model, - lab.websiteUrl - ) - } - .sortBy(_._1) // Sort by instrumentId for consistency - - db.run(query.result).map { results => - results.map { case (instId, labName, isD2c, manufacturer, model, websiteUrl) => - SequencerLabInfo( - instrumentId = instId, - labName = labName, - isD2c = isD2c, - manufacturer = manufacturer, - model = model, - websiteUrl = websiteUrl - ) - } - } - } - - override def associateLabWithInstrument( - instrumentId: String, - labName: String, - manufacturer: Option[String] = None, - model: Option[String] = None - ): Future[AssociateLabWithInstrumentResponse] = { - db.run( - (for { - // Check if lab already exists - existingLab <- labsTable.filter(_.name === labName).result.headOption - - labId <- if (existingLab.isDefined) { - // Lab exists, use it - DBIO.successful(existingLab.get.id.get) - } else { - // Create placeholder lab - val newLab = SequencingLab(name = labName) - (labsTable returning labsTable.map(_.id)) += newLab - } - - // Update instrument with lab ID and instrument details - _ <- instrumentsTable - .filter(_.instrumentId === instrumentId) - .map(inst => (inst.labId, inst.manufacturer, inst.model)) - .update((labId, manufacturer, model)) - - } yield (labId, existingLab.isDefined)).transactionally - ).map { case (labId, labExists) => - AssociateLabWithInstrumentResponse( - instrumentId = instrumentId, - labId = labId, - labName = 
labName, - manufacturer = manufacturer, - model = model, - isNewLab = !labExists, - message = if (labExists) { - s"Lab '$labName' associated with instrument '$instrumentId'" - } else { - s"New lab placeholder '$labName' created and associated with instrument '$instrumentId'" - } - ) - }.recover { - case e: Exception => - throw new Exception(s"Failed to associate lab with instrument: ${e.getMessage}", e) - } - } -} \ No newline at end of file diff --git a/app/repositories/SequencingLabRepository.scala b/app/repositories/SequencingLabRepository.scala deleted file mode 100644 index 69cc5dd6..00000000 --- a/app/repositories/SequencingLabRepository.scala +++ /dev/null @@ -1,60 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.genomics.SequencingLab -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait SequencingLabRepository { - def list(): Future[Seq[SequencingLab]] - - def findById(id: Int): Future[Option[SequencingLab]] - - def create(lab: SequencingLab): Future[SequencingLab] - - def update(id: Int, update: SequencingLab): Future[Option[SequencingLab]] - - def delete(id: Int): Future[Boolean] -} - -@Singleton -class SequencingLabRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) with SequencingLabRepository { - - import models.dal.MyPostgresProfile.api.* - - private val labs = DatabaseSchema.domain.genomics.sequencingLabs - - override def list(): Future[Seq[SequencingLab]] = db.run(labs.sortBy(_.id.asc.nullsLast).result) - - override def findById(id: Int): Future[Option[SequencingLab]] = db.run(labs.filter(_.id === id).result.headOption) - - override def create(lab: SequencingLab): Future[SequencingLab] = { - val insert = (labs returning labs.map(_.id) into { case (row, id) => row.copy(id = 
Some(id)) }) += lab.copy(id = None) - db.run(insert) - } - - override def update(id: Int, update: SequencingLab): Future[Option[SequencingLab]] = { - val q = labs.filter(_.id === id) - val action = for { - existing <- q.result.headOption - updatedOpt <- existing match { - case Some(_) => - q.map(l => (l.name, l.isD2c, l.websiteUrl, l.descriptionMarkdown, l.updatedAt)) - .update((update.name, update.isD2c, update.websiteUrl, update.descriptionMarkdown, update.updatedAt)) - .map(_ => Some(update.copy(id = Some(id)))) - case None => DBIO.successful(None) - } - } yield updatedOpt - - db.run(action.transactionally) - } - - override def delete(id: Int): Future[Boolean] = { - db.run(labs.filter(_.id === id).delete.map(_ > 0)) - } -} diff --git a/app/repositories/SpecimanDonorRepository.scala b/app/repositories/SpecimanDonorRepository.scala deleted file mode 100644 index 7e106f24..00000000 --- a/app/repositories/SpecimanDonorRepository.scala +++ /dev/null @@ -1,205 +0,0 @@ -package repositories - -import com.vividsolutions.jts.geom.Point -import jakarta.inject.{Inject, Singleton} -import models.dal.{DatabaseSchema, MyPostgresProfile} -import models.domain.genomics.{BiologicalSex, BiosampleType, SpecimenDonor} -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait SpecimenDonorRepository { - def findById(id: Int): Future[Option[SpecimenDonor]] - - def findByAtUri(atUri: String): Future[Option[SpecimenDonor]] - - def create(donor: SpecimenDonor): Future[SpecimenDonor] - - def update(donor: SpecimenDonor): Future[Boolean] - - def upsert(donor: SpecimenDonor): Future[SpecimenDonor] - - def findByIdentifier(identifier: String): Future[Option[SpecimenDonor]] - - def findByOriginBiobank(biobank: String): Future[Seq[SpecimenDonor]] - - def findByType(donorType: BiosampleType): Future[Seq[SpecimenDonor]] - - def findBySex(sex: BiologicalSex): Future[Seq[SpecimenDonor]] - - def getAllGeoLocations: Future[Seq[(Point, Int)]] - - 
def findByBiobankAndType(biobank: String, donorType: BiosampleType): Future[Seq[SpecimenDonor]] - - def deleteMany(ids: Seq[Int]): Future[Int] - - def transferBiosamples(fromDonorIds: Seq[Int], toDonorId: Int): Future[Int] - - def findByDidAndIdentifier(did: String, identifier: String): Future[Option[SpecimenDonor]] - -} - -@Singleton -class SpecimenDonorRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with SpecimenDonorRepository { - - import models.dal.MyPostgresProfile.api.* - - private val donorsTable = DatabaseSchema.domain.genomics.specimenDonors - private val biosamplesTable = DatabaseSchema.domain.genomics.biosamples - - override def findById(id: Int): Future[Option[SpecimenDonor]] = { - db.run(donorsTable.filter(_.id === id).result.headOption) - } - - override def findByAtUri(atUri: String): Future[Option[SpecimenDonor]] = { - db.run(donorsTable.filter(_.atUri === atUri).result.headOption) - } - - override def findByDidAndIdentifier(did: String, identifier: String): Future[Option[SpecimenDonor]] = { - db.run(donorsTable - .filter(d => d.atUri === did && d.donorIdentifier === identifier) - .result.headOption - ) - } - - override def create(donor: SpecimenDonor): Future[SpecimenDonor] = { - val insertQuery = (donorsTable returning donorsTable.map(_.id) - into ((d, id) => d.copy(id = Some(id)))) - .+=(donor) - - db.run(insertQuery.transactionally) - } - - override def update(donor: SpecimenDonor): Future[Boolean] = { - donor.id match { - case None => Future.successful(false) - case Some(id) => - db.run( - donorsTable - .filter(_.id === id) - .map(d => ( - d.donorIdentifier, - d.originBiobank, - d.donorType, - d.sex, - d.geocoord, - d.pgpParticipantId, - d.atUri, - d.dateRangeStart, - d.dateRangeEnd - )) - .update(( - donor.donorIdentifier, - donor.originBiobank, - donor.donorType, - donor.sex, - donor.geocoord, - 
donor.pgpParticipantId, - donor.atUri, - donor.dateRangeStart, - donor.dateRangeEnd - )) - .map(_ > 0) - ) - } - } - - override def upsert(donor: SpecimenDonor): Future[SpecimenDonor] = { - val query = for { - existing <- donorsTable.filter(_.donorIdentifier === donor.donorIdentifier).result.headOption - result <- existing match { - case Some(existingDonor) => - // Update existing donor - donorsTable - .filter(_.id === existingDonor.id) - .map(d => ( - d.originBiobank, - d.donorType, - d.sex, - d.geocoord, - d.pgpParticipantId, - d.atUri, - d.dateRangeStart, - d.dateRangeEnd - )) - .update(( - donor.originBiobank, - donor.donorType, - donor.sex, - donor.geocoord, - donor.pgpParticipantId, - donor.atUri, - donor.dateRangeStart, - donor.dateRangeEnd - )) - .map(_ => donor.copy(id = existingDonor.id)) - - case None => - // Insert new donor - (donorsTable returning donorsTable.map(_.id) - into ((d, id) => d.copy(id = Some(id)))) - .+=(donor) - } - } yield result - - db.run(query.transactionally) - } - - override def findByIdentifier(identifier: String): Future[Option[SpecimenDonor]] = { - db.run(donorsTable.filter(_.donorIdentifier === identifier).result.headOption) - } - - override def findByOriginBiobank(biobank: String): Future[Seq[SpecimenDonor]] = { - db.run(donorsTable.filter(_.originBiobank === biobank).result) - } - - override def findByType(donorType: BiosampleType): Future[Seq[SpecimenDonor]] = { - db.run(donorsTable.filter(_.donorType === donorType).result) - } - - override def findBySex(sex: BiologicalSex): Future[Seq[SpecimenDonor]] = { - db.run(donorsTable.filter(_.sex === sex).result) - } - - override def getAllGeoLocations: Future[Seq[(Point, Int)]] = { - val query = donorsTable - .filter(_.geocoord.isDefined) - .groupBy(_.geocoord) - .map { case (point, group) => - (point.asColumnOf[Point], group.length) - } - - db.run(query.result) - } - - def findByBiobankAndType( - biobank: String, - donorType: BiosampleType - ): Future[Seq[SpecimenDonor]] = { - 
db.run( - donorsTable - .filter(d => d.originBiobank === biobank && d.donorType === donorType) - .result - ) - } - - override def deleteMany(ids: Seq[Int]): Future[Int] = { - db.run(donorsTable.filter(_.id.inSet(ids)).delete) - } - - def transferBiosamples(fromDonorIds: Seq[Int], toDonorId: Int): Future[Int] = { - import MyPostgresProfile.api.* - - db.run( - biosamplesTable - .filter(_.specimenDonorId inSet fromDonorIds) - .map(_.specimenDonorId) - .update(Some(toDonorId)) - ) - } - -} \ No newline at end of file diff --git a/app/repositories/StrMutationRateRepository.scala b/app/repositories/StrMutationRateRepository.scala deleted file mode 100644 index fb142410..00000000 --- a/app/repositories/StrMutationRateRepository.scala +++ /dev/null @@ -1,38 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.domain.genomics.StrMutationRate -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait StrMutationRateRepository { - def findByMarker(markerName: String): Future[Option[StrMutationRate]] - def findByMarkers(markerNames: Seq[String]): Future[Seq[StrMutationRate]] - def findAll(): Future[Seq[StrMutationRate]] - def upsert(rate: StrMutationRate): Future[Int] -} - -class StrMutationRateRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with StrMutationRateRepository - with Logging { - - import models.dal.DatabaseSchema.domain.genomics.strMutationRates - import models.dal.MyPostgresProfile.api.* - - override def findByMarker(markerName: String): Future[Option[StrMutationRate]] = - runQuery(strMutationRates.filter(_.markerName === markerName).result.headOption) - - override def findByMarkers(markerNames: Seq[String]): Future[Seq[StrMutationRate]] = - runQuery(strMutationRates.filter(_.markerName.inSet(markerNames)).result) - - override def findAll(): 
Future[Seq[StrMutationRate]] = - runQuery(strMutationRates.result) - - override def upsert(rate: StrMutationRate): Future[Int] = - runQuery(strMutationRates.insertOrUpdate(rate)) -} diff --git a/app/repositories/TestTypeRepository.scala b/app/repositories/TestTypeRepository.scala deleted file mode 100644 index aad33c8c..00000000 --- a/app/repositories/TestTypeRepository.scala +++ /dev/null @@ -1,21 +0,0 @@ -package repositories - -import models.domain.genomics.{DataGenerationMethod, TestTypeRow} - -import scala.concurrent.Future - -trait TestTypeRepository { - def findByCode(code: String): Future[Option[TestTypeRow]] - def listAll(): Future[Seq[TestTypeRow]] - def findByCategory(category: DataGenerationMethod): Future[Seq[TestTypeRow]] - def findByCapability( - supportsY: Option[Boolean] = None, - supportsMt: Option[Boolean] = None, - supportsAutosomalIbd: Option[Boolean] = None, - supportsAncestry: Option[Boolean] = None - ): Future[Seq[TestTypeRow]] - def create(testType: TestTypeRow): Future[TestTypeRow] - def update(testType: TestTypeRow): Future[Boolean] - def delete(id: Int): Future[Boolean] - def getTestTypeRowsByIds(ids: Seq[Int]): Future[Seq[TestTypeRow]] -} diff --git a/app/repositories/TestTypeRepositoryImpl.scala b/app/repositories/TestTypeRepositoryImpl.scala deleted file mode 100644 index c5feea36..00000000 --- a/app/repositories/TestTypeRepositoryImpl.scala +++ /dev/null @@ -1,77 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.genomics.{DataGenerationMethod, TestTypeRow} -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class TestTypeRepositoryImpl @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends TestTypeRepository with 
HasDatabaseConfigProvider[JdbcProfile] { - - private val testTypeDefinitions = DatabaseSchema.domain.genomics.testTypeDefinition - - override def findByCode(code: String): Future[Option[TestTypeRow]] = { - db.run(testTypeDefinitions.filter(_.code === code).result.headOption) // Changed .name to .code - } - - override def listAll(): Future[Seq[TestTypeRow]] = { - db.run(testTypeDefinitions.result) - } - - override def findByCategory(category: DataGenerationMethod): Future[Seq[TestTypeRow]] = { - db.run(testTypeDefinitions.filter(_.category === category).result) // Correctly accesses .category - } - - override def findByCapability( - supportsY: Option[Boolean] = None, - supportsMt: Option[Boolean] = None, - supportsAutosomalIbd: Option[Boolean] = None, - supportsAncestry: Option[Boolean] = None - ): Future[Seq[TestTypeRow]] = { - val query = testTypeDefinitions.filter { ttd => - val conditions = Seq( - supportsY.map(s => ttd.supportsHaplogroupY === s), - supportsMt.map(s => ttd.supportsHaplogroupMt === s), - supportsAutosomalIbd.map(s => ttd.supportsAutosomalIbd === s), - supportsAncestry.map(s => ttd.supportsAncestry === s) - ).flatten // This creates Seq[Rep[Boolean]] - - if (conditions.isEmpty) { - true.asColumnOf[Boolean] // If no conditions, return true - } else { - conditions.reduceLeft(_ && _) // Combine conditions with AND - } - } - db.run(query.result) - } - - override def create(testType: TestTypeRow): Future[TestTypeRow] = { - db.run( - (testTypeDefinitions returning testTypeDefinitions.map(_.id) - into ((tt, id) => tt.copy(id = Some(id)))) += testType - ) - } - - override def update(testType: TestTypeRow): Future[Boolean] = { - testType.id match { - case None => Future.successful(false) - case Some(id) => - db.run(testTypeDefinitions.filter(_.id === id).update(testType)).map(_ > 0) - } - } - - override def delete(id: Int): Future[Boolean] = { - db.run(testTypeDefinitions.filter(_.id === id).delete).map(_ > 0) - } - - override def getTestTypeRowsByIds(ids: 
Seq[Int]): Future[Seq[TestTypeRow]] = { - db.run(testTypeDefinitions.filter(_.id inSet ids).result) - } -} \ No newline at end of file diff --git a/app/repositories/TestTypeTargetRegionRepository.scala b/app/repositories/TestTypeTargetRegionRepository.scala deleted file mode 100644 index 8635614e..00000000 --- a/app/repositories/TestTypeTargetRegionRepository.scala +++ /dev/null @@ -1,56 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.domain.genomics.TestTypeTargetRegion -import play.api.db.slick.DatabaseConfigProvider - -import scala.concurrent.{ExecutionContext, Future} - -trait TestTypeTargetRegionRepository { - def findByTestTypeId(testTypeId: Int): Future[Seq[TestTypeTargetRegion]] - def findByTestTypeCode(code: String): Future[Seq[TestTypeTargetRegion]] - def findByContigName(contigName: String): Future[Seq[TestTypeTargetRegion]] - def create(region: TestTypeTargetRegion): Future[TestTypeTargetRegion] - def delete(id: Int): Future[Boolean] -} - -@Singleton -class TestTypeTargetRegionRepositoryImpl @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with TestTypeTargetRegionRepository { - - import models.dal.MyPostgresProfile.api.* - - private val regions = DatabaseSchema.domain.genomics.testTypeTargetRegions - private val testTypes = DatabaseSchema.domain.genomics.testTypeDefinition - - override def findByTestTypeId(testTypeId: Int): Future[Seq[TestTypeTargetRegion]] = { - db.run(regions.filter(_.testTypeId === testTypeId).result) - } - - override def findByTestTypeCode(code: String): Future[Seq[TestTypeTargetRegion]] = { - val query = regions - .join(testTypes).on(_.testTypeId === _.id) - .filter(_._2.code === code) - .map(_._1) - db.run(query.result) - } - - override def findByContigName(contigName: String): Future[Seq[TestTypeTargetRegion]] = { - 
db.run(regions.filter(_.contigName === contigName).result) - } - - override def create(region: TestTypeTargetRegion): Future[TestTypeTargetRegion] = { - db.run( - (regions returning regions.map(_.id) - into ((r, id) => r.copy(id = Some(id)))) += region - ) - } - - override def delete(id: Int): Future[Boolean] = { - db.run(regions.filter(_.id === id).delete.map(_ > 0)) - } -} diff --git a/app/repositories/TreeVersioningRepository.scala b/app/repositories/TreeVersioningRepository.scala deleted file mode 100644 index bffdd37f..00000000 --- a/app/repositories/TreeVersioningRepository.scala +++ /dev/null @@ -1,762 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.HaplogroupType -import models.dal.domain.haplogroups.{ChangeSetCommentRow, ChangeSetRow, TreeChangeRow} -import models.domain.haplogroups.* -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository interface for Tree Versioning System. - * - * Manages change sets and individual tree changes for Production/WIP versioning. - */ -trait TreeVersioningRepository { - - // ============================================================================ - // Change Set Operations - // ============================================================================ - - /** - * Create a new change set. - */ - def createChangeSet(changeSet: ChangeSet): Future[Int] - - /** - * Get a change set by ID. - */ - def getChangeSet(id: Int): Future[Option[ChangeSet]] - - /** - * Get a change set by name and type. - */ - def getChangeSetByName(name: String, haplogroupType: HaplogroupType): Future[Option[ChangeSet]] - - /** - * Get the active (DRAFT or READY_FOR_REVIEW) change set for a type. - * Only one active change set per type at a time. - */ - def getActiveChangeSet(haplogroupType: HaplogroupType): Future[Option[ChangeSet]] - - /** - * List change sets with optional filters. 
- */ - def listChangeSets( - haplogroupType: Option[HaplogroupType] = None, - status: Option[ChangeSetStatus] = None, - limit: Int = 20, - offset: Int = 0 - ): Future[Seq[ChangeSet]] - - /** - * Count change sets matching filters. - */ - def countChangeSets( - haplogroupType: Option[HaplogroupType] = None, - status: Option[ChangeSetStatus] = None - ): Future[Int] - - /** - * Update a change set. - */ - def updateChangeSet(changeSet: ChangeSet): Future[Boolean] - - /** - * Update change set status. - */ - def updateChangeSetStatus(id: Int, status: ChangeSetStatus): Future[Boolean] - - /** - * Finalize a change set (move from DRAFT to READY_FOR_REVIEW). - */ - def finalizeChangeSet( - id: Int, - statistics: ChangeSetStatistics, - ambiguityReportPath: Option[String] - ): Future[Boolean] - - /** - * Apply a change set (move to APPLIED status). - */ - def applyChangeSet(id: Int, appliedBy: String): Future[Boolean] - - /** - * Discard a change set (move to DISCARDED status). - */ - def discardChangeSet(id: Int, discardedBy: String, reason: String): Future[Boolean] - - // ============================================================================ - // Tree Change Operations - // ============================================================================ - - /** - * Record a new tree change. - */ - def createTreeChange(change: TreeChange): Future[Int] - - /** - * Bulk insert tree changes. - */ - def createTreeChanges(changes: Seq[TreeChange]): Future[Seq[Int]] - - /** - * Get a tree change by ID. - */ - def getTreeChange(id: Int): Future[Option[TreeChange]] - - /** - * List tree changes for a change set. - */ - def listTreeChanges( - changeSetId: Int, - changeType: Option[TreeChangeType] = None, - status: Option[ChangeStatus] = None, - limit: Int = 100, - offset: Int = 0 - ): Future[Seq[TreeChange]] - - /** - * Count tree changes for a change set. 
- */ - def countTreeChanges( - changeSetId: Int, - changeType: Option[TreeChangeType] = None, - status: Option[ChangeStatus] = None - ): Future[Int] - - /** - * Get the next sequence number for a change set. - */ - def getNextSequenceNum(changeSetId: Int): Future[Int] - - /** - * Update a tree change. - */ - def updateTreeChange(change: TreeChange): Future[Boolean] - - /** - * Update tree change status. - */ - def updateTreeChangeStatus(id: Int, status: ChangeStatus): Future[Boolean] - - /** - * Mark a tree change as reviewed. - */ - def reviewTreeChange( - id: Int, - reviewedBy: String, - notes: Option[String], - newStatus: ChangeStatus - ): Future[Boolean] - - /** - * Apply all pending changes in a change set (batch update to APPLIED). - */ - def applyAllPendingChanges(changeSetId: Int): Future[Int] - - /** - * Get pending changes for review (ordered by ambiguity confidence ASC). - */ - def getPendingReviewChanges(changeSetId: Int, limit: Int = 50): Future[Seq[TreeChange]] - - /** - * Get all changes for a change set (for diff computation). - * Returns all changes without pagination, ordered by sequence number. - */ - def getChangesForChangeSet(changeSetId: Int): Future[Seq[TreeChange]] - - /** - * Get change summary by type for a change set. - */ - def getChangeSummaryByType(changeSetId: Int): Future[Map[TreeChangeType, Int]] - - /** - * Get change summary by status for a change set. - */ - def getChangeSummaryByStatus(changeSetId: Int): Future[Map[ChangeStatus, Int]] - - // ============================================================================ - // Comment Operations - // ============================================================================ - - /** - * Add a comment to a change set or specific change. - */ - def addComment(comment: ChangeSetComment): Future[Int] - - /** - * List comments for a change set. - */ - def listComments(changeSetId: Int): Future[Seq[ChangeSetComment]] - - /** - * List comments for a specific tree change. 
- */ - def listCommentsForChange(treeChangeId: Int): Future[Seq[ChangeSetComment]] - - /** - * Get haplogroup names by IDs. - * Returns a map of haplogroup ID -> name for lookup. - */ - def getHaplogroupNamesById(ids: Set[Int]): Future[Map[Int, String]] -} - -class TreeVersioningRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with TreeVersioningRepository - with Logging { - - import models.dal.DatabaseSchema.domain.haplogroups.{changeSets, treeChanges, changeSetComments, haplogroups} - import models.dal.MyPostgresProfile.api.* - - // ============================================================================ - // Conversion Helpers - // ============================================================================ - - private def toChangeSet(row: ChangeSetRow): ChangeSet = ChangeSet( - id = row.id, - haplogroupType = row.haplogroupType, - name = row.name, - description = row.description, - sourceName = row.sourceName, - createdAt = row.createdAt, - createdBy = row.createdBy, - finalizedAt = row.finalizedAt, - appliedAt = row.appliedAt, - appliedBy = row.appliedBy, - discardedAt = row.discardedAt, - discardedBy = row.discardedBy, - discardReason = row.discardReason, - status = ChangeSetStatus.fromString(row.status), - statistics = ChangeSetStatistics( - nodesProcessed = row.nodesProcessed, - nodesCreated = row.nodesCreated, - nodesUpdated = row.nodesUpdated, - nodesUnchanged = row.nodesUnchanged, - variantsAdded = row.variantsAdded, - relationshipsCreated = row.relationshipsCreated, - relationshipsUpdated = row.relationshipsUpdated, - splitOperations = row.splitOperations, - ambiguityCount = row.ambiguityCount - ), - ambiguityReportPath = row.ambiguityReportPath - ) - - private def toChangeSetRow(cs: ChangeSet): ChangeSetRow = ChangeSetRow( - id = cs.id, - haplogroupType = cs.haplogroupType, - name = cs.name, - description = cs.description, - sourceName = 
cs.sourceName, - createdAt = cs.createdAt, - createdBy = cs.createdBy, - finalizedAt = cs.finalizedAt, - appliedAt = cs.appliedAt, - appliedBy = cs.appliedBy, - discardedAt = cs.discardedAt, - discardedBy = cs.discardedBy, - discardReason = cs.discardReason, - status = ChangeSetStatus.toDbString(cs.status), - nodesProcessed = cs.statistics.nodesProcessed, - nodesCreated = cs.statistics.nodesCreated, - nodesUpdated = cs.statistics.nodesUpdated, - nodesUnchanged = cs.statistics.nodesUnchanged, - variantsAdded = cs.statistics.variantsAdded, - relationshipsCreated = cs.statistics.relationshipsCreated, - relationshipsUpdated = cs.statistics.relationshipsUpdated, - splitOperations = cs.statistics.splitOperations, - ambiguityCount = cs.statistics.ambiguityCount, - ambiguityReportPath = cs.ambiguityReportPath, - metadata = None - ) - - private def toTreeChange(row: TreeChangeRow): TreeChange = TreeChange( - id = row.id, - changeSetId = row.changeSetId, - changeType = TreeChangeType.fromString(row.changeType), - haplogroupId = row.haplogroupId, - variantId = row.variantId, - oldParentId = row.oldParentId, - newParentId = row.newParentId, - haplogroupData = row.haplogroupData, - oldData = row.oldData, - createdHaplogroupId = row.createdHaplogroupId, - sequenceNum = row.sequenceNum, - status = ChangeStatus.fromString(row.status), - reviewedAt = row.reviewedAt, - reviewedBy = row.reviewedBy, - reviewNotes = row.reviewNotes, - createdAt = row.createdAt, - appliedAt = row.appliedAt, - ambiguityType = row.ambiguityType, - ambiguityConfidence = row.ambiguityConfidence - ) - - private def toTreeChangeRow(tc: TreeChange): TreeChangeRow = TreeChangeRow( - id = tc.id, - changeSetId = tc.changeSetId, - changeType = TreeChangeType.toDbString(tc.changeType), - haplogroupId = tc.haplogroupId, - variantId = tc.variantId, - oldParentId = tc.oldParentId, - newParentId = tc.newParentId, - haplogroupData = tc.haplogroupData, - oldData = tc.oldData, - createdHaplogroupId = 
tc.createdHaplogroupId, - sequenceNum = tc.sequenceNum, - status = ChangeStatus.toDbString(tc.status), - reviewedAt = tc.reviewedAt, - reviewedBy = tc.reviewedBy, - reviewNotes = tc.reviewNotes, - createdAt = tc.createdAt, - appliedAt = tc.appliedAt, - ambiguityType = tc.ambiguityType, - ambiguityConfidence = tc.ambiguityConfidence - ) - - private def toComment(row: ChangeSetCommentRow): ChangeSetComment = ChangeSetComment( - id = row.id, - changeSetId = row.changeSetId, - treeChangeId = row.treeChangeId, - author = row.author, - content = row.content, - createdAt = row.createdAt, - updatedAt = row.updatedAt - ) - - // ============================================================================ - // Change Set Implementations - // ============================================================================ - - override def createChangeSet(changeSet: ChangeSet): Future[Int] = { - import java.sql.Timestamp - - val sql = """ - INSERT INTO tree.change_set ( - haplogroup_type, name, description, source_name, created_at, created_by, - finalized_at, applied_at, applied_by, discarded_at, discarded_by, discard_reason, - status, nodes_processed, nodes_created, nodes_updated, nodes_unchanged, - variants_added, relationships_created, relationships_updated, split_operations, - ambiguity_count, ambiguity_report_path, metadata - ) VALUES ( - ?, ?, ?, ?, ?, ?, - ?, ?, ?, ?, ?, ?, - CAST(? 
AS tree.change_set_status), ?, ?, ?, ?, - ?, ?, ?, ?, - ?, ?, NULL - ) RETURNING id - """ - executeInsertReturningId(sql) { ps => - ps.setString(1, changeSet.haplogroupType.toString) - ps.setString(2, changeSet.name) - ps.setString(3, changeSet.description.orNull) - ps.setString(4, changeSet.sourceName) - ps.setTimestamp(5, Timestamp.valueOf(changeSet.createdAt)) - ps.setString(6, changeSet.createdBy) - ps.setTimestamp(7, changeSet.finalizedAt.map(Timestamp.valueOf).orNull) - ps.setTimestamp(8, changeSet.appliedAt.map(Timestamp.valueOf).orNull) - ps.setString(9, changeSet.appliedBy.orNull) - ps.setTimestamp(10, changeSet.discardedAt.map(Timestamp.valueOf).orNull) - ps.setString(11, changeSet.discardedBy.orNull) - ps.setString(12, changeSet.discardReason.orNull) - ps.setString(13, ChangeSetStatus.toDbString(changeSet.status)) - ps.setInt(14, changeSet.statistics.nodesProcessed) - ps.setInt(15, changeSet.statistics.nodesCreated) - ps.setInt(16, changeSet.statistics.nodesUpdated) - ps.setInt(17, changeSet.statistics.nodesUnchanged) - ps.setInt(18, changeSet.statistics.variantsAdded) - ps.setInt(19, changeSet.statistics.relationshipsCreated) - ps.setInt(20, changeSet.statistics.relationshipsUpdated) - ps.setInt(21, changeSet.statistics.splitOperations) - ps.setInt(22, changeSet.statistics.ambiguityCount) - ps.setString(23, changeSet.ambiguityReportPath.orNull) - } - } - - override def getChangeSet(id: Int): Future[Option[ChangeSet]] = { - val query = changeSets.filter(_.id === id).result.headOption - runQuery(query).map(_.map(toChangeSet)) - } - - override def getChangeSetByName(name: String, haplogroupType: HaplogroupType): Future[Option[ChangeSet]] = { - val query = changeSets - .filter(cs => cs.name === name && cs.haplogroupType === haplogroupType) - .result.headOption - runQuery(query).map(_.map(toChangeSet)) - } - - override def getActiveChangeSet(haplogroupType: HaplogroupType): Future[Option[ChangeSet]] = { - val activeStatuses = Seq("DRAFT", "READY_FOR_REVIEW", 
"UNDER_REVIEW") - val query = changeSets - .filter(cs => cs.haplogroupType === haplogroupType && cs.status.inSet(activeStatuses)) - .sortBy(_.createdAt.desc) - .result.headOption - runQuery(query).map(_.map(toChangeSet)) - } - - override def listChangeSets( - haplogroupType: Option[HaplogroupType], - status: Option[ChangeSetStatus], - limit: Int, - offset: Int - ): Future[Seq[ChangeSet]] = { - val query = changeSets - .filterOpt(haplogroupType)((cs, ht) => cs.haplogroupType === ht) - .filterOpt(status.map(ChangeSetStatus.toDbString))((cs, s) => cs.status === s) - .sortBy(_.createdAt.desc) - .drop(offset) - .take(limit) - .result - runQuery(query).map(_.map(toChangeSet)) - } - - override def countChangeSets( - haplogroupType: Option[HaplogroupType], - status: Option[ChangeSetStatus] - ): Future[Int] = { - val query = changeSets - .filterOpt(haplogroupType)((cs, ht) => cs.haplogroupType === ht) - .filterOpt(status.map(ChangeSetStatus.toDbString))((cs, s) => cs.status === s) - .length - .result - runQuery(query) - } - - override def updateChangeSet(changeSet: ChangeSet): Future[Boolean] = { - changeSet.id match { - case Some(id) => - val row = toChangeSetRow(changeSet) - val query = changeSets.filter(_.id === id).update(row) - runQuery(query).map(_ > 0) - case None => - Future.successful(false) - } - } - - override def updateChangeSetStatus(id: Int, status: ChangeSetStatus): Future[Boolean] = { - executeUpdate("UPDATE tree.change_set SET status = CAST(? 
AS tree.change_set_status) WHERE id = ?") { ps => - ps.setString(1, ChangeSetStatus.toDbString(status)) - ps.setInt(2, id) - } - } - - override def finalizeChangeSet( - id: Int, - statistics: ChangeSetStatistics, - ambiguityReportPath: Option[String] - ): Future[Boolean] = { - import java.sql.Timestamp - val now = LocalDateTime.now() - val sql = """ - UPDATE tree.change_set - SET status = CAST('READY_FOR_REVIEW' AS tree.change_set_status), - finalized_at = ?, - nodes_processed = ?, - nodes_created = ?, - nodes_updated = ?, - nodes_unchanged = ?, - variants_added = ?, - relationships_created = ?, - relationships_updated = ?, - split_operations = ?, - ambiguity_count = ?, - ambiguity_report_path = ? - WHERE id = ? - """ - executeUpdate(sql) { ps => - ps.setTimestamp(1, Timestamp.valueOf(now)) - ps.setInt(2, statistics.nodesProcessed) - ps.setInt(3, statistics.nodesCreated) - ps.setInt(4, statistics.nodesUpdated) - ps.setInt(5, statistics.nodesUnchanged) - ps.setInt(6, statistics.variantsAdded) - ps.setInt(7, statistics.relationshipsCreated) - ps.setInt(8, statistics.relationshipsUpdated) - ps.setInt(9, statistics.splitOperations) - ps.setInt(10, statistics.ambiguityCount) - ps.setString(11, ambiguityReportPath.orNull) - ps.setInt(12, id) - } - } - - override def applyChangeSet(id: Int, appliedBy: String): Future[Boolean] = { - import java.sql.Timestamp - val now = LocalDateTime.now() - val sql = """ - UPDATE tree.change_set - SET status = CAST('APPLIED' AS tree.change_set_status), - applied_at = ?, - applied_by = ? - WHERE id = ? 
- """ - executeUpdate(sql) { ps => - ps.setTimestamp(1, Timestamp.valueOf(now)) - ps.setString(2, appliedBy) - ps.setInt(3, id) - } - } - - override def discardChangeSet(id: Int, discardedBy: String, reason: String): Future[Boolean] = { - import java.sql.Timestamp - val now = LocalDateTime.now() - val sql = """ - UPDATE tree.change_set - SET status = CAST('DISCARDED' AS tree.change_set_status), - discarded_at = ?, - discarded_by = ?, - discard_reason = ? - WHERE id = ? - """ - executeUpdate(sql) { ps => - ps.setTimestamp(1, Timestamp.valueOf(now)) - ps.setString(2, discardedBy) - ps.setString(3, reason) - ps.setInt(4, id) - } - } - - // ============================================================================ - // Tree Change Implementations - // ============================================================================ - - override def createTreeChange(change: TreeChange): Future[Int] = { - import java.sql.{Timestamp, Types} - - val sql = """ - INSERT INTO tree.tree_change ( - change_set_id, change_type, haplogroup_id, variant_id, old_parent_id, new_parent_id, - haplogroup_data, old_data, created_haplogroup_id, sequence_num, status, - reviewed_at, reviewed_by, review_notes, created_at, applied_at, - ambiguity_type, ambiguity_confidence - ) VALUES ( - ?, CAST(? AS tree.tree_change_type), ?, ?, ?, ?, - CAST(? AS jsonb), CAST(? AS jsonb), ?, ?, CAST(? AS tree.change_status), - ?, ?, ?, ?, ?, - ?, ? 
- ) RETURNING id - """ - executeInsertReturningId(sql) { ps => - ps.setInt(1, change.changeSetId) - ps.setString(2, TreeChangeType.toDbString(change.changeType)) - change.haplogroupId.fold(ps.setNull(3, Types.INTEGER))(v => ps.setInt(3, v)) - change.variantId.fold(ps.setNull(4, Types.INTEGER))(v => ps.setInt(4, v)) - change.oldParentId.fold(ps.setNull(5, Types.INTEGER))(v => ps.setInt(5, v)) - change.newParentId.fold(ps.setNull(6, Types.INTEGER))(v => ps.setInt(6, v)) - ps.setString(7, change.haplogroupData.orNull) - ps.setString(8, change.oldData.orNull) - change.createdHaplogroupId.fold(ps.setNull(9, Types.INTEGER))(v => ps.setInt(9, v)) - ps.setInt(10, change.sequenceNum) - ps.setString(11, ChangeStatus.toDbString(change.status)) - change.reviewedAt.fold(ps.setNull(12, Types.TIMESTAMP))(v => ps.setTimestamp(12, Timestamp.valueOf(v))) - ps.setString(13, change.reviewedBy.orNull) - ps.setString(14, change.reviewNotes.orNull) - ps.setTimestamp(15, Timestamp.valueOf(change.createdAt)) - change.appliedAt.fold(ps.setNull(16, Types.TIMESTAMP))(v => ps.setTimestamp(16, Timestamp.valueOf(v))) - ps.setString(17, change.ambiguityType.orNull) - change.ambiguityConfidence.fold(ps.setNull(18, Types.DOUBLE))(v => ps.setDouble(18, v)) - } - } - - override def createTreeChanges(changes: Seq[TreeChange]): Future[Seq[Int]] = { - // For bulk inserts, insert one at a time using raw SQL - // This is less efficient but ensures proper enum casting - Future.sequence(changes.map(createTreeChange)) - } - - override def getTreeChange(id: Int): Future[Option[TreeChange]] = { - val query = treeChanges.filter(_.id === id).result.headOption - runQuery(query).map(_.map(toTreeChange)) - } - - override def listTreeChanges( - changeSetId: Int, - changeType: Option[TreeChangeType], - status: Option[ChangeStatus], - limit: Int, - offset: Int - ): Future[Seq[TreeChange]] = { - val query = treeChanges - .filter(_.changeSetId === changeSetId) - .filterOpt(changeType.map(TreeChangeType.toDbString))((tc, 
ct) => tc.changeType === ct) - .filterOpt(status.map(ChangeStatus.toDbString))((tc, s) => tc.status === s) - .sortBy(_.sequenceNum) - .drop(offset) - .take(limit) - .result - runQuery(query).map(_.map(toTreeChange)) - } - - override def countTreeChanges( - changeSetId: Int, - changeType: Option[TreeChangeType], - status: Option[ChangeStatus] - ): Future[Int] = { - val query = treeChanges - .filter(_.changeSetId === changeSetId) - .filterOpt(changeType.map(TreeChangeType.toDbString))((tc, ct) => tc.changeType === ct) - .filterOpt(status.map(ChangeStatus.toDbString))((tc, s) => tc.status === s) - .length - .result - runQuery(query) - } - - override def getNextSequenceNum(changeSetId: Int): Future[Int] = { - val query = treeChanges - .filter(_.changeSetId === changeSetId) - .map(_.sequenceNum) - .max - .result - runQuery(query).map(_.getOrElse(0) + 1) - } - - override def updateTreeChange(change: TreeChange): Future[Boolean] = { - change.id match { - case Some(id) => - val row = toTreeChangeRow(change) - val query = treeChanges.filter(_.id === id).update(row) - runQuery(query).map(_ > 0) - case None => - Future.successful(false) - } - } - - override def updateTreeChangeStatus(id: Int, status: ChangeStatus): Future[Boolean] = { - executeUpdate("UPDATE tree.tree_change SET status = CAST(? AS tree.change_status) WHERE id = ?") { ps => - ps.setString(1, ChangeStatus.toDbString(status)) - ps.setInt(2, id) - } - } - - override def reviewTreeChange( - id: Int, - reviewedBy: String, - notes: Option[String], - newStatus: ChangeStatus - ): Future[Boolean] = { - import java.sql.Timestamp - val now = LocalDateTime.now() - val sql = """ - UPDATE tree.tree_change - SET status = CAST(? AS tree.change_status), - reviewed_at = ?, - reviewed_by = ?, - review_notes = ? - WHERE id = ? 
- """ - executeUpdate(sql) { ps => - ps.setString(1, ChangeStatus.toDbString(newStatus)) - ps.setTimestamp(2, Timestamp.valueOf(now)) - ps.setString(3, reviewedBy) - ps.setString(4, notes.orNull) - ps.setInt(5, id) - } - } - - override def applyAllPendingChanges(changeSetId: Int): Future[Int] = { - import java.sql.Timestamp - val now = LocalDateTime.now() - val sql = """ - UPDATE tree.tree_change - SET status = CAST('APPLIED' AS tree.change_status), - applied_at = ? - WHERE change_set_id = ? AND status = 'PENDING' - """ - executeUpdateCount(sql) { ps => - ps.setTimestamp(1, Timestamp.valueOf(now)) - ps.setInt(2, changeSetId) - } - } - - override def getPendingReviewChanges(changeSetId: Int, limit: Int): Future[Seq[TreeChange]] = { - val query = treeChanges - .filter(tc => tc.changeSetId === changeSetId && tc.status === "PENDING") - .sortBy(tc => (tc.ambiguityConfidence.asc.nullsLast, tc.sequenceNum)) - .take(limit) - .result - runQuery(query).map(_.map(toTreeChange)) - } - - override def getChangesForChangeSet(changeSetId: Int): Future[Seq[TreeChange]] = { - val query = treeChanges - .filter(_.changeSetId === changeSetId) - .sortBy(_.sequenceNum) - .result - runQuery(query).map(_.map(toTreeChange)) - } - - override def getChangeSummaryByType(changeSetId: Int): Future[Map[TreeChangeType, Int]] = { - val query = treeChanges - .filter(_.changeSetId === changeSetId) - .groupBy(_.changeType) - .map { case (changeType, group) => (changeType, group.length) } - .result - runQuery(query).map(_.map { case (ct, count) => - TreeChangeType.fromString(ct) -> count - }.toMap) - } - - override def getChangeSummaryByStatus(changeSetId: Int): Future[Map[ChangeStatus, Int]] = { - val query = treeChanges - .filter(_.changeSetId === changeSetId) - .groupBy(_.status) - .map { case (status, group) => (status, group.length) } - .result - runQuery(query).map(_.map { case (s, count) => - ChangeStatus.fromString(s) -> count - }.toMap) - } - - // 
============================================================================ - // Comment Implementations - // ============================================================================ - - override def addComment(comment: ChangeSetComment): Future[Int] = { - val row = ChangeSetCommentRow( - id = comment.id, - changeSetId = comment.changeSetId, - treeChangeId = comment.treeChangeId, - author = comment.author, - content = comment.content, - createdAt = comment.createdAt, - updatedAt = comment.updatedAt - ) - val query = (changeSetComments returning changeSetComments.map(_.id)) += row - runQuery(query) - } - - override def listComments(changeSetId: Int): Future[Seq[ChangeSetComment]] = { - val query = changeSetComments - .filter(_.changeSetId === changeSetId) - .sortBy(_.createdAt) - .result - runQuery(query).map(_.map(toComment)) - } - - override def listCommentsForChange(treeChangeId: Int): Future[Seq[ChangeSetComment]] = { - val query = changeSetComments - .filter(_.treeChangeId === treeChangeId) - .sortBy(_.createdAt) - .result - runQuery(query).map(_.map(toComment)) - } - - override def getHaplogroupNamesById(ids: Set[Int]): Future[Map[Int, String]] = { - if (ids.isEmpty) { - Future.successful(Map.empty) - } else { - val query = haplogroups - .filter(_.haplogroupId.inSet(ids)) - .map(h => (h.haplogroupId, h.name)) - .result - runQuery(query).map { rows => - rows.map { case (id, name) => id -> name }.toMap - } - } - } -} diff --git a/app/repositories/UserPdsInfoRepository.scala b/app/repositories/UserPdsInfoRepository.scala deleted file mode 100644 index 4c0bfa28..00000000 --- a/app/repositories/UserPdsInfoRepository.scala +++ /dev/null @@ -1,55 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.user.UserPdsInfo -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import 
java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class UserPdsInfoRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[JdbcProfile] { - - private val userPdsInfos = DatabaseSchema.auth.userPdsInfos - - def findByUserId(userId: UUID): Future[Option[UserPdsInfo]] = { - db.run(userPdsInfos.filter(_.userId === userId).result.headOption) - } - - def findByDid(did: String): Future[Option[UserPdsInfo]] = { - db.run(userPdsInfos.filter(_.did === did).result.headOption) - } - - def create(info: UserPdsInfo): Future[UserPdsInfo] = { - val infoWithId = info.copy(id = Some(info.id.getOrElse(UUID.randomUUID()))) - db.run((userPdsInfos returning userPdsInfos) += infoWithId) - } - - def upsertByDid(info: UserPdsInfo): Future[UserPdsInfo] = { - findByDid(info.did).flatMap { - case Some(existing) => - val updated = info.copy( - id = existing.id, - createdAt = existing.createdAt, - updatedAt = LocalDateTime.now() - ) - db.run(userPdsInfos.filter(_.did === info.did).update(updated)).map(_ => updated) - case None => - create(info.copy(updatedAt = LocalDateTime.now())) - } - } - - def update(info: UserPdsInfo): Future[Int] = { - db.run(userPdsInfos.filter(_.id === info.id).update(info.copy(updatedAt = LocalDateTime.now()))) - } - - def delete(id: UUID): Future[Int] = { - db.run(userPdsInfos.filter(_.id === id).delete) - } -} diff --git a/app/repositories/UserRepository.scala b/app/repositories/UserRepository.scala deleted file mode 100644 index 570c24ab..00000000 --- a/app/repositories/UserRepository.scala +++ /dev/null @@ -1,43 +0,0 @@ -package repositories - -import models.dal.DatabaseSchema -import models.dal.MyPostgresProfile.api.* -import models.domain.user.User -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.JdbcProfile - -import java.util.UUID -import javax.inject.{Inject, 
Singleton} -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class UserRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[JdbcProfile] { - - private val users = DatabaseSchema.domain.users - - def create(user: User): Future[User] = { - db.run((users returning users) += user) - } - - def findById(id: UUID): Future[Option[User]] = { - db.run(users.filter(_.id === id).result.headOption) - } - - def findByDid(did: String): Future[Option[User]] = { - db.run(users.filter(_.did === did).result.headOption) - } - - def findByEmail(email: String): Future[Option[User]] = { - db.run(users.filter(_.email === email).result.headOption) - } - - def update(user: User): Future[Int] = { - db.run(users.filter(_.id === user.id).update(user)) - } - - def delete(id: UUID): Future[Int] = { - db.run(users.filter(_.id === id).delete) - } -} \ No newline at end of file diff --git a/app/repositories/UserRoleRepository.scala b/app/repositories/UserRoleRepository.scala deleted file mode 100644 index 3c8743fa..00000000 --- a/app/repositories/UserRoleRepository.scala +++ /dev/null @@ -1,57 +0,0 @@ -package repositories - -import jakarta.inject.{Inject, Singleton} -import models.auth.UserRole -import models.dal.DatabaseSchema -import play.api.db.slick.DatabaseConfigProvider - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class UserRoleRepository @Inject()( - override protected val dbConfigProvider: DatabaseConfigProvider - )(implicit override protected val ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) { - - import models.dal.MyPostgresProfile.api.* - - private val userRoles = DatabaseSchema.auth.userRoles - private val roles = DatabaseSchema.auth.roles - - def assignRole(userId: UUID, roleId: UUID): Future[Int] = { - db.run(userRoles += UserRole(userId, roleId)) - } - - def getUserRoles(userId: UUID): Future[Seq[String]] = 
{ - val query = for { - ur <- userRoles if ur.userId === userId - r <- roles if r.id === ur.roleId - } yield r.name - - db.run(query.result) - } - - def hasRole(userId: UUID, roleName: String): Future[Boolean] = { - val query = for { - ur <- userRoles if ur.userId === userId - r <- roles if r.id === ur.roleId && r.name === roleName - } yield r - - db.run(query.exists.result) - } - - def hasPermission(userId: UUID, permissionName: String): Future[Boolean] = { - // Check if user has any role that has the given permission - val rolePermissions = DatabaseSchema.auth.rolePermissionsTable - val permissions = DatabaseSchema.auth.permissions - - val query = for { - ur <- userRoles if ur.userId === userId - rp <- rolePermissions if rp.roleId === ur.roleId - p <- permissions if p.id === rp.permissionId && p.name === permissionName - } yield p - - db.run(query.exists.result) - } -} diff --git a/app/repositories/VariantV2Repository.scala b/app/repositories/VariantV2Repository.scala deleted file mode 100644 index 726b5e74..00000000 --- a/app/repositories/VariantV2Repository.scala +++ /dev/null @@ -1,739 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.dal.MyPostgresProfile -import models.dal.MyPostgresProfile.api.* -import models.dal.domain.genomics.* -import models.domain.genomics.{MutationType, NamingStatus, VariantV2} -import org.postgresql.util.PSQLException -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider -import play.api.libs.json.{JsArray, JsObject, Json} -import slick.jdbc.GetResult - -import java.time.Instant -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository interface for consolidated variant_v2 table. - * - * Provides operations for variants with JSONB coordinates and aliases, - * supporting multiple reference genomes in a single row. 
- */ -trait VariantV2Repository { - - // === Basic Lookups === - - def findById(id: Int): Future[Option[VariantV2]] - def findByCanonicalName(name: String, definingHaplogroupId: Option[Int] = None): Future[Option[VariantV2]] - def findAllByCanonicalName(name: String): Future[Seq[VariantV2]] - - // === JSONB Alias Search === - - def findByAlias(aliasValue: String): Future[Seq[VariantV2]] - def searchByName(query: String): Future[Seq[VariantV2]] - - // === JSONB Coordinate Search === - - def findByCoordinates( - refGenome: String, - contig: String, - position: Int, - ref: String, - alt: String - ): Future[Option[VariantV2]] - - def findByPositionRange( - refGenome: String, - contig: String, - startPosition: Int, - endPosition: Int - ): Future[Seq[VariantV2]] - - // === Upsert Operations === - - def create(variant: VariantV2): Future[Int] - def createBatch(variants: Seq[VariantV2]): Future[Seq[Int]] - - /** - * Perform a batch upsert (INSERT or UPDATE) for a sequence of variants. - * Matches on either canonical name + defining haplogroup (for named variants) - * or hs1 coordinates (for unnamed variants). - */ - def upsertBatch(variants: Seq[VariantV2]): Future[Seq[Int]] - - /** - * Bulk update the `annotations` column by finding overlapping regions and STRs. - * This is a heavy operation intended for background jobs. 
- */ - def updateRegionAnnotations(): Future[Int] - - // === JSONB Update Operations === - - def addCoordinates(variantId: Int, refGenome: String, coordinates: JsObject): Future[Boolean] - def addAlias(variantId: Int, aliasType: String, aliasValue: String, source: Option[String] = None): Future[Boolean] - - // === Alias Source Management === - - def bulkUpdateAliasSource(aliasPrefix: String, newSource: String, oldSource: Option[String]): Future[Int] - def getAliasSourceStats(): Future[Seq[(String, Int)]] - def countAliasesByPrefixAndSource(aliasPrefix: String, source: Option[String]): Future[Int] - def updateEvidence(variantId: Int, evidence: JsObject): Future[Boolean] - - // === Curator CRUD === - - def update(variant: VariantV2): Future[Boolean] - def updateBatch(variants: Seq[VariantV2]): Future[Int] - def delete(id: Int): Future[Boolean] - def searchPaginated( - query: String, - offset: Int, - limit: Int, - mutationType: Option[String] = None - ): Future[(Seq[VariantV2], Int)] - def count(query: Option[String] = None, mutationType: Option[String] = None): Future[Int] - - // === Bulk Operations === - - def countAll(): Future[Int] - def fetchBatch(offset: Int, limit: Int): Future[Seq[VariantV2]] - def findByIds(ids: Seq[Int]): Future[Seq[VariantV2]] - - /** - * Bulk search for variants by a list of names (canonical names or aliases). - * Returns a map from search name (uppercased) to matching variants. - * Much more efficient than individual searchByName calls for large batches. 
- */ - def searchByNames(names: Seq[String]): Future[Map[String, Seq[VariantV2]]] - - // === DU Naming Authority === - - def nextDuName(): Future[String] - def currentDuName(): Future[Option[String]] - def isDuName(name: String): Boolean - def createWithDuName(variant: VariantV2): Future[VariantV2] -} - -class VariantV2RepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with VariantV2Repository with Logging { - - import slick.ast.BaseTypedType - import slick.jdbc.JdbcType - - private val variantsV2 = TableQuery[VariantV2Table] - - implicit val mutationTypeMapper: JdbcType[MutationType] with BaseTypedType[MutationType] = - MappedColumnType.base[MutationType, String]( - _.dbValue, - MutationType.fromStringOrDefault(_) - ) - - implicit val namingStatusMapper: JdbcType[NamingStatus] with BaseTypedType[NamingStatus] = - MappedColumnType.base[NamingStatus, String]( - _.dbValue, - NamingStatus.fromStringOrDefault(_) - ) - - // === Basic Lookups === - - override def findById(id: Int): Future[Option[VariantV2]] = { - db.run(variantsV2.filter(_.variantId === id).result.headOption) - } - - override def findByCanonicalName(name: String, definingHaplogroupId: Option[Int] = None): Future[Option[VariantV2]] = { - definingHaplogroupId match { - case Some(hgId) => - db.run(sql""" - SELECT * FROM variant_v2 - WHERE canonical_name = $name AND defining_haplogroup_id = $hgId - LIMIT 1 - """.as[VariantV2](variantV2GetResult).headOption) - case None => - db.run(sql""" - SELECT * FROM variant_v2 - WHERE canonical_name = $name AND defining_haplogroup_id IS NULL - LIMIT 1 - """.as[VariantV2](variantV2GetResult).headOption) - } - } - - override def findAllByCanonicalName(name: String): Future[Seq[VariantV2]] = { - db.run(variantsV2.filter(_.canonicalName === name).result) - } - - // === JSONB Alias Search === - - override def findByAlias(aliasValue: String): Future[Seq[VariantV2]] = { - val query 
= sql""" - SELECT * FROM variant_v2 - WHERE aliases->'common_names' ? $aliasValue - OR aliases->'rs_ids' ? $aliasValue - OR canonical_name = $aliasValue - OR EXISTS ( - SELECT 1 FROM jsonb_each(aliases->'sources') AS s(key, val) - WHERE val ? $aliasValue - ) - """.as[VariantV2](variantV2GetResult) - db.run(query) - } - - override def searchByName(query: String): Future[Seq[VariantV2]] = { - val upperQuery = query.toUpperCase - val searchPattern = s"%$upperQuery%" - val searchQuery = sql""" - SELECT * FROM variant_v2 - WHERE UPPER(canonical_name) LIKE $searchPattern - OR EXISTS ( - SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name - WHERE UPPER(name) LIKE $searchPattern - ) - OR EXISTS ( - SELECT 1 FROM jsonb_array_elements_text(aliases->'rs_ids') AS rsid - WHERE UPPER(rsid) LIKE $searchPattern - ) - ORDER BY canonical_name - LIMIT 100 - """.as[VariantV2](variantV2GetResult) - db.run(searchQuery) - } - - // === JSONB Coordinate Search === - - override def findByCoordinates( - refGenome: String, - contig: String, - position: Int, - ref: String, - alt: String - ): Future[Option[VariantV2]] = { - val query = sql""" - SELECT * FROM variant_v2 - WHERE coordinates->$refGenome->>'contig' = $contig - AND (coordinates->$refGenome->>'position')::int = $position - AND coordinates->$refGenome->>'ref' = $ref - AND coordinates->$refGenome->>'alt' = $alt - LIMIT 1 - """.as[VariantV2](variantV2GetResult).headOption - db.run(query) - } - - override def findByPositionRange( - refGenome: String, - contig: String, - startPosition: Int, - endPosition: Int - ): Future[Seq[VariantV2]] = { - val query = sql""" - SELECT * FROM variant_v2 - WHERE coordinates->$refGenome->>'contig' = $contig - AND (coordinates->$refGenome->>'position')::int >= $startPosition - AND (coordinates->$refGenome->>'position')::int <= $endPosition - ORDER BY (coordinates->$refGenome->>'position')::int - """.as[VariantV2](variantV2GetResult) - db.run(query) - } - - // === Upsert Operations === - 
- override def create(variant: VariantV2): Future[Int] = { - val insertion = (variantsV2 returning variantsV2.map(_.variantId)) += variant - db.run(insertion) - } - - override def createBatch(variants: Seq[VariantV2]): Future[Seq[Int]] = { - if (variants.isEmpty) { - Future.successful(Seq.empty) - } else { - val insertion = (variantsV2 returning variantsV2.map(_.variantId)) ++= variants - db.run(insertion.transactionally) - } - } - - override def upsertBatch(variants: Seq[VariantV2]): Future[Seq[Int]] = { - if (variants.isEmpty) return Future.successful(Seq.empty) - - val (namedVariantsRaw, unnamedVariantsRaw) = variants.partition(_.canonicalName.isDefined) - - // Deduplicate named variants by conflict key - val namedVariants = namedVariantsRaw - .groupBy(v => (v.canonicalName, v.definingHaplogroupId)) - .values.map(_.head).toSeq - - // Deduplicate unnamed variants by conflict key - val unnamedVariants = unnamedVariantsRaw - .groupBy(v => v.getCoordinates("hs1").toString) - .values.map(_.head).toSeq - - // Sanitize strings for SQL: escape quotes, backslashes, and strip null bytes - def escapeSql(s: String): String = - s.replace("\u0000", "") - .replace("\\", "\\\\") - .replace("'", "''") - def toJsonb(jsValue: play.api.libs.json.JsValue): String = escapeSql(Json.stringify(jsValue)) - def optString(s: Option[String]): String = s.map(v => s"'${escapeSql(v)}'").getOrElse("NULL") - def optInt(i: Option[Int]): String = i.map(_.toString).getOrElse("NULL") - // Validate variant names: only allow alphanumeric, dots, dashes, underscores, slashes, parens - val validNamePattern = "^[A-Za-z0-9._\\-/()~*+# ]+$".r - def validateName(name: String): String = { - require(name.length <= 255, s"Variant name too long: ${name.length} chars") - require(validNamePattern.matches(name), s"Variant name contains invalid characters: $name") - name - } - - // === Named Variants Upsert === - val namedUpsertAction = if (namedVariants.nonEmpty) { - val namedValues = namedVariants.map { v => - val 
canonicalName = escapeSql(validateName(v.canonicalName.getOrElse(throw new IllegalArgumentException("Named variant must have a canonical name")))) - val definingHaplogroupId = optInt(v.definingHaplogroupId) - val mutationType = escapeSql(v.mutationType.dbValue) - val namingStatus = escapeSql(v.namingStatus.dbValue) - val aliases = toJsonb(v.aliases) - val coordinates = toJsonb(v.coordinates) - val evidence = toJsonb(v.evidence) - val primers = toJsonb(v.primers) - val notes = optString(v.notes) - val annotations = toJsonb(v.annotations) - val createdAt = v.createdAt.getEpochSecond - val updatedAt = v.updatedAt.getEpochSecond - - s"(NEXTVAL('variant_v2_variant_id_seq'), '$canonicalName', '$mutationType', '$namingStatus', '$aliases', '$coordinates', $definingHaplogroupId, '$evidence', '$primers', $notes, '$annotations', TO_TIMESTAMP($createdAt), TO_TIMESTAMP($updatedAt))" - }.mkString(",") - - sql""" - INSERT INTO variant_v2 (variant_id, canonical_name, mutation_type, naming_status, aliases, coordinates, defining_haplogroup_id, evidence, primers, notes, annotations, created_at, updated_at) - VALUES #$namedValues - ON CONFLICT (canonical_name, COALESCE(defining_haplogroup_id, -1)) WHERE canonical_name IS NOT NULL DO UPDATE SET - mutation_type = EXCLUDED.mutation_type, - aliases = variant_v2.aliases || EXCLUDED.aliases, - coordinates = variant_v2.coordinates || EXCLUDED.coordinates, - evidence = variant_v2.evidence || EXCLUDED.evidence, - primers = variant_v2.primers || EXCLUDED.primers, - annotations = variant_v2.annotations || EXCLUDED.annotations, - notes = COALESCE(variant_v2.notes, EXCLUDED.notes), - naming_status = CASE - WHEN variant_v2.naming_status = 'UNNAMED' AND EXCLUDED.naming_status = 'NAMED' THEN 'NAMED' - ELSE variant_v2.naming_status - END, - updated_at = NOW() - RETURNING variant_id - """.as[Int] - } else DBIO.successful(Seq.empty[Int]) - - // === Unnamed Variants Upsert === - val unnamedUpsertAction = if (unnamedVariants.nonEmpty) { - val 
unnamedValues = unnamedVariants.map { v => - val hs1CoordsOpt = v.getCoordinates("hs1") - val (contig, position, ref, alt) = hs1CoordsOpt match { - case Some(c) => - ((c \ "contig").asOpt[String].getOrElse(""), (c \ "position").asOpt[Int].getOrElse(0).toString, (c \ "ref").asOpt[String].getOrElse(""), (c \ "alt").asOpt[String].getOrElse("")) - case None => throw new IllegalArgumentException("Unnamed variant without hs1 coordinates cannot be upserted.") - } - - val mutationType = escapeSql(v.mutationType.dbValue) - val namingStatus = escapeSql(v.namingStatus.dbValue) - val aliases = toJsonb(v.aliases) - val coordinates = toJsonb(v.coordinates) - val evidence = toJsonb(v.evidence) - val primers = toJsonb(v.primers) - val notes = optString(v.notes) - val annotations = toJsonb(v.annotations) - val createdAt = v.createdAt.getEpochSecond - val updatedAt = v.updatedAt.getEpochSecond - - s"(NEXTVAL('variant_v2_variant_id_seq'), NULL, '$mutationType', '$namingStatus', '$aliases', '$coordinates', NULL, '$evidence', '$primers', $notes, '$annotations', TO_TIMESTAMP($createdAt), TO_TIMESTAMP($updatedAt))" - }.mkString(",") - - sql""" - INSERT INTO variant_v2 (variant_id, canonical_name, mutation_type, naming_status, aliases, coordinates, defining_haplogroup_id, evidence, primers, notes, annotations, created_at, updated_at) - VALUES #$unnamedValues - ON CONFLICT ( - (coordinates->'hs1'->>'contig'), - ((coordinates->'hs1'->>'position')::int), - (coordinates->'hs1'->>'ref'), - (coordinates->'hs1'->>'alt') - ) WHERE canonical_name IS NULL DO UPDATE SET - mutation_type = EXCLUDED.mutation_type, - aliases = variant_v2.aliases || EXCLUDED.aliases, - coordinates = variant_v2.coordinates || EXCLUDED.coordinates, - evidence = variant_v2.evidence || EXCLUDED.evidence, - primers = variant_v2.primers || EXCLUDED.primers, - annotations = variant_v2.annotations || EXCLUDED.annotations, - notes = COALESCE(variant_v2.notes, EXCLUDED.notes), - naming_status = EXCLUDED.naming_status, - updated_at 
= NOW() - RETURNING variant_id - """.as[Int] - } else DBIO.successful(Seq.empty[Int]) - - db.run( - DBIO.sequence(Seq(namedUpsertAction, unnamedUpsertAction)).map(_.flatten).transactionally - ) - } - - override def updateRegionAnnotations(): Future[Int] = { - // Updates annotations with region and STR overlaps - val query = sqlu""" - WITH region_overlaps AS ( - SELECT v.variant_id, jsonb_agg( - jsonb_build_object('type', r.region_type, 'name', r.name) - ) as region_list - FROM variant_v2 v - JOIN genome_region_v2 r ON ( - v.coordinates->'GRCh38'->>'contig' = r.coordinates->'GRCh38'->>'contig' AND - (v.coordinates->'GRCh38'->>'position')::int >= (r.coordinates->'GRCh38'->>'start')::int AND - (v.coordinates->'GRCh38'->>'position')::int <= (r.coordinates->'GRCh38'->>'end')::int - ) - GROUP BY v.variant_id - ), - str_overlaps AS ( - SELECT v.variant_id, jsonb_agg( - jsonb_build_object( - 'name', s.canonical_name, - 'motif', s.coordinates->'GRCh38'->>'repeatMotif', - 'period', (s.coordinates->'GRCh38'->>'period')::int - ) - ) as str_list - FROM variant_v2 v - JOIN variant_v2 s ON ( - s.mutation_type = 'STR' AND - v.mutation_type != 'STR' AND - v.coordinates->'GRCh38'->>'contig' = s.coordinates->'GRCh38'->>'contig' AND - (v.coordinates->'GRCh38'->>'position')::int >= (s.coordinates->'GRCh38'->>'start')::int AND - (v.coordinates->'GRCh38'->>'position')::int <= (s.coordinates->'GRCh38'->>'end')::int - ) - GROUP BY v.variant_id - ) - UPDATE variant_v2 v - SET annotations = - jsonb_build_object( - 'regions', COALESCE(ro.region_list, '[]'::jsonb), - 'strs', COALESCE(so.str_list, '[]'::jsonb) - ), - updated_at = NOW() - FROM region_overlaps ro - LEFT JOIN str_overlaps so ON ro.variant_id = so.variant_id - WHERE v.variant_id = ro.variant_id - AND (ro.region_list IS NOT NULL OR so.str_list IS NOT NULL) - """ - db.run(query) - } - - // === JSONB Update Operations === - - override def addCoordinates(variantId: Int, refGenome: String, coordinates: JsObject): Future[Boolean] = { - 
val coordsJson = Json.stringify(coordinates) - val query = sql""" - UPDATE variant_v2 - SET coordinates = coordinates || jsonb_build_object($refGenome, $coordsJson::jsonb), - updated_at = NOW() - WHERE variant_id = $variantId - """.asUpdate - db.run(query).map(_ > 0) - } - - override def addAlias(variantId: Int, aliasType: String, aliasValue: String, source: Option[String] = None): Future[Boolean] = { - val updateQuery = aliasType match { - case "common_name" => - sql""" - UPDATE variant_v2 - SET aliases = jsonb_set( - aliases, - '{common_names}', - COALESCE(aliases->'common_names', '[]'::jsonb) || to_jsonb($aliasValue::text), - true - ), - updated_at = NOW() - WHERE variant_id = $variantId - AND NOT (COALESCE(aliases->'common_names', '[]'::jsonb) ? $aliasValue) - """.asUpdate - case "rs_id" => - sql""" - UPDATE variant_v2 - SET aliases = jsonb_set( - aliases, - '{rs_ids}', - COALESCE(aliases->'rs_ids', '[]'::jsonb) || to_jsonb($aliasValue::text), - true - ), - updated_at = NOW() - WHERE variant_id = $variantId - AND NOT (COALESCE(aliases->'rs_ids', '[]'::jsonb) ? $aliasValue) - """.asUpdate - case srcType => - val effectiveSource = source.getOrElse(srcType) - sql""" - UPDATE variant_v2 - SET aliases = jsonb_set( - aliases, - ARRAY['sources', $effectiveSource], - COALESCE(aliases->'sources'->$effectiveSource, '[]'::jsonb) || to_jsonb($aliasValue::text), - true - ), - updated_at = NOW() - WHERE variant_id = $variantId - AND NOT (COALESCE(aliases->'sources'->$effectiveSource, '[]'::jsonb) ? $aliasValue) - """.asUpdate - } - db.run(updateQuery).map(_ > 0) - } - - // === Alias Source Management === - - override def bulkUpdateAliasSource(aliasPrefix: String, newSource: String, oldSource: Option[String]): Future[Int] = { - val oldSourceFilter = oldSource.map(s => s"AND aliases->'sources' ? 
'${s.replace("'", "''")}'").getOrElse("") - val upperPrefix = aliasPrefix.toUpperCase - db.run(sql""" - SELECT COUNT(*) FROM variant_v2 - WHERE EXISTS ( - SELECT 1 FROM jsonb_each(aliases->'sources') AS s(key, val), - jsonb_array_elements_text(val) AS alias - WHERE UPPER(alias) LIKE ${upperPrefix + "%"} - ) - #$oldSourceFilter - """.as[Int].head) - } - - override def getAliasSourceStats(): Future[Seq[(String, Int)]] = { - db.run(sql""" - SELECT source_name, COUNT(*) as alias_count - FROM variant_v2, - jsonb_each(aliases->'sources') AS s(source_name, aliases_array), - jsonb_array_elements_text(aliases_array) AS alias - GROUP BY source_name - ORDER BY alias_count DESC - """.as[(String, Int)]) - } - - override def countAliasesByPrefixAndSource(aliasPrefix: String, source: Option[String]): Future[Int] = { - val upperPrefix = aliasPrefix.toUpperCase - source match { - case Some(src) => - db.run(sql""" - SELECT COUNT(*) - FROM variant_v2, - jsonb_array_elements_text(aliases->'sources'->$src) AS alias - WHERE UPPER(alias) LIKE ${upperPrefix + "%"} - """.as[Int].head) - case None => - db.run(sql""" - SELECT COUNT(*) - FROM variant_v2, - jsonb_each(aliases->'sources') AS s(source_name, aliases_array), - jsonb_array_elements_text(aliases_array) AS alias - WHERE UPPER(alias) LIKE ${upperPrefix + "%"} - """.as[Int].head) - } - } - - override def updateEvidence(variantId: Int, evidence: JsObject): Future[Boolean] = { - val evidenceJson = Json.stringify(evidence) - val query = sql""" - UPDATE variant_v2 - SET evidence = evidence || $evidenceJson::jsonb, - updated_at = NOW() - WHERE variant_id = $variantId - """.asUpdate - db.run(query).map(_ > 0) - } - - // === Curator CRUD === - - override def update(variant: VariantV2): Future[Boolean] = { - variant.variantId match { - case Some(id) => - val now = Instant.now() - db.run( - variantsV2 - .filter(_.variantId === id) - .map(v => ( - v.canonicalName, - v.mutationType, - v.namingStatus, - v.aliases, - v.coordinates, - 
v.definingHaplogroupId, - v.evidence, - v.primers, - v.notes, - v.annotations, - v.updatedAt - )) - .update(( - variant.canonicalName, - variant.mutationType, - variant.namingStatus, - variant.aliases, - variant.coordinates, - variant.definingHaplogroupId, - variant.evidence, - variant.primers, - variant.notes, - variant.annotations, - now - )) - ).map(_ > 0) - case None => Future.successful(false) - } - } - - override def updateBatch(variants: Seq[VariantV2]): Future[Int] = { - if (variants.isEmpty) return Future.successful(0) - val actions = DBIO.sequence(variants.flatMap { variant => - variant.variantId.map { id => - val now = Instant.now() - variantsV2.filter(_.variantId === id).map(v => (v.canonicalName, v.mutationType, v.namingStatus, v.aliases, v.coordinates, v.definingHaplogroupId, v.evidence, v.primers, v.notes, v.annotations, v.updatedAt)).update((variant.canonicalName, variant.mutationType, variant.namingStatus, variant.aliases, variant.coordinates, variant.definingHaplogroupId, variant.evidence, variant.primers, variant.notes, variant.annotations, now)) - } - }) - db.run(actions.transactionally).map(_.sum) - } - - override def delete(id: Int): Future[Boolean] = { - db.run(variantsV2.filter(_.variantId === id).delete).map(_ > 0) - } - - override def searchPaginated(query: String, offset: Int, limit: Int, mutationType: Option[String] = None): Future[(Seq[VariantV2], Int)] = { - val upperQuery = query.toUpperCase - val searchPattern = s"%$upperQuery%" - val hasQuery = query.trim.nonEmpty - val validatedType = mutationType.flatMap(MutationType.fromString) - val typeFilter = validatedType.map(t => s"AND mutation_type = '${t.dbValue}'").getOrElse("") - - val searchSql = if (hasQuery) { - sql""" - SELECT * FROM variant_v2 - WHERE (UPPER(canonical_name) LIKE $searchPattern OR EXISTS (SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name WHERE UPPER(name) LIKE $searchPattern) OR EXISTS (SELECT 1 FROM 
jsonb_array_elements_text(aliases->'rs_ids') AS rsid WHERE UPPER(rsid) LIKE $searchPattern)) - #$typeFilter - ORDER BY canonical_name NULLS LAST OFFSET $offset LIMIT $limit - """.as[VariantV2](variantV2GetResult) - } else { - sql"""SELECT * FROM variant_v2 WHERE 1=1 #$typeFilter ORDER BY canonical_name NULLS LAST OFFSET $offset LIMIT $limit""".as[VariantV2](variantV2GetResult) - } - - val countSql = if (hasQuery) { - sql""" - SELECT COUNT(*) FROM variant_v2 - WHERE (UPPER(canonical_name) LIKE $searchPattern OR EXISTS (SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name WHERE UPPER(name) LIKE $searchPattern) OR EXISTS (SELECT 1 FROM jsonb_array_elements_text(aliases->'rs_ids') AS rsid WHERE UPPER(rsid) LIKE $searchPattern)) - #$typeFilter - """.as[Int].head - } else { - sql"""SELECT COUNT(*) FROM variant_v2 WHERE 1=1 #$typeFilter""".as[Int].head - } - - for { results <- db.run(searchSql); count <- db.run(countSql) } yield (results, count) - } - - override def count(query: Option[String] = None, mutationType: Option[String] = None): Future[Int] = { - val validatedType = mutationType.flatMap(MutationType.fromString) - val typeFilter = validatedType.map(t => s"AND mutation_type = '${t.dbValue}'").getOrElse("") - query match { - case Some(q) if q.trim.nonEmpty => - val upperQuery = q.toUpperCase - val searchPattern = s"%$upperQuery%" - db.run(sql""" - SELECT COUNT(*) FROM variant_v2 - WHERE (UPPER(canonical_name) LIKE $searchPattern OR EXISTS (SELECT 1 FROM jsonb_array_elements_text(aliases->'common_names') AS name WHERE UPPER(name) LIKE $searchPattern)) - #$typeFilter - """.as[Int].head) - case _ => - db.run(sql"""SELECT COUNT(*) FROM variant_v2 WHERE 1=1 #$typeFilter""".as[Int].head) - } - } - - // === Bulk Operations === - - override def countAll(): Future[Int] = db.run(variantsV2.length.result) - - override def fetchBatch(offset: Int, limit: Int): Future[Seq[VariantV2]] = - db.run(variantsV2.sortBy(_.variantId).drop(offset).take(limit).result) - 
override def findByIds(ids: Seq[Int]): Future[Seq[VariantV2]] = if (ids.isEmpty) Future.successful(Seq.empty) else db.run(variantsV2.filter(_.variantId.inSet(ids)).result) - - override def searchByNames(names: Seq[String]): Future[Map[String, Seq[VariantV2]]] = { - if (names.isEmpty) return Future.successful(Map.empty) - - // Variant names are typically uppercase (M269, L21, etc.) - val searchNames = names.flatMap(n => Seq(n, n.toUpperCase)).distinct - val batchSize = 2000 // PostgreSQL handles IN/ANY efficiently up to several thousand - - val batches = searchNames.grouped(batchSize).toSeq - - // Process batches sequentially - batches.foldLeft(Future.successful(Map.empty[String, Seq[VariantV2]])) { (accFuture, batch) => - accFuture.flatMap { acc => - val namesArray = batch.map(n => s"'${n.replace("'", "''")}'").mkString(",") - val query = sql""" - SELECT variant_id, canonical_name, mutation_type, naming_status, - aliases, coordinates, defining_haplogroup_id, evidence, - primers, notes, created_at, updated_at, annotations - FROM variant_v2 - WHERE canonical_name = ANY(ARRAY[#$namesArray]) - """.as[VariantV2](variantV2GetResult) - - db.run(query).map { variants => - val batchResults = variants.flatMap { v => - v.canonicalName.map(cn => cn.toUpperCase -> v) - }.groupMap(_._1)(_._2) - - // Merge into accumulator - batchResults.foldLeft(acc) { case (map, (name, vars)) => - map.updatedWith(name) { - case Some(existing) => Some(existing ++ vars) - case None => Some(vars) - } - } - } - } - } - } - - // === DU Naming Authority === - - private val DuNamePattern = "^DU[1-9][0-9]*$".r - override def nextDuName(): Future[String] = db.run(sql"SELECT next_du_name()".as[String].head) - override def currentDuName(): Future[Option[String]] = db.run(sql"SELECT current_du_name()".as[String].headOption).recover { case _: PSQLException => None } - override def isDuName(name: String): Boolean = DuNamePattern.matches(name) - override def createWithDuName(variant: VariantV2): 
Future[VariantV2] = { - val action = for { - duName <- sql"SELECT next_du_name()".as[String].head - now = Instant.now() - id <- (variantsV2 returning variantsV2.map(_.variantId)) += variant.copy(canonicalName = Some(duName), namingStatus = NamingStatus.Named, createdAt = now, updatedAt = now) - } yield variant.copy(variantId = Some(id), canonicalName = Some(duName), namingStatus = NamingStatus.Named) - db.run(action.transactionally) - } - - // === GetResult for raw SQL queries === - - private val variantV2GetResult: GetResult[VariantV2] = GetResult { r => - val variantId = r.nextIntOption() // 1 - val canonicalName = r.nextStringOption() // 2 - val mutationTypeStr = r.nextString() // 3 - val namingStatusStr = r.nextString() // 4 - val aliasesStr = r.nextString() // 5 - val coordinatesStr = r.nextString() // 6 - val definingHaplogroupId = r.nextIntOption() // 7 - val evidenceStr = r.nextString() // 8 - val primersStr = r.nextString() // 9 - val notes = r.nextStringOption() // 10 - val createdAt = r.nextTimestamp().toInstant // 11 - val updatedAt = r.nextTimestamp().toInstant // 12 - val annotationsStr = r.nextString() // 13 - - VariantV2( - variantId = variantId, - canonicalName = canonicalName, - mutationType = MutationType.fromStringOrDefault(mutationTypeStr), - namingStatus = NamingStatus.fromStringOrDefault(namingStatusStr), - aliases = Json.parse(aliasesStr), - coordinates = Json.parse(coordinatesStr), - definingHaplogroupId = definingHaplogroupId, - evidence = Json.parse(evidenceStr), - primers = Json.parse(primersStr), - notes = notes, - annotations = Json.parse(annotationsStr), - createdAt = createdAt, - updatedAt = updatedAt - ) - } -} diff --git a/app/repositories/WipTreeRepository.scala b/app/repositories/WipTreeRepository.scala deleted file mode 100644 index 9ff4f856..00000000 --- a/app/repositories/WipTreeRepository.scala +++ /dev/null @@ -1,517 +0,0 @@ -package repositories - -import jakarta.inject.Inject -import models.HaplogroupType -import 
models.dal.domain.haplogroups.* -import play.api.Logging -import play.api.db.slick.DatabaseConfigProvider - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Repository interface for WIP (Work In Progress) shadow tables. - * - * These tables stage merge changes before they are applied to production. - * All operations are scoped by change_set_id for easy cleanup. - */ -trait WipTreeRepository { - - // ============================================================================ - // WIP Haplogroup Operations - // ============================================================================ - - /** - * Create a new WIP haplogroup (staged node not yet in production). - * Returns the generated wip_haplogroup_id. - */ - def createWipHaplogroup(row: WipHaplogroupRow): Future[Int] - - /** - * Bulk insert WIP haplogroups. - */ - def createWipHaplogroups(rows: Seq[WipHaplogroupRow]): Future[Seq[Int]] - - /** - * Get a WIP haplogroup by change set and placeholder ID. - */ - def getWipHaplogroup(changeSetId: Int, placeholderId: Int): Future[Option[WipHaplogroupRow]] - - /** - * Get all WIP haplogroups for a change set. - */ - def getWipHaplogroupsForChangeSet(changeSetId: Int): Future[Seq[WipHaplogroupRow]] - - /** - * Get WIP haplogroups by name within a change set. - */ - def getWipHaplogroupByName(changeSetId: Int, name: String): Future[Option[WipHaplogroupRow]] - - // ============================================================================ - // WIP Relationship Operations - // ============================================================================ - - /** - * Create a new WIP relationship (staged parent-child relationship). - */ - def createWipRelationship(row: WipRelationshipRow): Future[Int] - - /** - * Bulk insert WIP relationships. - */ - def createWipRelationships(rows: Seq[WipRelationshipRow]): Future[Seq[Int]] - - /** - * Get all WIP relationships for a change set. 
- */ - def getWipRelationshipsForChangeSet(changeSetId: Int): Future[Seq[WipRelationshipRow]] - - /** - * Get relationships where a specific placeholder ID is the child. - */ - def getWipRelationshipsForChild(changeSetId: Int, childPlaceholderId: Int): Future[Seq[WipRelationshipRow]] - - /** - * Get relationships where a specific placeholder ID is the parent. - */ - def getWipRelationshipsForParent(changeSetId: Int, parentPlaceholderId: Int): Future[Seq[WipRelationshipRow]] - - // ============================================================================ - // WIP Variant Operations - // ============================================================================ - - /** - * Create a new WIP variant association. - */ - def createWipHaplogroupVariant(row: WipHaplogroupVariantRow): Future[Int] - - /** - * Bulk insert WIP variant associations. - */ - def createWipHaplogroupVariants(rows: Seq[WipHaplogroupVariantRow]): Future[Seq[Int]] - - /** - * Bulk insert WIP variant associations, ignoring duplicates. - * Filters out any variants that already exist for the same haplogroup/placeholder. - */ - def upsertWipHaplogroupVariants(rows: Seq[WipHaplogroupVariantRow]): Future[Seq[Int]] - - /** - * Get all WIP variant associations for a change set. - */ - def getWipVariantsForChangeSet(changeSetId: Int): Future[Seq[WipHaplogroupVariantRow]] - - /** - * Get variants for a specific placeholder haplogroup. - */ - def getWipVariantsForPlaceholder(changeSetId: Int, placeholderId: Int): Future[Seq[WipHaplogroupVariantRow]] - - /** - * Get variants for a specific production haplogroup. - */ - def getWipVariantsForHaplogroup(changeSetId: Int, haplogroupId: Int): Future[Seq[WipHaplogroupVariantRow]] - - // ============================================================================ - // WIP Reparent Operations - // ============================================================================ - - /** - * Create a new WIP reparent operation. 
- */ - def createWipReparent(row: WipReparentRow): Future[Int] - - /** - * Create or update a WIP reparent operation. - * If a reparent already exists for this haplogroup in this change set, update it. - * This handles cases where a node is reparented multiple times during a merge - * (e.g., once by SUBTREE_LOOK_AHEAD and again by DEPTH_GRAFT). - */ - def upsertWipReparent(row: WipReparentRow): Future[Int] - - /** - * Bulk insert WIP reparent operations. - */ - def createWipReparents(rows: Seq[WipReparentRow]): Future[Seq[Int]] - - /** - * Get all WIP reparent operations for a change set. - */ - def getWipReparentsForChangeSet(changeSetId: Int): Future[Seq[WipReparentRow]] - - /** - * Get reparent for a specific haplogroup. - */ - def getWipReparent(changeSetId: Int, haplogroupId: Int): Future[Option[WipReparentRow]] - - // ============================================================================ - // WIP Resolution Operations (Curator Conflict Corrections) - // ============================================================================ - - /** - * Create a new resolution for a WIP item. - */ - def createResolution(row: WipResolutionRow): Future[Int] - - /** - * Get all resolutions for a change set. - */ - def getResolutionsForChangeSet(changeSetId: Int): Future[Seq[WipResolutionRow]] - - /** - * Get pending resolutions for a change set. - */ - def getPendingResolutions(changeSetId: Int): Future[Seq[WipResolutionRow]] - - /** - * Get deferred items for a change set. - */ - def getDeferredItems(changeSetId: Int): Future[Seq[WipResolutionRow]] - - /** - * Get resolution for a specific WIP haplogroup. - */ - def getResolutionForWipHaplogroup(changeSetId: Int, wipHaplogroupId: Int): Future[Option[WipResolutionRow]] - - /** - * Get resolution for a specific WIP reparent. - */ - def getResolutionForWipReparent(changeSetId: Int, wipReparentId: Int): Future[Option[WipResolutionRow]] - - /** - * Update resolution status (APPLIED or CANCELLED). 
- */ - def updateResolutionStatus(resolutionId: Int, status: String, appliedAt: Option[java.time.LocalDateTime] = None): Future[Int] - - /** - * Cancel a resolution. - */ - def cancelResolution(resolutionId: Int): Future[Int] - - // ============================================================================ - // Cleanup Operations - // ============================================================================ - - /** - * Delete all WIP data for a change set. - * Called when discarding a change set. - */ - def deleteWipDataForChangeSet(changeSetId: Int): Future[Int] - - // ============================================================================ - // Statistics - // ============================================================================ - - /** - * Get counts of WIP data for a change set. - */ - def getWipStatistics(changeSetId: Int): Future[WipStatistics] -} - -/** - * Statistics about WIP data for a change set. - */ -case class WipStatistics( - haplogroups: Int, - relationships: Int, - variants: Int, - reparents: Int -) - -class WipTreeRepositoryImpl @Inject()( - dbConfigProvider: DatabaseConfigProvider -)(implicit ec: ExecutionContext) - extends BaseRepository(dbConfigProvider) - with WipTreeRepository - with Logging { - - import models.dal.DatabaseSchema.domain.haplogroups.{wipHaplogroups, wipRelationships, wipHaplogroupVariants, wipReparents} - import models.dal.MyPostgresProfile.api.* - - // ============================================================================ - // WIP Haplogroup Implementations - // ============================================================================ - - override def createWipHaplogroup(row: WipHaplogroupRow): Future[Int] = { - val query = (wipHaplogroups returning wipHaplogroups.map(_.id)) += row - runQuery(query) - } - - override def createWipHaplogroups(rows: Seq[WipHaplogroupRow]): Future[Seq[Int]] = { - val query = (wipHaplogroups returning wipHaplogroups.map(_.id)) ++= rows - runQuery(query) - } - - override 
def getWipHaplogroup(changeSetId: Int, placeholderId: Int): Future[Option[WipHaplogroupRow]] = { - val query = wipHaplogroups - .filter(h => h.changeSetId === changeSetId && h.placeholderId === placeholderId) - .result.headOption - runQuery(query) - } - - override def getWipHaplogroupsForChangeSet(changeSetId: Int): Future[Seq[WipHaplogroupRow]] = { - val query = wipHaplogroups - .filter(_.changeSetId === changeSetId) - .sortBy(_.placeholderId) - .result - runQuery(query) - } - - override def getWipHaplogroupByName(changeSetId: Int, name: String): Future[Option[WipHaplogroupRow]] = { - val query = wipHaplogroups - .filter(h => h.changeSetId === changeSetId && h.name === name) - .result.headOption - runQuery(query) - } - - // ============================================================================ - // WIP Relationship Implementations - // ============================================================================ - - override def createWipRelationship(row: WipRelationshipRow): Future[Int] = { - val query = (wipRelationships returning wipRelationships.map(_.id)) += row - runQuery(query) - } - - override def createWipRelationships(rows: Seq[WipRelationshipRow]): Future[Seq[Int]] = { - val query = (wipRelationships returning wipRelationships.map(_.id)) ++= rows - runQuery(query) - } - - override def getWipRelationshipsForChangeSet(changeSetId: Int): Future[Seq[WipRelationshipRow]] = { - val query = wipRelationships - .filter(_.changeSetId === changeSetId) - .result - runQuery(query) - } - - override def getWipRelationshipsForChild(changeSetId: Int, childPlaceholderId: Int): Future[Seq[WipRelationshipRow]] = { - val query = wipRelationships - .filter(r => r.changeSetId === changeSetId && r.childPlaceholderId === childPlaceholderId) - .result - runQuery(query) - } - - override def getWipRelationshipsForParent(changeSetId: Int, parentPlaceholderId: Int): Future[Seq[WipRelationshipRow]] = { - val query = wipRelationships - .filter(r => r.changeSetId === changeSetId 
&& r.parentPlaceholderId === parentPlaceholderId) - .result - runQuery(query) - } - - // ============================================================================ - // WIP Variant Implementations - // ============================================================================ - - override def createWipHaplogroupVariant(row: WipHaplogroupVariantRow): Future[Int] = { - val query = (wipHaplogroupVariants returning wipHaplogroupVariants.map(_.id)) += row - runQuery(query) - } - - override def createWipHaplogroupVariants(rows: Seq[WipHaplogroupVariantRow]): Future[Seq[Int]] = { - if (rows.isEmpty) { - Future.successful(Seq.empty) - } else { - val query = (wipHaplogroupVariants returning wipHaplogroupVariants.map(_.id)) ++= rows - runQuery(query) - } - } - - override def upsertWipHaplogroupVariants(rows: Seq[WipHaplogroupVariantRow]): Future[Seq[Int]] = { - if (rows.isEmpty) { - Future.successful(Seq.empty) - } else { - // Get the change set ID (all rows should have the same one) - val changeSetId = rows.head.changeSetId - - // Get existing variants for this change set, then filter out duplicates - getWipVariantsForChangeSet(changeSetId).flatMap { existing => - // Build a set of existing keys: (haplogroupId, placeholderId, variantId) - val existingKeys = existing.map { v => - (v.haplogroupId, v.haplogroupPlaceholderId, v.variantId) - }.toSet - - // Filter out rows that would be duplicates - val newRows = rows.filterNot { row => - existingKeys.contains((row.haplogroupId, row.haplogroupPlaceholderId, row.variantId)) - } - - if (newRows.isEmpty) { - Future.successful(Seq.empty) - } else { - val query = (wipHaplogroupVariants returning wipHaplogroupVariants.map(_.id)) ++= newRows - runQuery(query) - } - } - } - } - - override def getWipVariantsForChangeSet(changeSetId: Int): Future[Seq[WipHaplogroupVariantRow]] = { - val query = wipHaplogroupVariants - .filter(_.changeSetId === changeSetId) - .result - runQuery(query) - } - - override def 
getWipVariantsForPlaceholder(changeSetId: Int, placeholderId: Int): Future[Seq[WipHaplogroupVariantRow]] = { - val query = wipHaplogroupVariants - .filter(v => v.changeSetId === changeSetId && v.haplogroupPlaceholderId === placeholderId) - .result - runQuery(query) - } - - override def getWipVariantsForHaplogroup(changeSetId: Int, haplogroupId: Int): Future[Seq[WipHaplogroupVariantRow]] = { - val query = wipHaplogroupVariants - .filter(v => v.changeSetId === changeSetId && v.haplogroupId === haplogroupId) - .result - runQuery(query) - } - - // ============================================================================ - // WIP Reparent Implementations - // ============================================================================ - - override def createWipReparent(row: WipReparentRow): Future[Int] = { - val query = (wipReparents returning wipReparents.map(_.id)) += row - runQuery(query) - } - - override def upsertWipReparent(row: WipReparentRow): Future[Int] = { - // Check if a reparent already exists for this haplogroup in this change set - getWipReparent(row.changeSetId, row.haplogroupId).flatMap { - case Some(existing) => - // Update the existing reparent with the new parent - val updateQuery = wipReparents - .filter(r => r.changeSetId === row.changeSetId && r.haplogroupId === row.haplogroupId) - .map(r => (r.newParentId, r.newParentPlaceholderId, r.source, r.createdAt)) - .update((row.newParentId, row.newParentPlaceholderId, row.source, row.createdAt)) - runQuery(updateQuery).map(_ => existing.id.getOrElse(0)) - case None => - // No existing reparent, create a new one - createWipReparent(row) - } - } - - override def createWipReparents(rows: Seq[WipReparentRow]): Future[Seq[Int]] = { - if (rows.isEmpty) { - Future.successful(Seq.empty) - } else { - val query = (wipReparents returning wipReparents.map(_.id)) ++= rows - runQuery(query) - } - } - - override def getWipReparentsForChangeSet(changeSetId: Int): Future[Seq[WipReparentRow]] = { - val query = 
wipReparents - .filter(_.changeSetId === changeSetId) - .result - runQuery(query) - } - - override def getWipReparent(changeSetId: Int, haplogroupId: Int): Future[Option[WipReparentRow]] = { - val query = wipReparents - .filter(r => r.changeSetId === changeSetId && r.haplogroupId === haplogroupId) - .result.headOption - runQuery(query) - } - - // ============================================================================ - // WIP Resolution Implementations - // ============================================================================ - - import models.dal.DatabaseSchema.domain.haplogroups.wipResolutions - - override def createResolution(row: WipResolutionRow): Future[Int] = { - val query = (wipResolutions returning wipResolutions.map(_.id)) += row - runQuery(query) - } - - override def getResolutionsForChangeSet(changeSetId: Int): Future[Seq[WipResolutionRow]] = { - val query = wipResolutions - .filter(_.changeSetId === changeSetId) - .sortBy(_.createdAt.desc) - .result - runQuery(query) - } - - override def getPendingResolutions(changeSetId: Int): Future[Seq[WipResolutionRow]] = { - val query = wipResolutions - .filter(r => r.changeSetId === changeSetId && r.status === "PENDING") - .sortBy(_.createdAt.desc) - .result - runQuery(query) - } - - override def getDeferredItems(changeSetId: Int): Future[Seq[WipResolutionRow]] = { - val query = wipResolutions - .filter(r => r.changeSetId === changeSetId && r.resolutionType === "DEFER" && r.status === "PENDING") - .sortBy(r => (r.deferPriority.desc, r.createdAt.desc)) - .result - runQuery(query) - } - - override def getResolutionForWipHaplogroup(changeSetId: Int, wipHaplogroupId: Int): Future[Option[WipResolutionRow]] = { - val query = wipResolutions - .filter(r => r.changeSetId === changeSetId && r.wipHaplogroupId === wipHaplogroupId && r.status === "PENDING") - .result.headOption - runQuery(query) - } - - override def getResolutionForWipReparent(changeSetId: Int, wipReparentId: Int): Future[Option[WipResolutionRow]] 
= { - val query = wipResolutions - .filter(r => r.changeSetId === changeSetId && r.wipReparentId === wipReparentId && r.status === "PENDING") - .result.headOption - runQuery(query) - } - - override def updateResolutionStatus(resolutionId: Int, status: String, appliedAt: Option[LocalDateTime] = None): Future[Int] = { - val query = wipResolutions - .filter(_.id === resolutionId) - .map(r => (r.status, r.appliedAt)) - .update((status, appliedAt)) - runQuery(query) - } - - override def cancelResolution(resolutionId: Int): Future[Int] = { - updateResolutionStatus(resolutionId, "CANCELLED", None) - } - - // ============================================================================ - // Cleanup Implementations - // ============================================================================ - - override def deleteWipDataForChangeSet(changeSetId: Int): Future[Int] = { - // Tables have ON DELETE CASCADE, but we can also explicitly delete - // Delete in order: variants, relationships, reparents, haplogroups - val deleteVariants = wipHaplogroupVariants.filter(_.changeSetId === changeSetId).delete - val deleteRelationships = wipRelationships.filter(_.changeSetId === changeSetId).delete - val deleteReparents = wipReparents.filter(_.changeSetId === changeSetId).delete - val deleteHaplogroups = wipHaplogroups.filter(_.changeSetId === changeSetId).delete - - val action = for { - v <- deleteVariants - rel <- deleteRelationships - rep <- deleteReparents - h <- deleteHaplogroups - } yield v + rel + rep + h - - runQuery(action) - } - - // ============================================================================ - // Statistics Implementations - // ============================================================================ - - override def getWipStatistics(changeSetId: Int): Future[WipStatistics] = { - val countHaplogroups = wipHaplogroups.filter(_.changeSetId === changeSetId).length.result - val countRelationships = wipRelationships.filter(_.changeSetId === 
changeSetId).length.result - val countVariants = wipHaplogroupVariants.filter(_.changeSetId === changeSetId).length.result - val countReparents = wipReparents.filter(_.changeSetId === changeSetId).length.result - - for { - h <- runQuery(countHaplogroups) - rel <- runQuery(countRelationships) - v <- runQuery(countVariants) - rep <- runQuery(countReparents) - } yield WipStatistics(h, rel, v, rep) - } -} diff --git a/app/repositories/social/ReputationEventRepository.scala b/app/repositories/social/ReputationEventRepository.scala deleted file mode 100644 index dedf112a..00000000 --- a/app/repositories/social/ReputationEventRepository.scala +++ /dev/null @@ -1,28 +0,0 @@ -package repositories.social - -import models.dal.DatabaseSchema -import models.domain.social.ReputationEvent -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.PostgresProfile - -import java.util.UUID -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class ReputationEventRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[PostgresProfile] { - - import profile.api.* - - private val events = DatabaseSchema.domain.social.reputationEvents - - def create(event: ReputationEvent): Future[ReputationEvent] = { - db.run((events returning events) += event) - } - - def findByUserId(userId: UUID): Future[Seq[ReputationEvent]] = { - db.run(events.filter(_.userId === userId).sortBy(_.createdAt.desc).result) - } -} diff --git a/app/repositories/social/ReputationEventTypeRepository.scala b/app/repositories/social/ReputationEventTypeRepository.scala deleted file mode 100644 index 1a763495..00000000 --- a/app/repositories/social/ReputationEventTypeRepository.scala +++ /dev/null @@ -1,32 +0,0 @@ -package repositories.social - -import models.dal.DatabaseSchema -import models.domain.social.ReputationEventType -import 
play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.PostgresProfile - -import java.util.UUID -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class ReputationEventTypeRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[PostgresProfile] { - - import profile.api.* - - private val eventTypes = DatabaseSchema.domain.social.reputationEventTypes - - def findByName(name: String): Future[Option[ReputationEventType]] = { - db.run(eventTypes.filter(_.name === name).result.headOption) - } - - def getById(id: UUID): Future[Option[ReputationEventType]] = { - db.run(eventTypes.filter(_.id === id).result.headOption) - } - - def create(eventType: ReputationEventType): Future[ReputationEventType] = { - db.run((eventTypes returning eventTypes) += eventType) - } -} diff --git a/app/repositories/social/UserReputationScoreRepository.scala b/app/repositories/social/UserReputationScoreRepository.scala deleted file mode 100644 index ce24b259..00000000 --- a/app/repositories/social/UserReputationScoreRepository.scala +++ /dev/null @@ -1,34 +0,0 @@ -package repositories.social - -import models.dal.DatabaseSchema -import models.domain.social.UserReputationScore -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.PostgresProfile - -import java.time.LocalDateTime -import java.util.UUID -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class UserReputationScoreRepository @Inject()( - protected val dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[PostgresProfile] { - - import profile.api.* - - private val scores = DatabaseSchema.domain.social.userReputationScores - - def findByUserId(userId: UUID): Future[Option[UserReputationScore]] = { - 
db.run(scores.filter(_.userId === userId).result.headOption) - } - - def upsertScore(userId: UUID, newScore: Long): Future[Int] = { - val action = scores.insertOrUpdate(UserReputationScore(userId, newScore, LocalDateTime.now())) - db.run(action) - } - - def create(score: UserReputationScore): Future[UserReputationScore] = { - db.run((scores returning scores) += score) - } -} diff --git a/app/services/ATProtocolClient.scala b/app/services/ATProtocolClient.scala deleted file mode 100644 index 32e934e1..00000000 --- a/app/services/ATProtocolClient.scala +++ /dev/null @@ -1,390 +0,0 @@ -package services - -import com.google.inject.Inject -import play.api.libs.json.{JsError, JsSuccess, Json, JsValue} -import play.api.libs.ws.WSClient -import play.api.{Configuration, Logging} -import play.api.libs.ws.JsonBodyWritables._ - -import javax.naming.directory.{InitialDirContext, Attributes} -import java.util.Hashtable -import scala.concurrent.duration.* -import scala.concurrent.{ExecutionContext, Future} -import scala.util.{Try, Success, Failure} - -/** - * Service to interact with the AT Protocol for PDS (Personal Data Server) operations. - * - * This client provides methods to resolve DIDs to PDS endpoints and verify repository commits. - * - * @param ws The `WSClient` used for making HTTP requests. - * @param configuration Play configuration for settings like timeouts. - * @param ec The execution context for asynchronous operations. - */ -class ATProtocolClient @Inject()( - ws: WSClient, - configuration: Configuration - )(implicit ec: ExecutionContext) extends Logging { - - private val timeout: FiniteDuration = configuration.getOptional[Int]("atproto.client.timeout").getOrElse(5000).millis - - // PLC Directory for did:plc resolution - private val plcDirectoryUrl = configuration.getOptional[String]("atproto.plc.directory") - .getOrElse("https://plc.directory") - - /** - * Resolves a handle to its DID using the AT Protocol handle resolution mechanism. 
- * - * Resolution order (per AT Protocol spec): - * 1. DNS TXT record at _atproto.{handle} - * 2. HTTPS well-known at https://{handle}/.well-known/atproto-did - * - * @param handle The handle to resolve (e.g., "alice.bsky.social" or "alice.example.com") - * @return A Future containing the DID if resolved, otherwise None. - */ - def resolveHandle(handle: String): Future[Option[String]] = { - // Normalize handle (remove @ prefix if present) - val normalizedHandle = handle.stripPrefix("@").toLowerCase - - // Try DNS first, then fall back to well-known - resolveHandleViaDns(normalizedHandle).flatMap { - case Some(did) => - logger.debug(s"Resolved handle $normalizedHandle via DNS to $did") - Future.successful(Some(did)) - case None => - resolveHandleViaWellKnown(normalizedHandle).map { didOpt => - didOpt.foreach(did => logger.debug(s"Resolved handle $normalizedHandle via well-known to $did")) - didOpt - } - } - } - - /** - * Resolves a handle via DNS TXT record lookup. - * Looks for TXT record at _atproto.{handle} containing "did=did:plc:xxx" or "did=did:web:xxx" - */ - private def resolveHandleViaDns(handle: String): Future[Option[String]] = Future { - Try { - val env = new Hashtable[String, String]() - env.put("java.naming.factory.initial", "com.sun.jndi.dns.DnsContextFactory") - - val ctx = new InitialDirContext(env) - try { - val attrs: Attributes = ctx.getAttributes(s"_atproto.$handle", Array("TXT")) - val txtRecord = attrs.get("TXT") - - if (txtRecord != null && txtRecord.size() > 0) { - val value = txtRecord.get(0).toString.replaceAll("\"", "") - // TXT record format: "did=did:plc:xxxx" or "did=did:web:xxxx" - if (value.startsWith("did=")) { - Some(value.substring(4)) - } else { - None - } - } else { - None - } - } finally { - ctx.close() - } - } match { - case Success(result) => result - case Failure(e) => - logger.debug(s"DNS resolution failed for _atproto.$handle: ${e.getMessage}") - None - } - } - - /** - * Resolves a handle via HTTPS well-known endpoint. 
- * Fetches https://{handle}/.well-known/atproto-did - */ - private def resolveHandleViaWellKnown(handle: String): Future[Option[String]] = { - val url = s"https://$handle/.well-known/atproto-did" - - ws.url(url) - .withRequestTimeout(timeout) - .get() - .map { response => - if (response.status == 200) { - val did = response.body.trim - if (did.startsWith("did:")) Some(did) else None - } else { - logger.debug(s"Well-known resolution failed for $handle: ${response.status}") - None - } - } - .recover { - case e: Exception => - logger.debug(s"Well-known resolution error for $handle: ${e.getMessage}") - None - } - } - - /** - * Resolves a DID to its associated PDS endpoint URL. - * - * For did:plc - queries plc.directory - * For did:web - fetches /.well-known/did.json from the domain - * - * @param did The Decentralized Identifier (DID) to resolve. - * @return A Future containing the PDS URL if resolved, otherwise None. - */ - def resolveDid(did: String): Future[Option[DidDocument]] = { - if (did.startsWith("did:plc:")) { - resolveDidPlc(did) - } else if (did.startsWith("did:web:")) { - resolveDidWeb(did) - } else { - logger.warn(s"Unsupported DID method: $did") - Future.successful(None) - } - } - - /** - * Resolves a did:plc identifier via the PLC directory. 
- */ - private def resolveDidPlc(did: String): Future[Option[DidDocument]] = { - val url = s"$plcDirectoryUrl/$did" - - ws.url(url) - .withRequestTimeout(timeout) - .get() - .map { response => - if (response.status == 200) { - Json.fromJson[DidDocument](response.json) match { - case JsSuccess(doc, _) => Some(doc) - case JsError(errors) => - logger.error(s"Failed to parse DID document for $did: $errors") - None - } - } else { - logger.warn(s"Failed to resolve $did from PLC directory: ${response.status}") - None - } - } - .recover { - case e: Exception => - logger.error(s"Error resolving $did from PLC directory: ${e.getMessage}", e) - None - } - } - - /** - * Resolves a did:web identifier by fetching the DID document from the domain. - * did:web:example.com → https://example.com/.well-known/did.json - * did:web:example.com:path:to:doc → https://example.com/path/to/doc/did.json - */ - private def resolveDidWeb(did: String): Future[Option[DidDocument]] = { - val parts = did.stripPrefix("did:web:").split(":") - val domain = java.net.URLDecoder.decode(parts.head, "UTF-8") - val path = if (parts.length > 1) { - "/" + parts.tail.map(p => java.net.URLDecoder.decode(p, "UTF-8")).mkString("/") + "/did.json" - } else { - "/.well-known/did.json" - } - - val url = s"https://$domain$path" - - ws.url(url) - .withRequestTimeout(timeout) - .get() - .map { response => - if (response.status == 200) { - Json.fromJson[DidDocument](response.json) match { - case JsSuccess(doc, _) => Some(doc) - case JsError(errors) => - logger.error(s"Failed to parse DID document for $did: $errors") - None - } - } else { - logger.warn(s"Failed to resolve $did via did:web: ${response.status}") - None - } - } - .recover { - case e: Exception => - logger.error(s"Error resolving $did via did:web: ${e.getMessage}", e) - None - } - } - - /** - * Convenience method: Resolves a handle all the way to its PDS URL. 
- * - * @param handle The handle to resolve - * @return A Future containing (DID, PDS URL) if fully resolved - */ - def resolveHandleToPds(handle: String): Future[Option[(String, String)]] = { - resolveHandle(handle).flatMap { - case Some(did) => - resolveDid(did).map { - case Some(doc) => - doc.getPdsEndpoint.map(pds => (did, pds)) - case None => None - } - case None => - Future.successful(None) - } - } - - /** - * Authenticates with a PDS and creates a session. - * - * @param identifier The handle or DID of the user. - * @param password The app password. - * @param pdsUrl The PDS URL (defaulting to bsky.social if not provided, though in reality we should resolve it). - * @return A Future containing the session response if successful. - */ - def createSession(identifier: String, password: String, pdsUrl: String = "https://bsky.social"): Future[Option[SessionResponse]] = { - val url = s"$pdsUrl/xrpc/com.atproto.server.createSession" - - val body = Json.obj( - "identifier" -> identifier, - "password" -> password - ) - - ws.url(url) - .withRequestTimeout(timeout) - .post(body) - .map { response => - if (response.status == 200) { - Json.fromJson[SessionResponse](response.json) match { - case JsSuccess(value, _) => Some(value) - case JsError(errors) => - logger.error(s"Failed to parse createSession response: $errors") - None - } - } else { - logger.warn(s"Failed to create session for $identifier on $pdsUrl. Status: ${response.status}, Body: ${response.body}") - None - } - } - .recover { - case e: Exception => - logger.error(s"Error calling createSession on $pdsUrl: ${e.getMessage}", e) - None - } - } - - /** - * Verifies a PDS and retrieves its latest commit information using the provided authentication token. - * - * @param pdsUrl The base URL of the PDS. - * @param repoDid The DID of the repository on the PDS. - * @param authToken The authentication token (JWT) for accessing the PDS. 
- * @return A Future containing `Option[LatestCommitResponse]` if successful, otherwise None. - */ - def getLatestCommit(pdsUrl: String, repoDid: String, authToken: String): Future[Option[LatestCommitResponse]] = { - val url = s"$pdsUrl/xrpc/com.atproto.repo.getCommit" // ATProto spec uses getCommit for this info - - ws.url(url) - .addQueryStringParameters("repo" -> repoDid) - .withHttpHeaders("Authorization" -> s"Bearer $authToken") - .withRequestTimeout(timeout) - .get() - .map { response => - if (response.status == 200) { - Json.fromJson[LatestCommitResponse](response.json) match { - case JsSuccess(value, _) => Some(value) - case JsError(errors) => - logger.error(s"Failed to parse getLatestCommit response from $pdsUrl for $repoDid: $errors") - None - } - } else { - logger.warn(s"Failed to get latest commit from $pdsUrl for $repoDid. Status: ${response.status}, Body: ${response.body}") - None - } - } - .recover { - case e: Exception => - logger.error(s"Error calling getLatestCommit on $pdsUrl for $repoDid: ${e.getMessage}", e) - None - } - } -} - -// Define case class for the expected response from com.atproto.repo.getCommit -// This is a simplified representation. The actual response might be more complex. -// Based on AT Protocol spec, getCommit returns 'cid', 'rev', 'seq' etc. -case class LatestCommitResponse( - cid: String, // The CID of the latest commit - rev: String, // The repository revision - seq: Long // The sequence number of the latest commit - ) - -object LatestCommitResponse { - implicit val format: play.api.libs.json.Format[LatestCommitResponse] = Json.format[LatestCommitResponse] -} - -case class SessionResponse( - did: String, - handle: String, - email: Option[String], - accessJwt: String, - refreshJwt: String - ) - -object SessionResponse { - implicit val format: play.api.libs.json.Format[SessionResponse] = Json.format[SessionResponse] -} - -/** - * Represents a service endpoint in a DID document. 
- */ -case class DidService( - id: String, - `type`: String, - serviceEndpoint: String - ) - -object DidService { - implicit val format: play.api.libs.json.Format[DidService] = Json.format[DidService] -} - -/** - * Represents a verification method in a DID document, containing a public key. - */ -case class DidVerificationMethod( - id: String, - `type`: String, - controller: String, - publicKeyMultibase: Option[String] = None - ) - -object DidVerificationMethod { - implicit val format: play.api.libs.json.Format[DidVerificationMethod] = Json.format[DidVerificationMethod] -} - -/** - * Represents a DID Document returned from PLC directory or did:web resolution. - */ -case class DidDocument( - id: String, - alsoKnownAs: Option[Seq[String]] = None, - verificationMethod: Option[Seq[DidVerificationMethod]] = None, - service: Option[Seq[DidService]] = None - ) { - /** - * Extracts the PDS endpoint URL from the DID document. - * Looks for a service with type "AtprotoPersonalDataServer". - */ - def getPdsEndpoint: Option[String] = { - service.flatMap { services => - services.find(_.`type` == "AtprotoPersonalDataServer").map(_.serviceEndpoint) - } - } - - /** - * Extracts the handle from alsoKnownAs (format: "at://handle") - */ - def getHandle: Option[String] = { - alsoKnownAs.flatMap { aliases => - aliases.find(_.startsWith("at://")).map(_.stripPrefix("at://")) - } - } -} - -object DidDocument { - implicit val format: play.api.libs.json.Format[DidDocument] = Json.format[DidDocument] -} diff --git a/app/services/AccessionNumberGenerator.scala b/app/services/AccessionNumberGenerator.scala deleted file mode 100644 index b84dede6..00000000 --- a/app/services/AccessionNumberGenerator.scala +++ /dev/null @@ -1,111 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.BiosampleType -import org.hashids.Hashids -import play.api.Configuration -import repositories.CitizenSequenceRepository - -import scala.concurrent.{ExecutionContext, 
Future} - -case class AccessionMetadata( - pgpParticipantId: Option[String] = None, - citizenBiosampleDid: Option[String] = None, - existingAccession: Option[String] = None - ) - -/** - * Trait for generating and decoding accession numbers associated with biological samples. - * An accession number is a unique identifier assigned to a biosample, which can be - * generated based on the biosample type and associated metadata and decoded for reverse lookups. - */ -trait AccessionNumberGenerator { - def generateAccession(biosampleType: BiosampleType, metadata: AccessionMetadata): Future[String] - - def decodeAccession(accession: String): Future[Option[Long]] // Added for reverse lookup -} - -/** - * Singleton class responsible for generating and decoding accession numbers for biosamples. - * This implementation uses hashing and unique sequence generation to support multiple - * biosample types including Standard, Ancient, PGP, and Citizen. - * - * It extends the `AccessionNumberGenerator` trait and relies on a hashing library - * for encoding and decoding accession numbers. - * - * @constructor Creates a new BiosampleAccessionGenerator instance. - * @param sequenceRepo Repository for fetching sequence values. - * @param config Application configuration for loading settings like hashing salt. - * @param ec ExecutionContext for managing asynchronous operations. 
- */ -@Singleton -class BiosampleAccessionGenerator @Inject()( - sequenceRepo: CitizenSequenceRepository, - config: Configuration - )(implicit ec: ExecutionContext) - extends AccessionNumberGenerator { - - private val DuPrefix = "DU" - private val alphabet = "ABCDEFGHIJKLMNPQRSTUVWXYZ1234567890" // Removed O to avoid confusion with 0 - private val salt = config.get[String]("biosample.hash.salt") - private val hashLength = 6 // Gives us plenty of combinations while keeping it readable - - private lazy val hasher = new Hashids(salt, hashLength, alphabet) - - /** - * Decodes the given accession string to retrieve an optional numeric identifier. - * - * @param accession The encoded accession string to be decoded. It is typically - * prefixed with a specific identifier (`DuPrefix`). - * @return A Future containing an Option of Long. The Option will contain the - * decoded numeric identifier if successful, or None if decoding fails - * or the input does not follow the expected format. - */ - override def decodeAccession(accession: String): Future[Option[Long]] = Future { - if (accession.startsWith(DuPrefix)) { - val hash = accession.substring(DuPrefix.length) - try { - val decoded = hasher.decode(hash) - decoded.headOption - } catch { - case _: Exception => None - } - } else None - } - - /** - * Generates an accession string based on the specified biosample type and associated metadata. - * - * @param biosampleType The type of biosample, which determines the logic for generating the accession. - * Acceptable values are Standard, Ancient, PGP, or Citizen. - * @param metadata The metadata required to generate the accession. This may include - * existing accession information, PGP participant ID, or other relevant details - * based on the biosample type. - * @return A Future containing the generated accession string. If required metadata is missing - * or invalid for the specified biosample type, the Future will fail with an exception. 
- */ - override def generateAccession(biosampleType: BiosampleType, metadata: AccessionMetadata): Future[String] = { - biosampleType match { - case BiosampleType.Standard | BiosampleType.Ancient => - metadata.existingAccession match { - case Some(accession) => Future.successful(accession) - case None => Future.failed( - new IllegalArgumentException("ENA/NCBI accession is required for Standard and Ancient samples") - ) - } - - case BiosampleType.PGP => - metadata.pgpParticipantId match { - case Some(id) => Future.successful(s"PGP-$id") - case None => Future.failed( - new IllegalArgumentException("PGP participant ID is required") - ) - } - - case BiosampleType.Citizen => - sequenceRepo.getNextSequence().map { seq => - s"$DuPrefix-${hasher.encode(seq)}" - } - } - } -} \ No newline at end of file diff --git a/app/services/AncestralMotifService.scala b/app/services/AncestralMotifService.scala deleted file mode 100644 index 949622ec..00000000 --- a/app/services/AncestralMotifService.scala +++ /dev/null @@ -1,149 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.domain.haplogroups.{HaplogroupAncestralStr, MotifMethod} -import play.api.Logging -import repositories.{BiosampleVariantCallRepository, HaplogroupAncestralStrRepository} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Computes and manages ancestral STR motifs (modal haplotypes) for haplogroups. - * - * The modal haplotype is the most common STR repeat count across all samples - * assigned to a haplogroup. It represents the ancestral state for that branch, - * used in STR-based age estimation. 
- * - * Methods: - * - MODAL: Simple mode (most frequent value) across samples - * - PHYLOGENETIC: Inferred from ASR across the tree (future) - * - MANUAL: Curator-set values for well-studied haplogroups - */ -class AncestralMotifService @Inject()( - ancestralStrRepo: HaplogroupAncestralStrRepository, - variantCallRepo: BiosampleVariantCallRepository -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Get the ancestral motif for a haplogroup. - * - * @return Map of markerName -> ancestral repeat count - */ - def getMotifForHaplogroup(haplogroupId: Int): Future[Map[String, Int]] = { - ancestralStrRepo.findByHaplogroup(haplogroupId).map { motifs => - motifs.flatMap(m => m.ancestralValue.map(v => m.markerName -> v)).toMap - } - } - - /** - * Compute modal haplotype from a set of sample STR observations. - * - * For each marker, finds the mode (most common value) across all samples. - * - * @param observations Seq of (markerName, observedValue) pairs from multiple samples - * @param haplogroupId Target haplogroup - * @return Computed ancestral STR motifs ready for persistence - */ - private[services] def computeModalHaplotype( - observations: Seq[MarkerObservation], - haplogroupId: Int - ): Seq[HaplogroupAncestralStr] = { - val now = LocalDateTime.now() - - // Group by marker name - val byMarker = observations.groupBy(_.markerName) - - byMarker.map { case (markerName, obs) => - val values = obs.map(_.value) - val sampleCount = values.size - - // Compute mode - val valueCounts = values.groupBy(identity).view.mapValues(_.size).toMap - val mode = valueCounts.maxBy(_._2)._1 - val modeCount = valueCounts(mode) - - // Compute confidence: fraction of samples agreeing with mode - val confidence = if (sampleCount > 0) BigDecimal(modeCount.toDouble / sampleCount) else BigDecimal(0) - - // Compute variance - val mean = values.sum.toDouble / sampleCount - val variance = if (sampleCount > 1) { - BigDecimal(values.map(v => math.pow(v - mean, 2)).sum / (sampleCount - 1)) 
- } else BigDecimal(0) - - // Find alternative modal values (any value with count >= 2 that isn't the mode) - val alternatives = valueCounts - .filter { case (v, count) => v != mode && count >= 2 } - .keys.toList.sorted - - HaplogroupAncestralStr( - haplogroupId = haplogroupId, - markerName = markerName, - ancestralValue = Some(mode), - ancestralValueAlt = if (alternatives.nonEmpty) Some(alternatives) else None, - confidence = Some(confidence), - supportingSamples = Some(sampleCount), - variance = Some(variance), - computedAt = now, - method = MotifMethod.Modal - ) - }.toSeq - } - - /** - * Compute and save the modal haplotype for a haplogroup from sample observations. - * - * @param observations Collected STR observations from samples in this haplogroup - * @param haplogroupId Target haplogroup - * @return Number of markers computed - */ - def computeAndSaveMotif( - observations: Seq[MarkerObservation], - haplogroupId: Int - ): Future[Int] = { - val motifs = computeModalHaplotype(observations, haplogroupId) - if (motifs.isEmpty) { - Future.successful(0) - } else { - ancestralStrRepo.upsertBatch(motifs).map(_.size) - } - } - - /** - * Manually set an ancestral STR value for a specific marker. - */ - def setManualMotif( - haplogroupId: Int, - markerName: String, - value: Int, - confidence: Option[BigDecimal] = None - ): Future[Int] = { - val motif = HaplogroupAncestralStr( - haplogroupId = haplogroupId, - markerName = markerName, - ancestralValue = Some(value), - ancestralValueAlt = None, - confidence = confidence, - supportingSamples = None, - variance = None, - method = MotifMethod.Manual - ) - ancestralStrRepo.upsert(motif) - } - - /** - * Delete all computed motifs for a haplogroup (e.g., before recomputation). - */ - def clearMotifs(haplogroupId: Int): Future[Int] = - ancestralStrRepo.deleteByHaplogroup(haplogroupId) -} - -/** - * A single STR marker observation from a sample. 
- */ -case class MarkerObservation( - markerName: String, - value: Int, - biosampleId: Int -) diff --git a/app/services/AuthService.scala b/app/services/AuthService.scala deleted file mode 100644 index 36a15541..00000000 --- a/app/services/AuthService.scala +++ /dev/null @@ -1,143 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.auth.UserRole -import models.domain.user.{User, UserPdsInfo} -import play.api.Logging -import repositories.{RoleRepository, UserPdsInfoRepository, UserRepository, UserRoleRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class AuthService @Inject()( - atProtocolClient: ATProtocolClient, - userRepository: UserRepository, - userPdsInfoRepository: UserPdsInfoRepository, - roleRepository: RoleRepository, - userRoleRepository: UserRoleRepository - )(implicit ec: ExecutionContext) extends Logging { - - /** - * Authenticates a user against their PDS and ensures a local User record exists. - * - * The authentication flow: - * 1. If identifier is a DID, resolve it directly to find PDS URL - * 2. If identifier is a handle, resolve handle → DID → PDS URL - * 3. Authenticate against the resolved PDS - * 4. Create or update local User record - * - * @param identifier User handle (e.g., "alice.bsky.social") or DID (e.g., "did:plc:xxx") - * @param password App Password - * @return Future[Option[User]] The authenticated user, if successful. 
- */ - def login(identifier: String, password: String): Future[Option[User]] = { - val normalizedIdentifier = identifier.stripPrefix("@").trim - - // Resolve the PDS URL based on identifier type - val pdsResolution: Future[Option[(String, String)]] = if (normalizedIdentifier.startsWith("did:")) { - // Direct DID - resolve to PDS - atProtocolClient.resolveDid(normalizedIdentifier).map { - case Some(doc) => doc.getPdsEndpoint.map(pds => (normalizedIdentifier, pds)) - case None => None - } - } else { - // Handle - resolve to DID then to PDS - atProtocolClient.resolveHandleToPds(normalizedIdentifier) - } - - pdsResolution.flatMap { - case Some((resolvedDid, pdsUrl)) => - logger.info(s"Resolved $normalizedIdentifier to DID $resolvedDid at PDS $pdsUrl") - - // Authenticate against the resolved PDS - atProtocolClient.createSession(normalizedIdentifier, password, pdsUrl).flatMap { - case Some(session) => - logger.info(s"AT Protocol session created for ${session.handle} (${session.did}) on $pdsUrl") - - // Find or Create User - userRepository.findByDid(session.did).flatMap { - case Some(user) => - // Update handle/email if changed - val updatedUser = user.copy( - handle = Some(session.handle), - email = session.email.orElse(user.email), - updatedAt = LocalDateTime.now() - ) - for { - _ <- userRepository.update(updatedUser) - _ <- updateUserPdsInfo(user.id.get, session.did, session.handle, pdsUrl) - } yield Some(updatedUser) - - case None => - // Create new user - val newUser = User( - id = Some(UUID.randomUUID()), - email = session.email, - did = session.did, - handle = Some(session.handle), - displayName = None, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now(), - isActive = true - ) - for { - createdUser <- userRepository.create(newUser) - _ <- updateUserPdsInfo(createdUser.id.get, session.did, session.handle, pdsUrl) - } yield Some(createdUser) - } - - case None => - logger.warn(s"Failed to authenticate $normalizedIdentifier at PDS $pdsUrl") - 
Future.successful(None) - } - - case None => - logger.warn(s"Failed to resolve PDS for identifier: $normalizedIdentifier") - Future.successful(None) - } - } - - /** - * Updates or creates the UserPdsInfo record for a user. - */ - private def updateUserPdsInfo(userId: UUID, did: String, handle: String, pdsUrl: String): Future[UserPdsInfo] = { - val now = LocalDateTime.now() - val info = UserPdsInfo( - id = None, - userId = userId, - pdsUrl = pdsUrl, - did = did, - handle = Some(handle), - createdAt = now, - updatedAt = now - ) - userPdsInfoRepository.upsertByDid(info) - } - - /** - * Checks if a user has a specific role. - */ - def hasRole(userId: UUID, roleName: String): Future[Boolean] = { - userRoleRepository.hasRole(userId, roleName) - } - - /** - * Checks if a user has any of the provided roles. - */ - def hasAnyRole(userId: UUID, roleNames: Seq[String]): Future[Boolean] = { - // This is not efficient if checking many roles individually, but fine for a few. - // Better to fetch all user roles and check intersection. - userRoleRepository.getUserRoles(userId).map { userRoles => - roleNames.exists(requiredRole => userRoles.contains(requiredRole)) - } - } - - /** - * Checks if a user has a specific permission. - */ - def hasPermission(userId: UUID, permissionName: String): Future[Boolean] = { - userRoleRepository.hasPermission(userId, permissionName) - } -} diff --git a/app/services/AwsSesEmailService.scala b/app/services/AwsSesEmailService.scala deleted file mode 100644 index cf3f2357..00000000 --- a/app/services/AwsSesEmailService.scala +++ /dev/null @@ -1,85 +0,0 @@ -package services - -import play.api.{Configuration, Logging} -import software.amazon.awssdk.regions.Region -import software.amazon.awssdk.services.ses.SesClient -import software.amazon.awssdk.services.ses.model.* - -import javax.inject.{Inject, Singleton} -import scala.jdk.CollectionConverters.* - -/** - * A concrete implementation of the EmailService trait that uses Amazon SES to send emails. 
- * - * @param configuration Play Framework configuration to get AWS settings - */ -@Singleton -class AwsSesEmailService @Inject()(configuration: Configuration) extends EmailService with Logging { - - private val sesClient: SesClient = { - val region = configuration - .getOptional[String]("aws.region") - .getOrElse("us-east-1") - - SesClient.builder() - .region(Region.of(region)) - .build() - } - - /** - * Sends an email using Amazon SES. - * - * @param to the list of recipient email addresses - * @param from the sender's email address - * @param subject the subject of the email - * @param body the body content of the email - * @return Either a String containing an error message in case of failure, or a Unit upon successful email delivery - */ - def sendEmail( - to: Seq[String], - from: String, - subject: String, - body: String - ): Either[String, Unit] = { - logger.info(s"Sending Contact Request: ${(to, from, subject, body)}") - try { - val destination = Destination.builder() - .toAddresses(to.asJava) - .build() - - val messageBody = Body.builder() - .text(Content.builder() - .data(body) - .charset("UTF-8") - .build()) - .build() - - val message = Message.builder() - .subject(Content.builder() - .data(subject) - .charset("UTF-8") - .build()) - .body(messageBody) - .build() - - val request = SendEmailRequest.builder() - .source(from) - .destination(destination) - .message(message) - .build() - - val response: SendEmailResponse = sesClient.sendEmail(request) - logger.info(s"Email sent successfully. 
MessageId: ${response.messageId()}") - Right(()) - } catch { - case e: MessageRejectedException => - val msg = s"Email rejected by SES: ${e.getMessage}" - logger.error(msg, e) - Left(msg) - case e: SesException => - val msg = s"SES service error: ${e.getMessage}" - logger.error(msg, e) - Left(msg) - } - } -} \ No newline at end of file diff --git a/app/services/BiosampleDataService.scala b/app/services/BiosampleDataService.scala deleted file mode 100644 index 44321744..00000000 --- a/app/services/BiosampleDataService.scala +++ /dev/null @@ -1,247 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.api.{FileInfo, LocationInfo, PublicationInfo, SequenceDataInfo} -import models.domain.genomics.{SequenceFile, SequenceLibrary, SequenceFileAtpLocationJsonb, SequenceFileChecksumJsonb, SequenceFileHttpLocationJsonb} -import models.domain.genomics.OriginalHaplogroupEntry -import models.domain.publications.{Publication, PublicationBiosample} -import repositories.* - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service class for managing biosample data, sequence data, and their associations with publications. - * This class interacts with various repository interfaces to perform database operations - * including the creation, association, and linking of biosample-related data. - * - * @constructor Creates an instance of the BiosampleDataService class. - * @param biosampleRepository Repository for managing biosample entities. - * @param sequenceLibraryRepository Repository for managing sequence libraries. - * @param sequenceFileRepository Repository for managing sequence files. - * @param sequenceHttpLocationRepository Repository for managing sequence file HTTP locations. - * @param publicationRepository Repository for managing publication entities. 
- * @param biosampleOriginalHaplogroupRepository Repository for managing the original haplogroup information associated with biosamples. - * @param sequenceFileChecksumRepository Repository for managing sequence file checksums. - * @param publicationBiosampleRepository Repository for managing associations between publications and biosamples. - * @param ec Execution context for handling asynchronous operations. - */ -@Singleton -class BiosampleDataService @Inject()( - biosampleRepository: BiosampleRepository, - sequenceLibraryRepository: SequenceLibraryRepository, - sequenceFileRepository: SequenceFileRepository, - publicationRepository: PublicationRepository, - biosampleOriginalHaplogroupRepository: BiosampleOriginalHaplogroupRepository, - publicationBiosampleRepository: PublicationBiosampleRepository, - testTypeService: TestTypeService - )(implicit ec: ExecutionContext) { - - /** - * Adds sequencing data to a specific sample identified by its unique GUID. - * - * This method accepts metadata and related information about the sequencing data, - * encapsulated within the `SequenceDataInfo` object, and associates it with the specified sample. - * - * @param sampleGuid The unique identifier of the sample to which the sequencing data will be added. - * @param data Metadata and details about the sequencing data, provided as a `SequenceDataInfo` object. - * @return A `Future` representing the asynchronous completion of the operation. The `Future` resolves to `Unit` if the operation succeeds, or it may fail with an exception if unsuccessful - * - */ - def addSequenceData(sampleGuid: UUID, data: SequenceDataInfo): Future[Unit] = { - createSequenceData(sampleGuid, data) - } - - /** - * Replaces the sequencing data for a specific sample. - * - * This method first removes all existing sequencing libraries and their associated files - * for the given sample GUID, and then adds the new sequencing data. - * - * @param sampleGuid The unique identifier of the sample to update. 
- * @param data The new metadata and details about the sequencing data. - * @return A `Future` representing the asynchronous completion of the operation. - */ - def replaceSequenceData(sampleGuid: UUID, data: SequenceDataInfo): Future[Unit] = { - for { - // 1. Find all existing libraries - libraries <- sequenceLibraryRepository.findBySampleGuid(sampleGuid) - - // 2. Delete files for each library - _ <- Future.sequence(libraries.map { lib => - sequenceFileRepository.deleteByLibraryId(lib.id.get) - }) - - // 3. Delete the libraries themselves - _ <- Future.sequence(libraries.map { lib => - sequenceLibraryRepository.delete(lib.id.get) - }) - - // 4. Create new sequence data - _ <- createSequenceData(sampleGuid, data) - } yield () - } - - /** - * Associates a publication with a specific biosample identified by its unique GUID. If the publication - * does not already exist in the repository, it is created. Optionally, original haplogroup information - * associated with the publication may also be stored for the biosample. - * - * @param sampleGuid The unique identifier (GUID) of the biosample to link the publication with. - * @param pubInfo The publication information, encapsulated in a `PublicationInfo` instance, which - * includes optional identifiers (e.g., DOI, PubMed ID) and haplogroup data. - * @return A `Future` representing the asynchronous operation. The `Future` resolves to `Unit` if the - * operation completes successfully, or fails with an exception if an error occurs. 
- */ - def linkPublication(sampleGuid: UUID, pubInfo: PublicationInfo): Future[Unit] = { - for { - maybeBiosampleWithDonor <- biosampleRepository.findByGuid(sampleGuid) - (biosample, _) <- maybeBiosampleWithDonor match { - case Some(b) => Future.successful(b) - case None => Future.failed(new IllegalArgumentException(s"Biosample not found for GUID: $sampleGuid")) - } - // First try to find existing publication by DOI - maybePublication <- pubInfo.doi.map(doi => - publicationRepository.findByDoi(doi) - ).getOrElse(Future.successful(None)) - // Use existing or create new publication - publication <- maybePublication match { - case Some(pub) => Future.successful(pub) - case None => publicationRepository.savePublication(Publication( - id = None, - openAlexId = None, - pubmedId = pubInfo.pubmedId, - doi = pubInfo.doi, - title = pubInfo.doi.map(d => s"Publication with DOI: $d").getOrElse("Unknown publication"), - authors = None, - abstractSummary = None, - journal = None, - publicationDate = None, - url = None, - citationNormalizedPercentile = None, - citedByCount = None, - openAccessStatus = None, - openAccessUrl = None, - primaryTopic = None, - publicationType = None, - publisher = None - )) - } - _ <- publicationBiosampleRepository.create(PublicationBiosample( - publicationId = publication.id.get, - biosampleId = biosample.id.get - )) - _ <- pubInfo.originalHaplogroups.map { haplogroupInfo => - biosampleOriginalHaplogroupRepository.upsert(biosample.id.get, OriginalHaplogroupEntry( - publicationId = publication.id.get, - yHaplogroupResult = haplogroupInfo.yHaplogroup, - mtHaplogroupResult = haplogroupInfo.mtHaplogroup, - notes = haplogroupInfo.notes - )) - }.getOrElse(Future.successful(())) - } yield () - } - - - /** - * Fully deletes a biosample and all its associated data (publication links, - * original haplogroups, sequence libraries, sequence files, file locations, and checksums). - * - * @param biosampleId The internal ID of the biosample to delete. 
- * @param sampleGuid The GUID of the biosample to delete. - * @return A `Future` that completes when all associated data and the biosample itself have been deleted. - */ - def fullyDeleteBiosampleAndDependencies(biosampleId: Int, sampleGuid: UUID): Future[Unit] = { - for { - // 1. Delete associated publication links - _ <- publicationBiosampleRepository.deleteByBiosampleId(biosampleId) - // 2. Clear embedded original haplogroup records - _ <- biosampleOriginalHaplogroupRepository.deleteAllByBiosampleId(biosampleId) - // 3. Find and delete all sequence libraries and their files - libraries <- sequenceLibraryRepository.findBySampleGuid(sampleGuid) - _ <- Future.sequence(libraries.map { lib => - for { - _ <- sequenceFileRepository.deleteByLibraryId(lib.id.get) // Deletes files, locations, checksums (if cascading) - _ <- sequenceLibraryRepository.delete(lib.id.get) // Deletes the library - } yield () - }) - // 4. Delete the biosample itself - _ <- biosampleRepository.delete(biosampleId) - } yield () - } - - private def createSequenceData(sampleGuid: UUID, data: SequenceDataInfo): Future[Unit] = { - // Look up testTypeId - for { - testTypeRowOpt <- testTypeService.getByCode(data.testType) - testTypeId <- testTypeRowOpt.map(tt => Future.successful(tt.id.getOrElse(throw new IllegalStateException("TestTypeRow ID not found")))) - .getOrElse(Future.failed(new IllegalArgumentException(s"Invalid test type code: ${data.testType}"))) - createdLibrary <- { - val library = SequenceLibrary( - id = None, - sampleGuid = sampleGuid, - lab = data.platformName, - testTypeId = testTypeId, - runDate = LocalDateTime.now(), - instrument = data.platformName, - reads = data.reads.getOrElse(0), - readLength = data.readLength.getOrElse(0), - pairedEnd = false, - insertSize = None, - atUri = None, - atCid = None, - created_at = LocalDateTime.now(), - updated_at = None - ) - sequenceLibraryRepository.create(library) - } - _ <- createFiles(createdLibrary.id.get, data) - } yield () - } - - private 
def createFiles(libraryId: Int, dataInfo: SequenceDataInfo): Future[Unit] = { - val fileCreations = dataInfo.files.map { fileInfo => - - val checksumsJsonb = fileInfo.checksums.map { cs => - SequenceFileChecksumJsonb( - checksum = cs.checksum, - algorithm = cs.algorithm, - verifiedAt = Some(LocalDateTime.now()), // Assuming verified upon creation for now - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - }.toList - - val httpLocationsJsonb = List( - SequenceFileHttpLocationJsonb( - url = fileInfo.location.fileUrl, - urlHash = UUID.nameUUIDFromBytes(fileInfo.location.fileUrl.getBytes).toString, // Generate a hash for the URL - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - ) - - val atpLocationJsonb: Option[SequenceFileAtpLocationJsonb] = None - - val file = SequenceFile( - id = None, - libraryId = libraryId, - fileName = fileInfo.fileName, - fileSizeBytes = fileInfo.fileSizeBytes, - fileFormat = fileInfo.fileFormat, - checksums = checksumsJsonb, - httpLocations = httpLocationsJsonb, - atpLocation = atpLocationJsonb, - aligner = fileInfo.aligner, - targetReference = fileInfo.targetReference, - createdAt = LocalDateTime.now(), - updatedAt = None - ) - - sequenceFileRepository.create(file).map(_ => ()) - } - - Future.sequence(fileCreations).map(_ => ()) - } -} \ No newline at end of file diff --git a/app/services/BiosampleDomainService.scala b/app/services/BiosampleDomainService.scala deleted file mode 100644 index 5f3ad11e..00000000 --- a/app/services/BiosampleDomainService.scala +++ /dev/null @@ -1,253 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.api.{BiosampleUpdate, BiosampleView, BiosampleWithOrigin, ExternalBiosampleRequest, PaginatedResult, PublicationInfo, SampleWithStudies, SequenceDataInfo} -import models.domain.genomics.{BiologicalSex, Biosample, BiosampleType, SpecimenDonor} -import models.domain.publications.PublicationBiosample -import play.api.Logging - -import 
java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Facade service providing a unified API for all biosample domain operations. - * - * This service consolidates operations from multiple specialized biosample services - * into a single entry point, reducing controller coupling and providing consistent - * error handling across all biosample operations. - * - * Delegates to: - * - BiosampleService: Core CRUD and specimen donor management - * - BiosampleUpdateService: Biosample updates with cascading changes - * - BiosampleDataService: Sequence data and publication linking - * - BiosamplePublicationService: Publication-biosample associations - * - BiosampleReportService: Read-only reporting - * - ExternalBiosampleService: External/citizen biosample workflows - * - PgpBiosampleService: PGP-specific biosample creation - */ -@Singleton -class BiosampleDomainService @Inject()( - biosampleService: BiosampleService, - biosampleUpdateService: BiosampleUpdateService, - biosampleDataService: BiosampleDataService, - biosamplePublicationService: BiosamplePublicationService, - biosampleReportService: BiosampleReportService, - externalBiosampleService: ExternalBiosampleService, - pgpBiosampleService: PgpBiosampleService, - biosampleRepository: repositories.BiosampleRepository -)(implicit ec: ExecutionContext) extends Logging { - - // ========================================================================== - // Core Operations (from BiosampleService) - // ========================================================================== - - /** - * Retrieves a biosample by its unique identifier. - * - * @param id the unique identifier of the biosample - * @return a Future containing an optional tuple of (Biosample, Option[SpecimenDonor]) - */ - def getBiosampleById(id: Int): Future[Option[(Biosample, Option[SpecimenDonor])]] = - biosampleService.getBiosampleById(id) - - /** - * Searches for a biosample by alias or accession. 
- * - * @param query the alias or accession to search for - * @return a Future containing an optional tuple of (Biosample, Option[SpecimenDonor]) - */ - def findByAliasOrAccession(query: String): Future[Option[(Biosample, Option[SpecimenDonor])]] = - biosampleRepository.findByAliasOrAccession(query) - - /** - * Retrieves all biosamples with their associated studies. - * - * @return a Future containing a sequence of SampleWithStudies - */ - def findAllWithStudies(): Future[Seq[SampleWithStudies]] = - biosampleRepository.findAllWithStudies() - - /** - * Creates a new biosample record. - * - * @return the created Biosample - */ - def createBiosample( - sampleGuid: UUID, - sampleAccession: String, - description: String, - alias: Option[String], - centerName: String, - specimenDonorId: Option[Int], - sourcePlatform: Option[String] - ): Future[Biosample] = - biosampleService.createBiosample( - sampleGuid, sampleAccession, description, alias, centerName, specimenDonorId, sourcePlatform - ) - - // ========================================================================== - // Update Operations (from BiosampleUpdateService) - // ========================================================================== - - /** - * Updates a biosample with the given modifications. - * Handles cascading updates to specimen donor and haplogroups. - * - * @param id the unique identifier of the biosample - * @param update the update object containing field changes - * @return Either an error message or the updated BiosampleView - */ - def updateBiosample(id: Int, update: BiosampleUpdate): Future[Either[String, BiosampleView]] = - biosampleUpdateService.updateBiosample(id, update) - - // ========================================================================== - // Specimen Donor Operations (from BiosampleService) - // ========================================================================== - - /** - * Creates a new SpecimenDonor or returns an existing one if found. 
- * - * @return the donor ID if created/found, None otherwise - */ - def createOrUpdateSpecimenDonor( - donorIdentifier: String, - originBiobank: String, - donorType: BiosampleType, - sex: Option[BiologicalSex], - latitude: Option[Double], - longitude: Option[Double], - pgpParticipantId: Option[String] = None, - atUri: Option[String] = None - ): Future[Option[Int]] = - biosampleService.createOrUpdateSpecimenDonor( - donorIdentifier, originBiobank, donorType, sex, latitude, longitude, pgpParticipantId, atUri - ) - - // ========================================================================== - // Sequence Data Operations (from BiosampleDataService) - // ========================================================================== - - /** - * Adds sequencing data to a sample. - * - * @param sampleGuid the unique identifier of the sample - * @param data the sequence data information - */ - def addSequenceData(sampleGuid: UUID, data: SequenceDataInfo): Future[Unit] = - biosampleDataService.addSequenceData(sampleGuid, data) - - /** - * Replaces all sequencing data for a sample. - * - * @param sampleGuid the unique identifier of the sample - * @param data the new sequence data information - */ - def replaceSequenceData(sampleGuid: UUID, data: SequenceDataInfo): Future[Unit] = - biosampleDataService.replaceSequenceData(sampleGuid, data) - - // ========================================================================== - // Publication Operations (from BiosampleDataService and BiosamplePublicationService) - // ========================================================================== - - /** - * Associates a publication with a biosample by GUID. - * Creates the publication if it doesn't exist. 
- * - * @param sampleGuid the GUID of the biosample - * @param pubInfo the publication information - */ - def linkPublication(sampleGuid: UUID, pubInfo: PublicationInfo): Future[Unit] = - biosampleDataService.linkPublication(sampleGuid, pubInfo) - - /** - * Links an existing biosample to an existing publication by accession and DOI. - * - * @param sampleAccession the accession number of the biosample - * @param doi the DOI of the publication - * @return the created PublicationBiosample association - */ - def linkBiosampleToPublication(sampleAccession: String, doi: String): Future[PublicationBiosample] = - biosamplePublicationService.linkBiosampleToPublication(sampleAccession, doi) - - // ========================================================================== - // Deletion Operations (from BiosampleDataService and ExternalBiosampleService) - // ========================================================================== - - /** - * Fully deletes a biosample and all its associated data. - * - * @param biosampleId the internal ID of the biosample - * @param sampleGuid the GUID of the biosample - */ - def fullyDeleteBiosampleAndDependencies(biosampleId: Int, sampleGuid: UUID): Future[Unit] = - biosampleDataService.fullyDeleteBiosampleAndDependencies(biosampleId, sampleGuid) - - /** - * Deletes a biosample by accession if the citizen DID matches the owner. - * - * @param accession the sample accession - * @param citizenDid the DID of the requesting citizen - * @return true if deleted, false if not found or not authorized - */ - def deleteBiosample(accession: String, citizenDid: String): Future[Boolean] = - externalBiosampleService.deleteBiosample(accession, citizenDid) - - // ========================================================================== - // Reporting Operations (from BiosampleReportService) - // ========================================================================== - - /** - * Retrieves all biosamples for a publication. 
- * - * @param publicationId the publication ID - * @return sequence of biosamples with origin information - */ - def getBiosampleData(publicationId: Int): Future[Seq[BiosampleWithOrigin]] = - biosampleReportService.getBiosampleData(publicationId) - - /** - * Retrieves paginated biosamples for a publication. - * - * @param publicationId the publication ID - * @param page the page number (1-based) - * @param pageSize the number of items per page - * @return paginated result with biosamples - */ - def getPaginatedBiosampleData(publicationId: Int, page: Int, pageSize: Int): Future[PaginatedResult[BiosampleWithOrigin]] = - biosampleReportService.getPaginatedBiosampleData(publicationId, page, pageSize) - - // ========================================================================== - // Specialized Workflows - // ========================================================================== - - /** - * Creates an external biosample with all associated data. - * Handles both creation and update (upsert) patterns. - * - * @param request the external biosample request - * @return the GUID of the created/updated biosample - */ - def createExternalBiosample(request: ExternalBiosampleRequest): Future[UUID] = - externalBiosampleService.createBiosampleWithData(request) - - /** - * Creates a PGP-specific biosample with participant tracking. 
- * - * @param participantId the PGP participant ID - * @param description the biosample description - * @param centerName the center/biobank name - * @param sex optional biological sex - * @param latitude optional latitude coordinate - * @param longitude optional longitude coordinate - * @return the GUID of the created biosample - */ - def createPgpBiosample( - participantId: String, - description: String, - centerName: String, - sex: Option[BiologicalSex] = None, - latitude: Option[Double] = None, - longitude: Option[Double] = None - ): Future[UUID] = - pgpBiosampleService.createPgpBiosample(participantId, description, centerName, sex, latitude, longitude) -} diff --git a/app/services/BiosamplePublicationService.scala b/app/services/BiosamplePublicationService.scala deleted file mode 100644 index b3dee002..00000000 --- a/app/services/BiosamplePublicationService.scala +++ /dev/null @@ -1,69 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.publications.PublicationBiosample -import repositories.{BiosampleRepository, PublicationBiosampleRepository, PublicationRepository} - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service responsible for managing the association between biosamples and publications. - * This class provides functionality to link biosamples with publications based on their unique identifiers. - * - * @constructor Creates a new instance of `BiosamplePublicationService` and injects the required repositories. - * @param biosampleRepository Repository for handling operations related to biosamples. - * @param publicationRepository Repository for handling operations related to publications. - * @param publicationBiosampleRepository Repository for managing associations between publications and biosamples. - * @param ec Implicit `ExecutionContext` used for asynchronous operations. 
- */ -@Singleton -class BiosamplePublicationService @Inject()( - biosampleRepository: BiosampleRepository, - publicationRepository: PublicationRepository, - publicationBiosampleRepository: PublicationBiosampleRepository - )(implicit ec: ExecutionContext) { - - /** - * Links a biosample to a publication by their respective identifiers. - * - * @param sampleAccession The accession number of the biosample to be linked. - * @param doi The DOI (Digital Object Identifier) of the publication to be linked. - * The method will clean the DOI if it contains a URL. - * @return A `Future` containing the `PublicationBiosample` object that represents the association - * between the specified biosample and publication. - * The future will fail if the biosample or publication is not found, or if there is an issue - * creating the association. - */ - def linkBiosampleToPublication(sampleAccession: String, doi: String): Future[PublicationBiosample] = { - def cleanDoi(input: String): String = input.trim match { - case url if url.startsWith("https://doi.org/") => url.substring("https://doi.org/".length) - case url if url.startsWith("http://doi.org/") => url.substring("http://doi.org/".length) - case doi => doi - } - - for { - biosampleWithDonor <- biosampleRepository.findByAccession(sampleAccession).flatMap { - case Some(result) => Future.successful(result) - case None => Future.failed(new IllegalArgumentException(s"Biosample with accession $sampleAccession not found")) - } - publication <- publicationRepository.findByDoi(cleanDoi(doi)).flatMap { - case Some(pub) => Future.successful(pub) - case None => Future.failed(new IllegalArgumentException(s"Publication with DOI $doi not found")) - } - - (biosample, _) = biosampleWithDonor // Destructure the tuple to get just the biosample - - link <- publicationBiosampleRepository.create( - PublicationBiosample( - publicationId = publication.id.getOrElse( - throw new IllegalStateException("Publication ID is missing") - ), - biosampleId = 
biosample.id.getOrElse( - throw new IllegalStateException("Biosample ID is missing") - ) - ) - ) - } yield link - } - -} \ No newline at end of file diff --git a/app/services/BiosampleReportService.scala b/app/services/BiosampleReportService.scala deleted file mode 100644 index 413d3dcc..00000000 --- a/app/services/BiosampleReportService.scala +++ /dev/null @@ -1,42 +0,0 @@ -package services - -import models.api.{BiosampleWithOrigin, PaginatedResult} -import repositories.BiosampleRepository - -import javax.inject.Inject -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service responsible for providing biosample-related data and reports. - * - * @param biosampleRepository the repository used for retrieving biosample data - * @param ec the execution context in which the service operates - */ -class BiosampleReportService @Inject(biosampleRepository: BiosampleRepository)(implicit ec: ExecutionContext) { - /** - * Retrieves all biosamples with their origin metadata associated with a specific publication. - * - * @param publicationId the unique identifier of the publication for which biosamples are being queried - * @return a future containing a sequence of biosamples with their origin information - */ - def getBiosampleData(publicationId: Int): Future[Seq[BiosampleWithOrigin]] = - biosampleRepository.findBiosamplesWithOriginForPublication(publicationId) - - /** - * Retrieves a paginated list of biosamples with origin metadata associated with a specific publication. - * This method calculates the total number of biosamples, fetches the requested page of biosample data, - * and constructs a paginated result containing the items and metadata such as current page, page size, and total item count. 
- * - * @param publicationId the unique identifier of the publication for which biosamples are being queried - * @param page the page number to retrieve, starting from 1 - * @param pageSize the number of items to include on each page - * @return a future containing a `PaginatedResult` object, which includes the current page of biosample data, - * the page number, page size, and the total number of biosamples - */ - def getPaginatedBiosampleData(publicationId: Int, page: Int, pageSize: Int): Future[PaginatedResult[BiosampleWithOrigin]] = { - for { - totalItems <- biosampleRepository.countBiosamplesForPublication(publicationId) - items <- biosampleRepository.findPaginatedBiosamplesWithOriginForPublication(publicationId, page, pageSize) - } yield PaginatedResult(items, page, pageSize, totalItems) - } -} diff --git a/app/services/BiosampleService.scala b/app/services/BiosampleService.scala deleted file mode 100644 index 1efbe0d8..00000000 --- a/app/services/BiosampleService.scala +++ /dev/null @@ -1,111 +0,0 @@ -package services - -import com.vividsolutions.jts.geom.Point -import jakarta.inject.Inject -import models.domain.genomics.{BiologicalSex, Biosample, BiosampleType, SpecimenDonor} -import repositories.{BiosampleRepository, SpecimenDonorRepository} - -import java.util.UUID // Added import -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service that provides operations related to biosamples. - * - * This class interfaces with the BiosampleRepository to retrieve biosample data - * and provides functionality to access biosample information through various operations. - * - * @constructor Creates a new instance of the BiosampleService. 
- * @param biosampleRepository the repository used to perform operations on biosample data - */ -class BiosampleService @Inject()( - biosampleRepository: BiosampleRepository, - specimenDonorRepository: SpecimenDonorRepository - )(implicit ec: ExecutionContext) extends CoordinateValidation { - /** - * Retrieves a biosample by its unique identifier. - * - * This method interacts with the biosample repository to fetch a biosample - * entry associated with the given identifier. The result is wrapped in a Future - * to allow for asynchronous processing. - * - * @param id the unique identifier of the biosample to be retrieved - * @return a Future containing an optional biosample instance; None is returned if no match is found - */ - def getBiosampleById(id: Int): Future[Option[(Biosample, Option[SpecimenDonor])]] = biosampleRepository.findById(id) - - /** - * Creates a new SpecimenDonor or returns an existing one if a matching citizen/donorIdentifier is found. - * This method also handles coordinate validation. 
- */ - def createOrUpdateSpecimenDonor( - donorIdentifier: String, - originBiobank: String, - donorType: BiosampleType, - sex: Option[BiologicalSex], - latitude: Option[Double], - longitude: Option[Double], - pgpParticipantId: Option[String] = None, - atUri: Option[String] = None - ): Future[Option[Int]] = { - // Determine if a donor should be created based on provided PGP ID or coordinates - val shouldCreate = pgpParticipantId.isDefined || latitude.isDefined || longitude.isDefined || atUri.isDefined - - if (!shouldCreate) { - Future.successful(None) - } else { - // Check for existing citizen donor - val existingCitizenDonorFuture = (atUri, Some(donorIdentifier)) match { - case (Some(did), Some(identifier)) => - specimenDonorRepository.findByDidAndIdentifier(did, identifier) - case _ => Future.successful(None) - } - - existingCitizenDonorFuture.flatMap { - case Some(existingDonor) => Future.successful(existingDonor.id) - case None => - validateCoordinates(latitude, longitude).flatMap { (geocoord: Option[Point]) => // Fixed lambda syntax - val donor = SpecimenDonor( - id = None, - donorIdentifier = donorIdentifier, - originBiobank = originBiobank, - donorType = donorType, - sex = sex, - geocoord = geocoord, - pgpParticipantId = pgpParticipantId, - atUri = atUri, - dateRangeStart = None, - dateRangeEnd = None - ) - specimenDonorRepository.create(donor).map(_.id) - } - } - } - } - - /** - * Creates a new Biosample record. 
- */ - def createBiosample( - sampleGuid: UUID, - sampleAccession: String, - description: String, - alias: Option[String], - centerName: String, - specimenDonorId: Option[Int], - sourcePlatform: Option[String] - ): Future[Biosample] = { - val biosample = Biosample( - id = None, - sampleGuid = sampleGuid, - sampleAccession = sampleAccession, - description = description, - alias = alias, - centerName = centerName, - specimenDonorId = specimenDonorId, - locked = false, - sourcePlatform = sourcePlatform - ) - biosampleRepository.create(biosample) - } -} - diff --git a/app/services/BiosampleServiceException.scala b/app/services/BiosampleServiceException.scala deleted file mode 100644 index ee537774..00000000 --- a/app/services/BiosampleServiceException.scala +++ /dev/null @@ -1,26 +0,0 @@ -package services - -sealed trait BiosampleServiceException extends RuntimeException { - def message: String - - override def getMessage: String = message -} - -case class DuplicateAccessionException(accession: String) extends BiosampleServiceException { - override val message = s"A biosample with accession $accession already exists" -} - -case class DuplicateParticipantException(message: String) extends BiosampleServiceException - -case class InvalidCoordinatesException(latitude: Double, longitude: Double) extends BiosampleServiceException { - override val message = s"Invalid coordinates: latitude=$latitude, longitude=$longitude" -} - -case class SequenceDataValidationException(details: String) extends BiosampleServiceException { - override val message = s"Invalid sequence data: $details" -} - -case class PublicationLinkageException(details: String) extends BiosampleServiceException { - override val message = s"Failed to link publication: $details" -} - diff --git a/app/services/BiosampleUpdateService.scala b/app/services/BiosampleUpdateService.scala deleted file mode 100644 index b9418838..00000000 --- a/app/services/BiosampleUpdateService.scala +++ /dev/null @@ -1,172 +0,0 @@ 
-package services - -import jakarta.inject.{Inject, Singleton} -import models.api.{BiosampleUpdate, BiosampleView} -import models.domain.genomics.{Biosample, BiosampleType, HaplogroupResult, SpecimenDonor} -import models.domain.genomics.OriginalHaplogroupEntry -import repositories.{BiosampleOriginalHaplogroupRepository, BiosampleRepository, PublicationBiosampleRepository, SpecimenDonorRepository} -import utils.GeometryUtils - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service class responsible for updating biosamples and managing associated data. - * - * The `BiosampleUpdateService` provides methods for updating biosample records, - * handling related haplogroup updates, and ensuring consistency in the repository layer. - * - * @constructor Creates an instance of `BiosampleUpdateService` with the necessary repositories and execution context. - * @param biosampleRepository Repository for performing CRUD operations on biosamples. - * @param publicationBiosampleRepository Repository for managing the relationship between publications and biosamples. - * @param biosampleOriginalHaplogroupRepository Repository for handling the original haplogroup assignments for biosamples. - * @param ec Execution context used for asynchronous operations. - */ -@Singleton -class BiosampleUpdateService @Inject()( - biosampleRepository: BiosampleRepository, - publicationBiosampleRepository: PublicationBiosampleRepository, - biosampleOriginalHaplogroupRepository: BiosampleOriginalHaplogroupRepository, - specimenDonorRepository: SpecimenDonorRepository - )(implicit ec: ExecutionContext) { - - /** - * Updates a biosample with the given modifications if valid updates are provided. - * Handles updating optional fields of the biosample, and also ensures any - * haplogroup-related updates are processed if necessary. - * - * @param id The unique identifier of the biosample to be updated. - * @param update The object containing the updated fields for the biosample. 
- * @return A Future containing either a String message indicating an error - * (e.g., if the biosample is not found or update fails), or a BiosampleView - * representing the updated state of the biosample. - */ - def updateBiosample(id: Int, update: BiosampleUpdate): Future[Either[String, BiosampleView]] = { - if (!update.hasUpdates) { - Future.successful(Left("No valid fields to update")) - } else { - biosampleRepository.findById(id).flatMap { - case None => Future.successful(Left("Biosample not found")) - case Some((biosample, specimenDonor)) => - val updatedBiosample = createUpdatedBiosample(biosample, update) - - for { - _ <- updateHaplogroupsIfNeeded(id, update) - _ <- updateSpecimenDonorIfNeeded(biosample.specimenDonorId, specimenDonor, update) - updateResult <- biosampleRepository.update(updatedBiosample) - } yield { - if (updateResult) Right(BiosampleView.fromDomain(updatedBiosample, specimenDonor)) - else Left("Failed to update biosample") - } - } - } - } - - /** - * Creates an updated version of a Biosample by applying changes from a BiosampleUpdate object. - * Handles updates to optional fields and also updates the sample type if certain conditions are met. - * - * @param existing The existing Biosample object that serves as the baseline for updates. - * @param update The BiosampleUpdate object containing the modifications to apply to the existing Biosample. - * @return A new Biosample object reflecting the updates provided in the BiosampleUpdate. 
- */ - private def createUpdatedBiosample(existing: Biosample, update: BiosampleUpdate): Biosample = { - existing.copy( - alias = update.alias.orElse(existing.alias), - locked = update.locked.getOrElse(existing.locked) - ) - } - - private def updateSpecimenDonorIfNeeded( - specimenDonorId: Option[Int], - existingDonor: Option[SpecimenDonor], - update: BiosampleUpdate - ): Future[Unit] = { - if (hasSpecimenDonorUpdates(update)) { - (specimenDonorId, existingDonor) match { - case (Some(id), Some(donor)) => - // Update existing donor - val updatedDonor = donor.copy( - sex = update.sex.orElse(donor.sex), - geocoord = update.geoCoord.map(GeometryUtils.geoCoordToPoint).orElse(donor.geocoord), - dateRangeStart = update.dateRangeStart.orElse(donor.dateRangeStart), - dateRangeEnd = update.dateRangeEnd.orElse(donor.dateRangeEnd), - donorType = if (update.dateRangeStart.isDefined || update.dateRangeEnd.isDefined) { - BiosampleType.Ancient - } else { - donor.donorType - } - ) - specimenDonorRepository.update(updatedDonor).map(_ => ()) - - case (None, None) if shouldCreateNewDonor(update) => - // Create new donor if we have enough data - val newDonor = SpecimenDonor( - id = None, - donorIdentifier = s"DONOR_${java.util.UUID.randomUUID().toString}", - originBiobank = "Unknown", - donorType = if (update.dateRangeStart.isDefined || update.dateRangeEnd.isDefined) { - BiosampleType.Ancient - } else { - BiosampleType.Standard - }, - sex = update.sex, - geocoord = update.geoCoord.map(GeometryUtils.geoCoordToPoint), - dateRangeStart = update.dateRangeStart, - dateRangeEnd = update.dateRangeEnd - ) - specimenDonorRepository.create(newDonor).map(_ => ()) - - case _ => - Future.successful(()) // No updates needed - } - } else { - Future.successful(()) - } - } - - private def hasSpecimenDonorUpdates(update: BiosampleUpdate): Boolean = { - update.sex.isDefined || - update.geoCoord.isDefined || - update.dateRangeStart.isDefined || - update.dateRangeEnd.isDefined - } - - private def 
shouldCreateNewDonor(update: BiosampleUpdate): Boolean = { - // Create new donor only if we have at least two pieces of identifying information - val identifyingFields = Seq( - update.sex.isDefined, - update.geoCoord.isDefined, - update.dateRangeStart.isDefined || update.dateRangeEnd.isDefined - ) - identifyingFields.count(identity) >= 2 - } - - private def updateHaplogroupsIfNeeded(biosampleId: Int, update: BiosampleUpdate): Future[Unit] = { - if (update.yHaplogroup.isDefined || update.mtHaplogroup.isDefined) { - for { - pubBiosamples <- publicationBiosampleRepository.findByBiosampleId(biosampleId) - existingHaplogroups <- biosampleOriginalHaplogroupRepository.findByBiosampleId(biosampleId) - existingByPub = existingHaplogroups.map(h => h.publicationId -> h).toMap - _ <- Future.sequence(pubBiosamples.map { pubBiosample => - val entry = existingByPub.get(pubBiosample.publicationId) match { - case Some(existing) => - existing.copy( - yHaplogroupResult = update.yHaplogroup.orElse(existing.yHaplogroupResult), - mtHaplogroupResult = update.mtHaplogroup.orElse(existing.mtHaplogroupResult) - ) - case None => - OriginalHaplogroupEntry( - publicationId = pubBiosample.publicationId, - yHaplogroupResult = update.yHaplogroup, - mtHaplogroupResult = update.mtHaplogroup, - notes = None - ) - } - biosampleOriginalHaplogroupRepository.upsert(biosampleId, entry) - }) - } yield () - } else { - Future.successful(()) - } - } -} diff --git a/app/services/BranchAgeEstimationService.scala b/app/services/BranchAgeEstimationService.scala deleted file mode 100644 index 69ba40ae..00000000 --- a/app/services/BranchAgeEstimationService.scala +++ /dev/null @@ -1,382 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.haplogroups.{AgeEstimate, Haplogroup} -import play.api.Logging -import repositories.{BiosampleCallableLociRepository, HaplogroupCoreRepository, HaplogroupVariantRepository} - -import java.util.UUID -import 
scala.concurrent.{ExecutionContext, Future} - -/** - * SNP-based branch age estimation using a Poisson mutation model. - * - * Core formula: P(t|m) = Poisson(m, t*b*µ) - * Where: - * t = time in years - * b = callable loci (base pairs of coverage) - * µ = SNP mutation rate per bp per year - * m = observed mutations (SNP count) - * - * The point estimate (median) comes from: t = m / (b * µ) - * 95% confidence intervals use Poisson quantiles. - */ -class BranchAgeEstimationService @Inject()( - haplogroupCoreRepo: HaplogroupCoreRepository, - haplogroupVariantRepo: HaplogroupVariantRepository, - callableLociRepo: BiosampleCallableLociRepository -)(implicit ec: ExecutionContext) extends Logging { - - // Default SNP mutation rate: ~8.33 × 10⁻¹⁰ SNPs/bp/year (Helgason 2015) - val DefaultMutationRate: Double = 8.33e-10 - val DefaultMutationRateSigma: Double = 0.4e-10 - - // Default callable loci for Y-DNA BigY-700 test (~15 Mbp) - val DefaultCallableLoci: Long = 15_000_000L - - /** - * Calculate age estimate for a single haplogroup based on its defining SNP count. - * - * @param haplogroupId The haplogroup to estimate age for - * @param callableLoci Callable base pairs (defaults to BigY-700 coverage) - * @param mutationRate SNP mutation rate per bp per year - * @return Age estimate with 95% confidence interval - */ - def calculateAge( - haplogroupId: Int, - callableLoci: Long = DefaultCallableLoci, - mutationRate: Double = DefaultMutationRate - ): Future[Option[AgeEstimateResult]] = { - for { - haplogroupOpt <- haplogroupCoreRepo.findById(haplogroupId) - variants <- haplogroupVariantRepo.getHaplogroupVariants(haplogroupId) - } yield { - haplogroupOpt.map { haplogroup => - val snpCount = variants.size - calculateFromSnpCount(snpCount, callableLoci, mutationRate, haplogroup.name) - } - } - } - - /** - * Calculate age from a raw SNP count. 
- */ - private[services] def calculateFromSnpCount( - snpCount: Int, - callableLoci: Long, - mutationRate: Double, - haplogroupName: String = "" - ): AgeEstimateResult = { - if (snpCount == 0) { - return AgeEstimateResult( - estimate = AgeEstimate(0, Some(0), Some(0)), - snpCount = 0, - callableLoci = callableLoci, - mutationRate = mutationRate, - method = "SNP_POISSON" - ) - } - - val lambda = callableLoci.toDouble * mutationRate - // Point estimate: t = m / (b * µ) - val pointEstimate = snpCount.toDouble / lambda - val pointYbp = math.round(pointEstimate).toInt - - // 95% confidence interval using Poisson quantiles - // Lower bound: use lower Poisson quantile for mutation count - // Upper bound: use upper Poisson quantile for mutation count - val (lowerSnps, upperSnps) = poissonConfidenceInterval(snpCount, 0.95) - val lowerYbp = math.round(lowerSnps / lambda).toInt - val upperYbp = math.round(upperSnps / lambda).toInt - - if (haplogroupName.nonEmpty) { - logger.debug(s"Age estimate for $haplogroupName: $pointYbp YBP ($lowerYbp–$upperYbp) from $snpCount SNPs") - } - - AgeEstimateResult( - estimate = AgeEstimate(pointYbp, Some(lowerYbp), Some(upperYbp)), - snpCount = snpCount, - callableLoci = callableLoci, - mutationRate = mutationRate, - method = "SNP_POISSON" - ) - } - - /** - * Calculate TMRCA between two sibling haplogroups. 
- * TMRCA = (SNPs_on_child1 + SNPs_on_child2) / (2 * b * µ) - */ - def calculateTmrca( - childId1: Int, - childId2: Int, - callableLoci: Long = DefaultCallableLoci, - mutationRate: Double = DefaultMutationRate - ): Future[Option[AgeEstimateResult]] = { - for { - variants1 <- haplogroupVariantRepo.getHaplogroupVariants(childId1) - variants2 <- haplogroupVariantRepo.getHaplogroupVariants(childId2) - } yield { - val totalSnps = variants1.size + variants2.size - if (totalSnps == 0) None - else { - val lambda = 2.0 * callableLoci.toDouble * mutationRate - val pointEstimate = totalSnps.toDouble / lambda - val pointYbp = math.round(pointEstimate).toInt - - val (lowerSnps, upperSnps) = poissonConfidenceInterval(totalSnps, 0.95) - val lowerYbp = math.round(lowerSnps / lambda).toInt - val upperYbp = math.round(upperSnps / lambda).toInt - - Some(AgeEstimateResult( - estimate = AgeEstimate(pointYbp, Some(lowerYbp), Some(upperYbp)), - snpCount = totalSnps, - callableLoci = callableLoci, - mutationRate = mutationRate, - method = "SNP_POISSON_TMRCA" - )) - } - } - } - - /** - * Recalculate ages for an entire subtree bottom-up. - * Applies causality constraint: parent must be older than any child. - */ - def recalculateSubtree( - rootId: Int, - callableLoci: Long = DefaultCallableLoci, - mutationRate: Double = DefaultMutationRate - ): Future[Seq[AgeUpdateResult]] = { - recalculateNode(rootId, callableLoci, mutationRate) - } - - /** - * Recursively calculate ages bottom-up, applying causality constraint. 
- */ - private def recalculateNode( - haplogroupId: Int, - callableLoci: Long, - mutationRate: Double - ): Future[Seq[AgeUpdateResult]] = { - for { - // First recurse into children - children <- haplogroupCoreRepo.getDirectChildren(haplogroupId) - childResults <- Future.sequence(children.flatMap(_.id).map { childId => - recalculateNode(childId, callableLoci, mutationRate) - }) - - // Calculate this node's age - resultOpt <- calculateAge(haplogroupId, callableLoci, mutationRate) - - // Apply causality: if any child is older, adjust this node - childAges = childResults.flatten.filter(_.haplogroupId == haplogroupId).flatMap(r => Some(r.newEstimate.ybp)) ++ - children.flatMap(_.id).flatMap { childId => - childResults.flatten.find(_.haplogroupId == childId).map(_.newEstimate.ybp) - } - maxChildAge = if (childAges.isEmpty) 0 else childAges.max - - adjustedResult = resultOpt.map { result => - if (result.estimate.ybp < maxChildAge) { - // Parent must be older than oldest child — adjust upward - val adjusted = result.copy( - estimate = result.estimate.copy( - ybp = maxChildAge + 1, // At least 1 year older - ybpLower = result.estimate.ybpLower.map(l => math.max(l, maxChildAge + 1)), - ybpUpper = result.estimate.ybpUpper - ), - method = "SNP_POISSON_CAUSALITY_ADJUSTED" - ) - logger.debug(s"Causality adjustment for haplogroup $haplogroupId: " + - s"${result.estimate.ybp} -> ${adjusted.estimate.ybp} YBP (child max: $maxChildAge)") - adjusted - } else result - } - - // Save result - thisUpdate = adjustedResult.map { result => - AgeUpdateResult( - haplogroupId = haplogroupId, - newEstimate = result.estimate, - previousEstimate = None, // Could look up existing, omitted for simplicity - method = result.method, - snpCount = result.snpCount - ) - }.toSeq - } yield childResults.flatten ++ thisUpdate - } - - /** - * Look up per-sample callable loci, falling back to the default if not available. 
- */ - def getCallableLociForSample(sampleGuid: UUID, chromosome: String = "chrY"): Future[Long] = { - callableLociRepo.findBySampleGuid(sampleGuid, chromosome).map { - case Some(loci) => loci.totalCallableBp - case None => DefaultCallableLoci - } - } - - /** - * Calculate age for a sample using its per-sample callable loci if available. - */ - def calculateAgeForSample( - haplogroupId: Int, - sampleGuid: UUID, - mutationRate: Double = DefaultMutationRate - ): Future[Option[AgeEstimateResult]] = { - for { - callableLoci <- getCallableLociForSample(sampleGuid) - result <- calculateAge(haplogroupId, callableLoci, mutationRate) - } yield result - } - - /** - * Combine SNP and STR age estimates using inverse-variance weighting. - * - * P(t|all) proportional to P(t|SNPs) x P(t|STRs) - * Approximated by inverse-variance weighted mean of the two point estimates. - */ - private[services] def combineSnpAndStrEstimates( - snpResult: AgeEstimateResult, - strResult: StrAgeEstimateResult - ): CombinedAgeEstimateResult = { - val snpEst = snpResult.estimate - val strEst = strResult.estimate - - // If either has zero age, use the other - if (snpEst.ybp == 0 && strEst.ybp == 0) { - return CombinedAgeEstimateResult( - estimate = AgeEstimate(0, Some(0), Some(0)), - snpEstimate = snpResult, - strEstimate = Some(strResult), - method = "COMBINED_SNP_STR" - ) - } - if (strEst.ybp == 0 || strResult.markerCount == 0) { - return CombinedAgeEstimateResult( - estimate = snpEst, snpEstimate = snpResult, strEstimate = Some(strResult), - method = "SNP_ONLY" - ) - } - if (snpEst.ybp == 0 || snpResult.snpCount == 0) { - return CombinedAgeEstimateResult( - estimate = strEst, snpEstimate = snpResult, strEstimate = Some(strResult), - method = "STR_ONLY" - ) - } - - // Inverse-variance weighting - // Variance approximated from CI width: sigma ≈ (upper - lower) / (2 * 1.96) - val snpSigma = ciToSigma(snpEst) - val strSigma = ciToSigma(strEst) - - if (snpSigma <= 0 || strSigma <= 0) { - // Fallback to 
simple average if CI is degenerate - val avg = (snpEst.ybp + strEst.ybp) / 2 - return CombinedAgeEstimateResult( - estimate = AgeEstimate(avg, snpEst.ybpLower, snpEst.ybpUpper), - snpEstimate = snpResult, strEstimate = Some(strResult), - method = "COMBINED_SNP_STR" - ) - } - - val snpWeight = 1.0 / (snpSigma * snpSigma) - val strWeight = 1.0 / (strSigma * strSigma) - val totalWeight = snpWeight + strWeight - - val combinedYbp = math.round((snpEst.ybp * snpWeight + strEst.ybp * strWeight) / totalWeight).toInt - val combinedSigma = math.sqrt(1.0 / totalWeight) - val combinedLower = math.max(0, math.round(combinedYbp - 1.96 * combinedSigma).toInt) - val combinedUpper = math.round(combinedYbp + 1.96 * combinedSigma).toInt - - CombinedAgeEstimateResult( - estimate = AgeEstimate(combinedYbp, Some(combinedLower), Some(combinedUpper)), - snpEstimate = snpResult, - strEstimate = Some(strResult), - method = "COMBINED_SNP_STR" - ) - } - - /** - * Extract sigma from an AgeEstimate's confidence interval. - */ - private def ciToSigma(est: AgeEstimate): Double = { - (est.ybpUpper, est.ybpLower) match { - case (Some(upper), Some(lower)) if upper > lower => - (upper - lower).toDouble / (2 * 1.96) - case _ => 0.0 - } - } - - /** - * Compute Poisson confidence interval for observed count m. - * Uses the chi-squared relationship: 2*sum(Poisson) ~ chi-squared(2m) - * - * For a 95% CI: - * - Lower: chi2_inv(0.025, 2m) / 2 - * - Upper: chi2_inv(0.975, 2(m+1)) / 2 - * - * Approximated using the Wilson-Hilferty transformation for chi-squared. 
- */ - private[services] def poissonConfidenceInterval(m: Int, confidence: Double): (Double, Double) = { - val alpha = 1.0 - confidence - - if (m == 0) { - // Special case: 0 observed mutations - // Lower bound is 0, upper bound from chi-squared - val upper = -0.5 * math.log(alpha / 2) // Simplified for m=0 - return (0.0, upper * 2) // Approximate - } - - // Normal approximation for Poisson CI (good for m >= 5) - // Exact would use chi-squared quantiles, but this is sufficient for Phase 1 - val z = 1.96 // z-score for 95% CI - val sqrtM = math.sqrt(m.toDouble) - val lower = math.max(0, m.toDouble - z * sqrtM) - val upper = m.toDouble + z * sqrtM - - (lower, upper) - } - - /** - * Temporal resolution in years per SNP for given callable loci. - */ - private[services] def temporalResolution( - callableLoci: Long, - mutationRate: Double = DefaultMutationRate - ): Double = { - 1.0 / (callableLoci.toDouble * mutationRate) - } -} - -/** - * Result of an age estimation calculation. - */ -case class AgeEstimateResult( - estimate: AgeEstimate, - snpCount: Int, - callableLoci: Long, - mutationRate: Double, - method: String -) - -/** - * Result of a combined SNP + STR age estimation. - */ -case class CombinedAgeEstimateResult( - estimate: AgeEstimate, - snpEstimate: AgeEstimateResult, - strEstimate: Option[StrAgeEstimateResult], - method: String -) - -/** - * Result of updating a haplogroup's age estimate. 
- */ -case class AgeUpdateResult( - haplogroupId: Int, - newEstimate: AgeEstimate, - previousEstimate: Option[AgeEstimate], - method: String, - snpCount: Int -) diff --git a/app/services/CachedSecretsManagerService.scala b/app/services/CachedSecretsManagerService.scala deleted file mode 100644 index 5cf57720..00000000 --- a/app/services/CachedSecretsManagerService.scala +++ /dev/null @@ -1,82 +0,0 @@ -package services - -import com.google.inject.Singleton -import config.AWSSecretsConfig -import software.amazon.awssdk.services.secretsmanager.SecretsManagerClient -import software.amazon.awssdk.services.secretsmanager.model.GetSecretValueRequest - -import java.time.Instant -import javax.inject.Inject -import scala.collection.concurrent.TrieMap -import scala.concurrent.ExecutionContext -import scala.concurrent.duration.* -import scala.util.Try - -/** - * A service for managing and caching secrets from AWS Secrets Manager. - * - * This service retrieves secrets, such as API keys, from AWS Secrets Manager - * and caches them locally for a specified duration to reduce the number of - * external API calls. Cached secrets are automatically refreshed after they expire. - * - * @constructor Creates a new instance of CachedSecretsManagerService. - * @param config Configuration object containing AWS region and secret name details. - * @param ec The implicit ExecutionContext for handling asynchronous operations. - */ -@Singleton -class CachedSecretsManagerService @Inject()( - config: AWSSecretsConfig - )(implicit ec: ExecutionContext) { - - private val client = SecretsManagerClient.builder() - .region(config.region) - .build() - - private val cache = TrieMap[String, (String, Instant)]() - private val CacheDuration = 1.hour - - /** - * Retrieves the cached API key if it exists and is not expired. If the cached key is expired - * or unavailable, retrieves a new API key from the AWS Secrets Manager, caches it, and returns it. 
- * - * @return An `Option` containing the API key as a `String`, or `None` if the key could not be retrieved. - */ - def getCachedApiKey: Option[String] = { - getSecret(config.apiKeySecretName) - } - - /** - * Retrieves the cached User Encryption Key for reversable email encryption. - */ - def getCachedUserEncryptionKey: Option[String] = { - getSecret(config.userEncryptionKeySecretName) - } - - private def getSecret(secretName: String): Option[String] = { - cache.get(secretName) match { - case Some((key, expiry)) if expiry.isAfter(Instant.now) => - Some(key) - case _ => - fetchSecret(secretName).toOption.map { key => - cache.put(secretName, (key, Instant.now.plusSeconds(CacheDuration.toSeconds))) - key - } - } - } - - private def getApiKey: Try[String] = { - // Deprecated: Use getSecret instead - fetchSecret(config.apiKeySecretName) - } - - private def fetchSecret(secretName: String): Try[String] = { - Try { - val request = GetSecretValueRequest.builder() - .secretId(secretName) - .build() - - val response = client.getSecretValue(request) - response.secretString() - } - } -} \ No newline at end of file diff --git a/app/services/ChipDataRegistrationService.scala b/app/services/ChipDataRegistrationService.scala deleted file mode 100644 index 2e09c689..00000000 --- a/app/services/ChipDataRegistrationService.scala +++ /dev/null @@ -1,175 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.atmosphere.{HaplogroupAssignments, VariantCall} -import models.domain.genomics.* -import play.api.Logging -import repositories.GenotypeDataRepository - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -case class ChipQualityAssessment( - overallQuality: String, - noCallRateAcceptable: Boolean, - yDnaCoverage: Option[String], - mtDnaCoverage: Option[String], - warnings: Seq[String] - ) - -case class ChipRegistrationResult( - genotypeDataId: Option[Int], - testTypeCode: String, - provider: 
String, - qualityAssessment: ChipQualityAssessment, - yHaplogroupAssigned: Option[String], - mtHaplogroupAssigned: Option[String], - yPrivateVariantsExtracted: Int, - mtPrivateVariantsExtracted: Int - ) - -@Singleton -class ChipDataRegistrationService @Inject()( - testTypeService: TestTypeService, - genotypeRepo: GenotypeDataRepository, - privateVariantService: PrivateVariantExtractionService - )(implicit ec: ExecutionContext) extends Logging { - - val MaxNoCallRate: Double = 0.05 - val MinMarkerCount: Int = 100000 - val MinYMarkersForHaplogroup: Int = 50 - val MinMtMarkersForHaplogroup: Int = 20 - - def assessQuality(metrics: GenotypeMetrics, testType: Option[TestTypeRow]): ChipQualityAssessment = { - val warnings = scala.collection.mutable.ArrayBuffer[String]() - - val noCallOk = metrics.noCallRate.forall(_ <= MaxNoCallRate) - if (!noCallOk) { - warnings += s"No-call rate ${metrics.noCallRate.getOrElse(0.0)} exceeds threshold $MaxNoCallRate" - } - - val totalMarkers = metrics.totalMarkersCalled.getOrElse(0) - if (totalMarkers < MinMarkerCount) { - warnings += s"Total markers called ($totalMarkers) below minimum ($MinMarkerCount)" - } - - val expectedMarkers = testType.flatMap(_.expectedMarkerCount) - expectedMarkers.foreach { expected => - val ratio = totalMarkers.toDouble / expected - if (ratio < 0.90) { - warnings += s"Marker count ($totalMarkers) is ${(ratio * 100).toInt}% of expected ($expected)" - } - } - - val yCoverage = metrics.yMarkersCalled.map { yCalled => - if (yCalled >= MinYMarkersForHaplogroup) "SUFFICIENT" - else if (yCalled > 0) "LIMITED" - else "NONE" - } - - val mtCoverage = metrics.mtMarkersCalled.map { mtCalled => - if (mtCalled >= MinMtMarkersForHaplogroup) "SUFFICIENT" - else if (mtCalled > 0) "LIMITED" - else "NONE" - } - - val quality = if (!noCallOk || totalMarkers < MinMarkerCount) "LOW" - else if (warnings.isEmpty) "HIGH" - else "MEDIUM" - - ChipQualityAssessment( - overallQuality = quality, - noCallRateAcceptable = noCallOk, - 
yDnaCoverage = yCoverage, - mtDnaCoverage = mtCoverage, - warnings = warnings.toSeq - ) - } - - def extractPrivateVariantsFromChip( - citizenBiosampleId: Int, - sampleGuid: UUID, - haplogroupAssignments: Option[HaplogroupAssignments] - ): Future[ChipVariantExtractionResult] = { - haplogroupAssignments match { - case None => Future.successful(ChipVariantExtractionResult(0, 0)) - case Some(assignments) => - val yFut = assignments.yDna match { - case Some(yResult) if yResult.privateVariants.flatMap(_.variants).exists(_.nonEmpty) => - val variants = yResult.privateVariants.get.variants.get - privateVariantService.extractFromCitizenBiosample( - citizenBiosampleId, sampleGuid, yResult.haplogroupName, - HaplogroupType.Y, variants - ).map(_.size).recover { - case e: Exception => - logger.warn(s"Failed to extract Y private variants for sample $sampleGuid: ${e.getMessage}") - 0 - } - case _ => Future.successful(0) - } - - val mtFut = assignments.mtDna match { - case Some(mtResult) if mtResult.privateVariants.flatMap(_.variants).exists(_.nonEmpty) => - val variants = mtResult.privateVariants.get.variants.get - privateVariantService.extractFromCitizenBiosample( - citizenBiosampleId, sampleGuid, mtResult.haplogroupName, - HaplogroupType.MT, variants - ).map(_.size).recover { - case e: Exception => - logger.warn(s"Failed to extract mtDNA private variants for sample $sampleGuid: ${e.getMessage}") - 0 - } - case _ => Future.successful(0) - } - - for { - yCount <- yFut - mtCount <- mtFut - } yield ChipVariantExtractionResult(yCount, mtCount) - } - } - - def validateChipData( - testTypeCode: String, - provider: String, - metrics: GenotypeMetrics - ): Future[Either[Seq[String], TestTypeRow]] = { - testTypeService.getByCode(testTypeCode).map { - case None => - Left(Seq(s"Unknown test type code: $testTypeCode")) - case Some(testType) => - val errors = scala.collection.mutable.ArrayBuffer[String]() - - if (testType.category != DataGenerationMethod.Genotyping) { - errors += s"Test type 
$testTypeCode is not a genotyping test" - } - - if (provider.isBlank) { - errors += "Provider is required" - } - - metrics.totalMarkersCalled match { - case Some(count) if count <= 0 => - errors += "Total markers called must be positive" - case None => - errors += "Total markers called is required" - case _ => // ok - } - - if (errors.isEmpty) Right(testType) - else Left(errors.toSeq) - } - } - - def findExistingByHash(sourceFileHash: String): Future[Option[GenotypeData]] = { - genotypeRepo.findBySourceFileHash(sourceFileHash) - } -} - -case class ChipVariantExtractionResult( - yPrivateVariants: Int, - mtPrivateVariants: Int - ) { - def total: Int = yPrivateVariants + mtPrivateVariants -} diff --git a/app/services/CoordinateValidation.scala b/app/services/CoordinateValidation.scala deleted file mode 100644 index 7d6ead7d..00000000 --- a/app/services/CoordinateValidation.scala +++ /dev/null @@ -1,23 +0,0 @@ -package services - -import com.vividsolutions.jts.geom.Point -import utils.GeometryUtils - -import scala.concurrent.Future - -trait CoordinateValidation { - def validateCoordinates(lat: Option[Double], lon: Option[Double]): Future[Option[Point]] = { - (lat, lon) match { - case (Some(latitude), Some(longitude)) => - if (latitude >= -90 && latitude <= 90 && longitude >= -180 && longitude <= 180) { - Future.successful(Some(GeometryUtils.createPoint(latitude, longitude))) - } else { - Future.failed(InvalidCoordinatesException(latitude, longitude)) - } - case (None, None) => Future.successful(None) - case _ => Future.failed(InvalidCoordinatesException( - lat.getOrElse(0.0), lon.getOrElse(0.0) - )) - } - } -} \ No newline at end of file diff --git a/app/services/CoverageExpectationService.scala b/app/services/CoverageExpectationService.scala deleted file mode 100644 index e506d374..00000000 --- a/app/services/CoverageExpectationService.scala +++ /dev/null @@ -1,233 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.* 
-import play.api.Logging -import repositories.{CoverageExpectationProfileRepository, TestTypeRepository} - -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class CoverageExpectationService @Inject()( - profileRepo: CoverageExpectationProfileRepository, - testTypeRepo: TestTypeRepository - )(implicit ec: ExecutionContext) extends Logging { - - def assessVariantCallingConfidence( - testTypeCode: String, - coverageMetrics: CoverageMetricsInput - ): Future[Option[SampleCoverageAssessment]] = { - testTypeRepo.findByCode(testTypeCode).flatMap { - case None => Future.successful(None) - case Some(testType) => - testType.id match { - case None => Future.successful(None) - case Some(ttId) => - val isChip = testType.category == DataGenerationMethod.Genotyping - profileRepo.findByTestTypeId(ttId).map { profiles => - if (profiles.isEmpty) None - else Some(buildAssessment(testType, profiles, coverageMetrics, isChip)) - } - } - } - } - - private[services] def buildAssessment( - testType: TestTypeRow, - profiles: Seq[CoverageExpectationProfile], - metrics: CoverageMetricsInput, - isChip: Boolean - ): SampleCoverageAssessment = { - val confidences = profiles.map { profile => - evaluateProfile(profile, metrics, isChip) - } - - val overallConfidence = deriveOverallConfidence(confidences) - - SampleCoverageAssessment( - testTypeCode = testType.code, - testTypeDisplayName = testType.displayName, - isChipBased = isChip, - confidences = confidences, - overallConfidence = overallConfidence - ) - } - - private[services] def evaluateProfile( - profile: CoverageExpectationProfile, - metrics: CoverageMetricsInput, - isChip: Boolean - ): VariantCallingConfidence = { - if (isChip) { - evaluateChipProfile(profile, metrics) - } else { - evaluateSequencingProfile(profile, metrics) - } - } - - private def evaluateChipProfile( - profile: CoverageExpectationProfile, - metrics: CoverageMetricsInput - ): VariantCallingConfidence = { - val confidence = metrics.markerCount match { - 
case Some(count) if count >= 500 => VariantCallingConfidence.HIGH - case Some(count) if count >= 100 => VariantCallingConfidence.MEDIUM - case Some(count) if count > 0 => VariantCallingConfidence.LOW - case _ => VariantCallingConfidence.INSUFFICIENT - } - - val details = metrics.markerCount.map(c => "markerCount" -> c.toString).toMap ++ - metrics.noCallRate.map(r => "noCallRate" -> f"$r%.4f") - - val noCallOk = metrics.noCallRate.forall(_ <= 0.05) - - VariantCallingConfidence( - contigName = profile.contigName, - variantClass = profile.variantClass, - depthConfidence = confidence, - coverageAdequate = true, - mappingQualityAdequate = true, - callableBasesAdequate = true, - overallConfidence = if (noCallOk) confidence else downgrade(confidence), - details = details - ) - } - - private def evaluateSequencingProfile( - profile: CoverageExpectationProfile, - metrics: CoverageMetricsInput - ): VariantCallingConfidence = { - val depthConf = metrics.meanDepth match { - case Some(depth) => profile.confidenceForDepth(depth) - case None => VariantCallingConfidence.INSUFFICIENT - } - - val coverageOk = (metrics.coveragePctAt1x, profile.minCoveragePct) match { - case (Some(actual), Some(expected)) => actual >= expected - case _ => true - } - - val mappingQualOk = (metrics.meanMappingQuality, profile.minMappingQuality) match { - case (Some(actual), Some(expected)) => actual >= expected - case _ => true - } - - val callableOk = (metrics.callablePct, profile.minCallablePct) match { - case (Some(actual), Some(expected)) => actual >= expected - case _ => true - } - - val penalties = Seq( - if (!coverageOk) 1 else 0, - if (!mappingQualOk) 1 else 0, - if (!callableOk) 1 else 0 - ).sum - - val overall = (0 until penalties).foldLeft(depthConf)((conf, _) => downgrade(conf)) - - val details = Map.empty[String, String] ++ - metrics.meanDepth.map(d => "meanDepth" -> f"$d%.1f") ++ - metrics.coveragePctAt1x.map(c => "coveragePctAt1x" -> f"$c%.4f") ++ - metrics.meanMappingQuality.map(q => 
"meanMappingQuality" -> f"$q%.1f") ++ - metrics.callablePct.map(p => "callablePct" -> f"$p%.4f") - - VariantCallingConfidence( - contigName = profile.contigName, - variantClass = profile.variantClass, - depthConfidence = depthConf, - coverageAdequate = coverageOk, - mappingQualityAdequate = mappingQualOk, - callableBasesAdequate = callableOk, - overallConfidence = overall, - details = details - ) - } - - private[services] def downgrade(confidence: String): String = confidence match { - case VariantCallingConfidence.HIGH => VariantCallingConfidence.MEDIUM - case VariantCallingConfidence.MEDIUM => VariantCallingConfidence.LOW - case _ => VariantCallingConfidence.INSUFFICIENT - } - - private[services] def deriveOverallConfidence(confidences: Seq[VariantCallingConfidence]): String = { - if (confidences.isEmpty) return VariantCallingConfidence.INSUFFICIENT - val levels = confidences.map(c => confidenceOrdinal(c.overallConfidence)) - confidenceFromOrdinal(levels.min) - } - - private def confidenceOrdinal(c: String): Int = c match { - case VariantCallingConfidence.HIGH => 3 - case VariantCallingConfidence.MEDIUM => 2 - case VariantCallingConfidence.LOW => 1 - case _ => 0 - } - - private def confidenceFromOrdinal(o: Int): String = o match { - case 3 => VariantCallingConfidence.HIGH - case 2 => VariantCallingConfidence.MEDIUM - case 1 => VariantCallingConfidence.LOW - case _ => VariantCallingConfidence.INSUFFICIENT - } - - def getProfilesForTestType(testTypeCode: String): Future[Seq[CoverageExpectationProfile]] = { - testTypeRepo.findByCode(testTypeCode).flatMap { - case None => Future.successful(Seq.empty) - case Some(tt) => tt.id match { - case None => Future.successful(Seq.empty) - case Some(ttId) => profileRepo.findByTestTypeId(ttId) - } - } - } - - def getConfidenceForVariant( - testTypeCode: String, - contigName: String, - variantClass: String, - meanDepth: Double - ): Future[Option[String]] = { - testTypeRepo.findByCode(testTypeCode).flatMap { - case None => 
Future.successful(None) - case Some(tt) => tt.id match { - case None => Future.successful(None) - case Some(ttId) => - profileRepo.findByTestTypeContigAndClass(ttId, contigName, variantClass).map { - case None => None - case Some(profile) => Some(profile.confidenceForDepth(meanDepth)) - } - } - } - } -} - -case class CoverageMetricsInput( - meanDepth: Option[Double] = None, - coveragePctAt1x: Option[Double] = None, - meanMappingQuality: Option[Double] = None, - callablePct: Option[Double] = None, - markerCount: Option[Int] = None, - noCallRate: Option[Double] = None - ) - -object CoverageMetricsInput { - def fromEmbeddedCoverage(ec: EmbeddedCoverage, callableLoci: Option[BiosampleCallableLoci] = None): CoverageMetricsInput = { - val callablePct = for { - callable <- ec.basesCallable - total <- ec.basesCallable.map(_ + ec.basesNoCoverage.getOrElse(0L) + ec.basesLowQualityMapping.getOrElse(0L)) - if total > 0 - } yield callable.toDouble / total - - CoverageMetricsInput( - meanDepth = ec.meanDepth, - coveragePctAt1x = ec.percentCoverageAt1x, - meanMappingQuality = ec.meanMappingQuality, - callablePct = callablePct - ) - } - - def fromGenotypeMetrics(gm: GenotypeMetrics): CoverageMetricsInput = { - CoverageMetricsInput( - markerCount = gm.totalMarkersCalled, - noCallRate = gm.noCallRate - ) - } -} diff --git a/app/services/CuratorAuditService.scala b/app/services/CuratorAuditService.scala deleted file mode 100644 index 35d67d36..00000000 --- a/app/services/CuratorAuditService.scala +++ /dev/null @@ -1,509 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.domain.genomics.VariantV2 -import models.domain.curator.AuditLogEntry -import models.domain.haplogroups.{ChangeSet, ChangeSetStatus, Haplogroup, HaplogroupVariantMetadata, TreeChange} -import play.api.Logging -import play.api.libs.json.* -import repositories.{CuratorAuditRepository, HaplogroupVariantMetadataRepository} - -import java.time.LocalDateTime 
-import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for managing curator audit logging. - * Provides methods to log create, update, and delete actions for haplogroups and variants, - * as well as retrieve audit history. - */ -@Singleton -class CuratorAuditService @Inject()( - auditRepository: CuratorAuditRepository, - haplogroupVariantMetadataRepository: HaplogroupVariantMetadataRepository -)(implicit ec: ExecutionContext) extends Logging { - - // JSON formats for domain objects - private given Format[HaplogroupType] = Format( - Reads.StringReads.map(s => HaplogroupType.fromString(s).getOrElse(HaplogroupType.Y)), - Writes.StringWrites.contramap(_.toString) - ) - - private given Format[LocalDateTime] = Format( - Reads.localDateTimeReads("yyyy-MM-dd'T'HH:mm:ss"), - Writes.temporalWrites[LocalDateTime, java.time.format.DateTimeFormatter]( - java.time.format.DateTimeFormatter.ISO_LOCAL_DATE_TIME - ) - ) - - private given Format[Haplogroup] = Json.format[Haplogroup] - - // Helper to convert VariantV2 to JSON for audit logging - private def variantV2ToJson(variant: VariantV2): JsValue = Json.obj( - "variantId" -> variant.variantId, - "canonicalName" -> variant.canonicalName, - "mutationType" -> variant.mutationType, - "namingStatus" -> variant.namingStatus, - "aliases" -> variant.aliases, - "coordinates" -> variant.coordinates, - "notes" -> variant.notes - ) - - // === Haplogroup Audit Methods === - - /** - * Log haplogroup creation. - */ - def logHaplogroupCreate( - userId: UUID, - haplogroup: Haplogroup, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val entry = AuditLogEntry( - userId = userId, - entityType = "haplogroup", - entityId = haplogroup.id.getOrElse(0), - action = "create", - oldValue = None, - newValue = Some(Json.toJson(haplogroup)), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log haplogroup update. 
- */ - def logHaplogroupUpdate( - userId: UUID, - oldHaplogroup: Haplogroup, - newHaplogroup: Haplogroup, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val entry = AuditLogEntry( - userId = userId, - entityType = "haplogroup", - entityId = oldHaplogroup.id.getOrElse(0), - action = "update", - oldValue = Some(Json.toJson(oldHaplogroup)), - newValue = Some(Json.toJson(newHaplogroup)), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log haplogroup soft-delete. - */ - def logHaplogroupDelete( - userId: UUID, - haplogroup: Haplogroup, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val entry = AuditLogEntry( - userId = userId, - entityType = "haplogroup", - entityId = haplogroup.id.getOrElse(0), - action = "delete", - oldValue = Some(Json.toJson(haplogroup)), - newValue = None, - comment = comment - ) - auditRepository.logAction(entry) - } - - // === Variant Audit Methods === - - /** - * Log variant creation. - */ - def logVariantCreate( - userId: UUID, - variant: VariantV2, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val entry = AuditLogEntry( - userId = userId, - entityType = "variant", - entityId = variant.variantId.getOrElse(0), - action = "create", - oldValue = None, - newValue = Some(variantV2ToJson(variant)), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log variant update. - */ - def logVariantUpdate( - userId: UUID, - oldVariant: VariantV2, - newVariant: VariantV2, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val entry = AuditLogEntry( - userId = userId, - entityType = "variant", - entityId = oldVariant.variantId.getOrElse(0), - action = "update", - oldValue = Some(variantV2ToJson(oldVariant)), - newValue = Some(variantV2ToJson(newVariant)), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log variant deletion. 
- */ - def logVariantDelete( - userId: UUID, - variant: VariantV2, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val entry = AuditLogEntry( - userId = userId, - entityType = "variant", - entityId = variant.variantId.getOrElse(0), - action = "delete", - oldValue = Some(variantV2ToJson(variant)), - newValue = None, - comment = comment - ) - auditRepository.logAction(entry) - } - - // === History Retrieval Methods === - - /** - * Get audit history for a specific haplogroup. - */ - def getHaplogroupHistory(haplogroupId: Int): Future[Seq[AuditLogEntry]] = { - auditRepository.getEntityHistory("haplogroup", haplogroupId) - } - - /** - * Get audit history for a specific variant. - */ - def getVariantHistory(variantId: Int): Future[Seq[AuditLogEntry]] = { - auditRepository.getEntityHistory("variant", variantId) - } - - /** - * Get recent audit actions across all entities. - */ - def getRecentActions(limit: Int = 50, offset: Int = 0): Future[Seq[AuditLogEntry]] = { - auditRepository.getRecentActions(limit, offset) - } - - /** - * Get audit actions by a specific user. - */ - def getActionsByUser(userId: UUID, limit: Int = 50, offset: Int = 0): Future[Seq[AuditLogEntry]] = { - auditRepository.getActionsByUser(userId, limit, offset) - } - - // === Haplogroup-Variant Association Audit Methods === - - /** - * Log when a variant is added to a haplogroup. - */ - def logVariantAddedToHaplogroup( - author: String, - haplogroupVariantId: Int, - comment: Option[String] = None - ): Future[Int] = { - val metadata = HaplogroupVariantMetadata( - haplogroup_variant_id = haplogroupVariantId, - revision_id = 1, - author = author, - timestamp = LocalDateTime.now(), - comment = comment.getOrElse("Added via curator interface"), - change_type = "add", - previous_revision_id = None - ) - haplogroupVariantMetadataRepository.addVariantRevisionMetadata(metadata) - } - - /** - * Log when a variant is removed from a haplogroup. 
- */ - def logVariantRemovedFromHaplogroup( - author: String, - haplogroupVariantId: Int, - comment: Option[String] = None - ): Future[Int] = { - // Get the latest revision to link to - haplogroupVariantMetadataRepository.getVariantRevisionHistory(haplogroupVariantId).flatMap { history => - val latestRevisionId = history.headOption.map(_._2.revision_id) - val nextRevisionId = latestRevisionId.map(_ + 1).getOrElse(1) - - val metadata = HaplogroupVariantMetadata( - haplogroup_variant_id = haplogroupVariantId, - revision_id = nextRevisionId, - author = author, - timestamp = LocalDateTime.now(), - comment = comment.getOrElse("Removed via curator interface"), - change_type = "remove", - previous_revision_id = latestRevisionId - ) - haplogroupVariantMetadataRepository.addVariantRevisionMetadata(metadata) - } - } - - /** - * Get revision history for a haplogroup-variant association. - */ - def getHaplogroupVariantHistory(haplogroupVariantId: Int): Future[Seq[HaplogroupVariantMetadata]] = { - haplogroupVariantMetadataRepository.getVariantRevisionHistory(haplogroupVariantId).map(_.map(_._2)) - } - - // === Tree Versioning Audit Methods === - - /** - * Log change set creation. - */ - def logChangeSetCreate( - curatorId: String, - changeSet: ChangeSet, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val details = Json.obj( - "changeSetId" -> changeSet.id, - "name" -> changeSet.name, - "haplogroupType" -> changeSet.haplogroupType.toString, - "sourceName" -> changeSet.sourceName, - "status" -> changeSet.status.toString - ) - val entry = AuditLogEntry( - userId = curatorIdToUuid(curatorId), - entityType = "change_set", - entityId = changeSet.id.getOrElse(0), - action = "create", - oldValue = None, - newValue = Some(details), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log change set status transition. 
- */ - def logChangeSetStatusChange( - curatorId: String, - changeSetId: Int, - oldStatus: ChangeSetStatus, - newStatus: ChangeSetStatus, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val entry = AuditLogEntry( - userId = curatorIdToUuid(curatorId), - entityType = "change_set", - entityId = changeSetId, - action = "status_change", - oldValue = Some(Json.obj("status" -> oldStatus.toString)), - newValue = Some(Json.obj("status" -> newStatus.toString)), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log change set applied to production. - */ - def logChangeSetApply( - curatorId: String, - changeSet: ChangeSet, - appliedChangesCount: Int, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val details = Json.obj( - "changeSetId" -> changeSet.id, - "name" -> changeSet.name, - "haplogroupType" -> changeSet.haplogroupType.toString, - "appliedChanges" -> appliedChangesCount, - "statistics" -> Json.obj( - "nodesCreated" -> changeSet.statistics.nodesCreated, - "nodesUpdated" -> changeSet.statistics.nodesUpdated, - "variantsAdded" -> changeSet.statistics.variantsAdded - ) - ) - val entry = AuditLogEntry( - userId = curatorIdToUuid(curatorId), - entityType = "change_set", - entityId = changeSet.id.getOrElse(0), - action = "apply", - oldValue = Some(Json.obj("status" -> ChangeSetStatus.UnderReview.toString)), - newValue = Some(details), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log change set discarded. 
- */ - def logChangeSetDiscard( - curatorId: String, - changeSet: ChangeSet, - reason: String - ): Future[AuditLogEntry] = { - val details = Json.obj( - "changeSetId" -> changeSet.id, - "name" -> changeSet.name, - "reason" -> reason - ) - val entry = AuditLogEntry( - userId = curatorIdToUuid(curatorId), - entityType = "change_set", - entityId = changeSet.id.getOrElse(0), - action = "discard", - oldValue = Some(Json.obj("status" -> changeSet.status.toString)), - newValue = Some(details), - comment = Some(reason) - ) - auditRepository.logAction(entry) - } - - /** - * Log individual change review action. - */ - def logChangeReview( - curatorId: String, - change: TreeChange, - action: String, - notes: Option[String] = None - ): Future[AuditLogEntry] = { - val details = Json.obj( - "changeId" -> change.id, - "changeSetId" -> change.changeSetId, - "changeType" -> change.changeType.toString, - "reviewAction" -> action - ) - val entry = AuditLogEntry( - userId = curatorIdToUuid(curatorId), - entityType = "tree_change", - entityId = change.id.getOrElse(0), - action = s"review_$action".toLowerCase, - oldValue = Some(Json.obj("status" -> change.status.toString)), - newValue = Some(details), - comment = notes - ) - auditRepository.logAction(entry) - } - - /** - * Get audit history for a change set. - */ - def getChangeSetHistory(changeSetId: Int): Future[Seq[AuditLogEntry]] = { - auditRepository.getEntityHistory("change_set", changeSetId) - } - - // Helper to convert curator ID string to UUID - private def curatorIdToUuid(curatorId: String): UUID = { - try { - UUID.fromString(curatorId) - } catch { - case _: IllegalArgumentException => - // Generate a deterministic UUID from the curator ID string - UUID.nameUUIDFromBytes(curatorId.getBytes("UTF-8")) - } - } - - // === Tree Restructuring Audit Methods === - - /** - * Log a branch split operation. 
- */ - def logBranchSplit( - userId: UUID, - parentId: Int, - newHaplogroupId: Int, - movedVariantCount: Int, - movedChildIds: Seq[Int], - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val details = Json.obj( - "operation" -> "split", - "parentId" -> parentId, - "newHaplogroupId" -> newHaplogroupId, - "movedVariantCount" -> movedVariantCount, - "movedChildIds" -> movedChildIds - ) - val entry = AuditLogEntry( - userId = userId, - entityType = "haplogroup", - entityId = newHaplogroupId, - action = "split", - oldValue = None, - newValue = Some(details), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log a merge into parent operation. - */ - def logMergeIntoParent( - userId: UUID, - parentId: Int, - absorbedChildId: Int, - movedVariantCount: Int, - promotedChildCount: Int, - comment: Option[String] = None - ): Future[AuditLogEntry] = { - val details = Json.obj( - "operation" -> "merge", - "parentId" -> parentId, - "absorbedChildId" -> absorbedChildId, - "movedVariantCount" -> movedVariantCount, - "promotedChildCount" -> promotedChildCount - ) - val entry = AuditLogEntry( - userId = userId, - entityType = "haplogroup", - entityId = parentId, - action = "merge", - oldValue = Some(Json.obj("absorbedChildId" -> absorbedChildId)), - newValue = Some(details), - comment = comment - ) - auditRepository.logAction(entry) - } - - /** - * Log a reparent operation (moving a haplogroup to a new parent). 
- */ - def logHaplogroupReparent( - userId: UUID, - haplogroup: Haplogroup, - oldParent: Option[Haplogroup], - newParent: Haplogroup, - reason: Option[String] = None - ): Future[AuditLogEntry] = { - val details = Json.obj( - "operation" -> "reparent", - "haplogroupId" -> haplogroup.id, - "haplogroupName" -> haplogroup.name, - "oldParentId" -> oldParent.flatMap(_.id), - "oldParentName" -> oldParent.map(_.name), - "newParentId" -> newParent.id, - "newParentName" -> newParent.name - ) - val entry = AuditLogEntry( - userId = userId, - entityType = "haplogroup", - entityId = haplogroup.id.getOrElse(0), - action = "reparent", - oldValue = oldParent.map(p => Json.obj("parentId" -> p.id, "parentName" -> p.name)), - newValue = Some(details), - comment = reason - ) - auditRepository.logAction(entry) - } -} diff --git a/app/services/DiscoveryProposalService.scala b/app/services/DiscoveryProposalService.scala deleted file mode 100644 index 59d7261d..00000000 --- a/app/services/DiscoveryProposalService.scala +++ /dev/null @@ -1,213 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.discovery.* -import play.api.Logging -import play.api.libs.json.Json -import repositories.{CuratorActionRepository, ProposedBranchRepository} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for curator operations on discovery proposals. - * Handles accept/reject workflows with full audit trail. - */ -class DiscoveryProposalService @Inject()( - proposedBranchRepo: ProposedBranchRepository, - curatorActionRepo: CuratorActionRepository -)(implicit ec: ExecutionContext) extends Logging { - - /** - * List proposals with optional filters. 
- */ - def listProposals( - haplogroupType: Option[HaplogroupType], - status: Option[ProposedBranchStatus] - ): Future[Seq[ProposedBranch]] = { - status match { - case Some(s) => proposedBranchRepo.findByStatus(s, haplogroupType) - case None => - // Return all non-terminal statuses by default - for { - pending <- proposedBranchRepo.findByStatus(ProposedBranchStatus.Pending, haplogroupType) - ready <- proposedBranchRepo.findByStatus(ProposedBranchStatus.ReadyForReview, haplogroupType) - review <- proposedBranchRepo.findByStatus(ProposedBranchStatus.UnderReview, haplogroupType) - accepted <- proposedBranchRepo.findByStatus(ProposedBranchStatus.Accepted, haplogroupType) - } yield pending ++ ready ++ review ++ accepted - } - } - - /** - * Get proposal details including variants and evidence. - */ - def getProposalDetails(proposalId: Int): Future[Option[ProposalDetailsView]] = { - proposedBranchRepo.findById(proposalId).flatMap { - case None => Future.successful(None) - case Some(proposal) => - for { - variants <- proposedBranchRepo.getVariants(proposalId) - evidence <- proposedBranchRepo.getEvidence(proposalId) - actions <- curatorActionRepo.findByTarget(CuratorTargetType.ProposedBranch, proposalId) - } yield Some(ProposalDetailsView(proposal, variants, evidence, actions)) - } - } - - /** - * Accept a proposal. Sets status to Accepted and records audit trail. 
- */ - def acceptProposal( - proposalId: Int, - curatorId: String, - proposedName: String, - reason: Option[String] - ): Future[ProposedBranch] = { - for { - proposalOpt <- proposedBranchRepo.findById(proposalId) - proposal = proposalOpt.getOrElse( - throw new NoSuchElementException(s"Proposal $proposalId not found") - ) - _ = validateStatusTransition(proposal.status, ProposedBranchStatus.Accepted) - - previousState = Json.toJson(proposal) - now = LocalDateTime.now() - updated = proposal.copy( - status = ProposedBranchStatus.Accepted, - proposedName = Some(proposedName), - reviewedAt = Some(now), - reviewedBy = Some(curatorId), - notes = reason.orElse(proposal.notes), - updatedAt = now - ) - _ <- proposedBranchRepo.update(updated) - - _ <- curatorActionRepo.create(CuratorAction( - curatorId = curatorId, - actionType = CuratorActionType.Accept, - targetType = CuratorTargetType.ProposedBranch, - targetId = proposalId, - previousState = Some(previousState), - newState = Some(Json.toJson(updated)), - reason = reason - )) - - _ = logger.info(s"Curator $curatorId accepted proposal $proposalId as '$proposedName'") - } yield updated - } - - /** - * Reject a proposal with a reason. Records audit trail. 
- */ - def rejectProposal( - proposalId: Int, - curatorId: String, - reason: String - ): Future[ProposedBranch] = { - for { - proposalOpt <- proposedBranchRepo.findById(proposalId) - proposal = proposalOpt.getOrElse( - throw new NoSuchElementException(s"Proposal $proposalId not found") - ) - _ = validateStatusTransition(proposal.status, ProposedBranchStatus.Rejected) - - previousState = Json.toJson(proposal) - now = LocalDateTime.now() - updated = proposal.copy( - status = ProposedBranchStatus.Rejected, - reviewedAt = Some(now), - reviewedBy = Some(curatorId), - notes = Some(reason), - updatedAt = now - ) - _ <- proposedBranchRepo.update(updated) - - _ <- curatorActionRepo.create(CuratorAction( - curatorId = curatorId, - actionType = CuratorActionType.Reject, - targetType = CuratorTargetType.ProposedBranch, - targetId = proposalId, - previousState = Some(previousState), - newState = Some(Json.toJson(updated)), - reason = Some(reason) - )) - - _ = logger.info(s"Curator $curatorId rejected proposal $proposalId: $reason") - } yield updated - } - - /** - * Start review of a proposal (transition to UnderReview). 
- */ - def startReview( - proposalId: Int, - curatorId: String - ): Future[ProposedBranch] = { - for { - proposalOpt <- proposedBranchRepo.findById(proposalId) - proposal = proposalOpt.getOrElse( - throw new NoSuchElementException(s"Proposal $proposalId not found") - ) - _ = validateStatusTransition(proposal.status, ProposedBranchStatus.UnderReview) - - now = LocalDateTime.now() - updated = proposal.copy( - status = ProposedBranchStatus.UnderReview, - reviewedBy = Some(curatorId), - updatedAt = now - ) - _ <- proposedBranchRepo.update(updated) - - _ <- curatorActionRepo.create(CuratorAction( - curatorId = curatorId, - actionType = CuratorActionType.Review, - targetType = CuratorTargetType.ProposedBranch, - targetId = proposalId, - reason = Some("Started review") - )) - - _ = logger.info(s"Curator $curatorId started review of proposal $proposalId") - } yield updated - } - - /** - * Get audit trail for a proposal. - */ - def getAuditTrail(proposalId: Int): Future[Seq[CuratorAction]] = - curatorActionRepo.findByTarget(CuratorTargetType.ProposedBranch, proposalId) - - /** - * Validate that a status transition is allowed. - */ - private[services] def validateStatusTransition( - current: ProposedBranchStatus, - target: ProposedBranchStatus - ): Unit = { - val allowed = current match { - case ProposedBranchStatus.Pending => Set(ProposedBranchStatus.ReadyForReview, ProposedBranchStatus.Rejected) - case ProposedBranchStatus.ReadyForReview => Set(ProposedBranchStatus.UnderReview, ProposedBranchStatus.Rejected) - case ProposedBranchStatus.UnderReview => Set(ProposedBranchStatus.Accepted, ProposedBranchStatus.Rejected, ProposedBranchStatus.Split) - case ProposedBranchStatus.Accepted => Set(ProposedBranchStatus.Promoted, ProposedBranchStatus.Rejected) - case _ => Set.empty[ProposedBranchStatus] - } - if (!allowed.contains(target)) { - throw new IllegalStateException( - s"Cannot transition proposal from $current to $target. 
Allowed: ${allowed.mkString(", ")}" - ) - } - } -} - -/** - * View model combining proposal with its variants, evidence, and audit trail. - */ -case class ProposalDetailsView( - proposal: ProposedBranch, - variants: Seq[ProposedBranchVariant], - evidence: Seq[ProposedBranchEvidence], - auditTrail: Seq[CuratorAction] -) - -object ProposalDetailsView { - implicit val format: play.api.libs.json.OFormat[ProposalDetailsView] = play.api.libs.json.Json.format -} diff --git a/app/services/EmailService.scala b/app/services/EmailService.scala deleted file mode 100644 index 8711e7ad..00000000 --- a/app/services/EmailService.scala +++ /dev/null @@ -1,22 +0,0 @@ -package services - -/** - * A trait representing an email service that allows sending emails. - */ -trait EmailService { - /** - * Sends an email with the specified details. - * - * @param to the list of recipient email addresses - * @param from the sender's email address - * @param subject the subject of the email - * @param body the body content of the email - * @return Either a String containing an error message in case of failure, or a Unit upon successful email delivery - */ - def sendEmail( - to: Seq[String], - from: String, - subject: String, - body: String - ): Either[String, Unit] -} \ No newline at end of file diff --git a/app/services/EncryptionService.scala b/app/services/EncryptionService.scala deleted file mode 100644 index 71c6c01d..00000000 --- a/app/services/EncryptionService.scala +++ /dev/null @@ -1,75 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import play.api.Logging - -import java.nio.charset.StandardCharsets -import java.security.{GeneralSecurityException, InvalidKeyException} -import java.util.Base64 -import javax.crypto.{BadPaddingException, Cipher, IllegalBlockSizeException} -import javax.crypto.spec.SecretKeySpec - -@Singleton -class EncryptionService @Inject()( - secretsManagerService: CachedSecretsManagerService - ) extends Logging { - - private val ALGORITHM = 
"AES" - - /** - * Encrypts a plain text string using AES. - * - * @param plainText The text to encrypt. - * @return Option[String] The encrypted string (Base64 encoded), or None if encryption fails or key is missing. - */ - def encrypt(plainText: String): Option[String] = { - secretsManagerService.getCachedUserEncryptionKey.flatMap { keyString => - try { - val key = new SecretKeySpec(keyString.getBytes(StandardCharsets.UTF_8), ALGORITHM) - val cipher = Cipher.getInstance(ALGORITHM) - cipher.init(Cipher.ENCRYPT_MODE, key) - val encryptedBytes = cipher.doFinal(plainText.getBytes(StandardCharsets.UTF_8)) - Some(Base64.getEncoder.encodeToString(encryptedBytes)) - } catch { - case e: InvalidKeyException => - logger.error("Encryption failed: invalid key", e) - None - case e: GeneralSecurityException => - logger.error("Encryption failed", e) - None - } - } - } - - /** - * Decrypts an encrypted string (Base64 encoded) using AES. - * - * @param encryptedText The encrypted string. - * @return Option[String] The decrypted plain text, or None if decryption fails or key is missing. 
- */ - def decrypt(encryptedText: String): Option[String] = { - secretsManagerService.getCachedUserEncryptionKey.flatMap { keyString => - try { - val key = new SecretKeySpec(keyString.getBytes(StandardCharsets.UTF_8), ALGORITHM) - val cipher = Cipher.getInstance(ALGORITHM) - cipher.init(Cipher.DECRYPT_MODE, key) - val decodedBytes = Base64.getDecoder.decode(encryptedText) - val decryptedBytes = cipher.doFinal(decodedBytes) - Some(new String(decryptedBytes, StandardCharsets.UTF_8)) - } catch { - case e: IllegalArgumentException => - logger.error("Decryption failed: invalid Base64 input", e) - None - case e: InvalidKeyException => - logger.error("Decryption failed: invalid key", e) - None - case e: (BadPaddingException | IllegalBlockSizeException) => - logger.error("Decryption failed: corrupted data", e) - None - case e: GeneralSecurityException => - logger.error("Decryption failed", e) - None - } - } - } -} diff --git a/app/services/ExternalBiosampleService.scala b/app/services/ExternalBiosampleService.scala deleted file mode 100644 index 1531836b..00000000 --- a/app/services/ExternalBiosampleService.scala +++ /dev/null @@ -1,138 +0,0 @@ -package services - -import com.vividsolutions.jts.geom.Point -import jakarta.inject.{Inject, Singleton} -import models.api.ExternalBiosampleRequest -import models.domain.genomics.{Biosample, BiosampleType, SpecimenDonor} -import repositories.{BiosampleRepository, SpecimenDonorRepository} -import utils.GeometryUtils - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * ExternalBiosampleService provides functionality for creating a new biosample - * and associating it with its metadata and sequence data within the system. - * - * @constructor Creates an instance of ExternalBiosampleService with the required - * dependencies injected. - * @param biosampleRepository an instance of BiosampleRepository for managing - * biosample data in the data store. 
- * @param biosampleDataService an instance of BiosampleDataService responsible - * for handling biosample-related data operations such - * as linking publications and adding sequence data. - * @param ec an implicit ExecutionContext for handling asynchronous operations. - */ -@Singleton -class ExternalBiosampleService @Inject()( - biosampleRepository: BiosampleRepository, - biosampleDataService: BiosampleDataService, - biosampleService: BiosampleService - )(implicit ec: ExecutionContext) { - - def createBiosampleWithData(request: ExternalBiosampleRequest): Future[UUID] = { - val sampleGuid = UUID.randomUUID() - - def createBiosample(donorId: Option[Int]) = { - val biosample = Biosample( - id = None, - sampleGuid = sampleGuid, - sampleAccession = request.sampleAccession, - description = request.description, - alias = request.alias, - centerName = request.centerName, - specimenDonorId = donorId, - locked = false, - sourcePlatform = Some(request.sourceSystem) - ) - - biosampleRepository.create(biosample) - } - - def updateBiosample(existingBiosample: Biosample, donorId: Option[Int]) = { - val updatedBiosample = existingBiosample.copy( - description = request.description, - alias = request.alias, - centerName = request.centerName, - specimenDonorId = donorId, - sourcePlatform = Some(request.sourceSystem) - ) - biosampleRepository.update(updatedBiosample).map(_ => existingBiosample.sampleGuid) - } - - def handleDataAssociation(guid: UUID, isUpdate: Boolean) = { - val publicationFuture = request.publication - .map(pub => biosampleDataService.linkPublication(guid, pub) - .recoverWith { case e => - Future.failed(PublicationLinkageException(e.getMessage)) - }) - .getOrElse(Future.successful(())) - - val sequenceDataFuture = if (isUpdate) { - biosampleDataService.replaceSequenceData(guid, request.sequenceData) - } else { - biosampleDataService.addSequenceData(guid, request.sequenceData) - } - - for { - _ <- publicationFuture - _ <- sequenceDataFuture.recoverWith { case e => 
- Future.failed(SequenceDataValidationException(e.getMessage)) - } - } yield guid - } - - (for { - donorId <- biosampleService.createOrUpdateSpecimenDonor( - donorIdentifier = request.donorIdentifier.getOrElse(s"${utils.GenomicsConstants.DONOR_ID_PREFIX}${UUID.randomUUID().toString}"), - originBiobank = request.centerName, - donorType = request.donorType.getOrElse(BiosampleType.Standard), - sex = request.sex, - latitude = request.latitude, - longitude = request.longitude, - atUri = request.citizenDid - ) - existing <- biosampleRepository.findByAccession(request.sampleAccession) - guid <- existing match { - case Some((existingBiosample, _)) => - for { - guid <- updateBiosample(existingBiosample, donorId) - _ <- handleDataAssociation(guid, isUpdate = true) - } yield guid - case None => - for { - created <- biosampleService.createBiosample( - sampleGuid = sampleGuid, - sampleAccession = request.sampleAccession, - description = request.description, - alias = request.alias, - centerName = request.centerName, - specimenDonorId = donorId, - sourcePlatform = Some(request.sourceSystem) - ) - guid <- handleDataAssociation(created.sampleGuid, isUpdate = false) - } yield guid - } - } yield guid).recoverWith { - case e: BiosampleServiceException => Future.failed(e) - case e: Exception => Future.failed(new RuntimeException( - s"Failed to process biosample: ${e.getMessage}", e)) - } - } - - /** - * Deletes a biosample and all its associated data by its sample accession and owner DID. - * - * @param accession The sample accession of the biosample to delete. - * @param citizenDid The DID of the citizen who owns the biosample. - * @return A `Future` containing `true` if the biosample was found, owned by the DID, and deleted; `false` otherwise. 
- */ - def deleteBiosample(accession: String, citizenDid: String): Future[Boolean] = { - biosampleRepository.findByAccession(accession).flatMap { - case Some((biosample, Some(donor))) if donor.atUri.contains(citizenDid) => - biosampleDataService.fullyDeleteBiosampleAndDependencies(biosample.id.get, biosample.sampleGuid).map(_ => true) - case _ => - Future.successful(false) - } - } -} \ No newline at end of file diff --git a/app/services/GenealogicalAnchorService.scala b/app/services/GenealogicalAnchorService.scala deleted file mode 100644 index bd86ef1a..00000000 --- a/app/services/GenealogicalAnchorService.scala +++ /dev/null @@ -1,173 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.domain.haplogroups.{AgeEstimate, AnchorType, GenealogicalAnchor} -import play.api.Logging -import repositories.GenealogicalAnchorRepository - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for managing genealogical anchors and applying them as constraints - * to age estimation. - * - * Anchors provide historical calibration points: - * - KNOWN_MRCA: A documented most recent common ancestor with a known date - * - MDKA: Most distant known ancestor (provides a minimum age) - * - ANCIENT_DNA: Carbon-dated ancient DNA sample assigned to a haplogroup - */ -class GenealogicalAnchorService @Inject()( - anchorRepo: GenealogicalAnchorRepository -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Get all anchors for a haplogroup, ordered by confidence descending. - */ - def getAnchorsForHaplogroup(haplogroupId: Int): Future[Seq[GenealogicalAnchor]] = - anchorRepo.findByHaplogroup(haplogroupId).map(_.sortBy(a => -a.confidence.getOrElse(BigDecimal(0)).toDouble)) - - /** - * Create a new anchor after validation. - */ - def createAnchor(anchor: GenealogicalAnchor): Future[GenealogicalAnchor] = { - validateAnchor(anchor) - anchorRepo.create(anchor) - } - - /** - * Update an existing anchor after validation. 
- */ - def updateAnchor(anchor: GenealogicalAnchor): Future[Boolean] = { - validateAnchor(anchor) - anchorRepo.update(anchor) - } - - /** - * Delete an anchor by ID. - */ - def deleteAnchor(id: Int): Future[Boolean] = - anchorRepo.delete(id) - - /** - * Apply anchor constraints to an age estimate. - * - * Constraints narrow the estimate based on historical evidence: - * - KNOWN_MRCA: The haplogroup formed before this date (provides upper bound on age YBP) - * - MDKA: The haplogroup must be at least as old as this ancestor (provides lower bound on age YBP) - * - ANCIENT_DNA: Provides a hard lower bound (haplogroup must predate the sample) - * - * @return Adjusted estimate with constraint metadata - */ - def applyAnchorConstraints( - haplogroupId: Int, - estimate: AgeEstimate - ): Future[AnchorConstrainedEstimate] = { - anchorRepo.findByHaplogroup(haplogroupId).map { anchors => - if (anchors.isEmpty) { - AnchorConstrainedEstimate(estimate, anchors = Seq.empty, constrained = false) - } else { - applyConstraints(estimate, anchors) - } - } - } - - /** - * Apply constraints from anchors to an estimate. 
- */ - private[services] def applyConstraints( - estimate: AgeEstimate, - anchors: Seq[GenealogicalAnchor] - ): AnchorConstrainedEstimate = { - var adjustedYbp = estimate.ybp - var adjustedLower = estimate.ybpLower.getOrElse(0) - var adjustedUpper = estimate.ybpUpper.getOrElse(Int.MaxValue) - var constrained = false - - for (anchor <- anchors) { - val anchorYbp = anchor.toYbp - val uncertainty = anchor.dateUncertaintyYears.getOrElse(0) - val weight = anchor.confidence.map(_.toDouble).getOrElse(0.5) - - anchor.anchorType match { - case AnchorType.KnownMrca => - // Known MRCA: haplogroup formed before this person lived - // The TMRCA must be >= this date (in YBP terms, >= anchorYbp) - val lowerBound = anchorYbp - uncertainty - if (adjustedLower < lowerBound) { - adjustedLower = lowerBound - constrained = true - } - if (adjustedYbp < lowerBound) { - adjustedYbp = lowerBound - constrained = true - } - - case AnchorType.Mdka => - // Most distant known ancestor: haplogroup is at least this old - val lowerBound = anchorYbp - uncertainty - if (adjustedLower < lowerBound) { - adjustedLower = lowerBound - constrained = true - } - if (adjustedYbp < lowerBound) { - adjustedYbp = lowerBound - constrained = true - } - - case AnchorType.AncientDna => - // Ancient DNA with carbon dating: hard lower bound - val carbonYbp = anchor.carbonDateBp.getOrElse(anchorYbp) - val carbonSigma = anchor.carbonDateSigma.getOrElse(uncertainty) - val lowerBound = carbonYbp - 2 * carbonSigma // 2-sigma lower bound - if (adjustedLower < lowerBound) { - adjustedLower = lowerBound - constrained = true - } - if (adjustedYbp < lowerBound) { - adjustedYbp = lowerBound - constrained = true - } - } - } - - // Ensure consistency - if (adjustedLower > adjustedUpper) adjustedUpper = adjustedLower - if (adjustedYbp > adjustedUpper) adjustedUpper = adjustedYbp - - val adjustedEstimate = AgeEstimate(adjustedYbp, Some(adjustedLower), Some(adjustedUpper)) - - if (constrained) { - logger.debug(s"Anchor constraint 
applied for haplogroup: " + - s"${estimate.ybp} -> $adjustedYbp YBP (${anchors.size} anchors)") - } - - AnchorConstrainedEstimate(adjustedEstimate, anchors, constrained) - } - - /** - * Validate anchor data before persistence. - */ - private[services] def validateAnchor(anchor: GenealogicalAnchor): Unit = { - require(anchor.haplogroupId > 0, "Haplogroup ID must be positive") - - anchor.anchorType match { - case AnchorType.AncientDna => - require(anchor.carbonDateBp.isDefined || anchor.dateCe != 0, - "Ancient DNA anchors must have carbon_date_bp or a non-zero date_ce") - case _ => // No additional validation - } - - anchor.confidence.foreach { c => - require(c >= 0 && c <= 1, s"Confidence must be between 0 and 1, got $c") - } - } -} - -/** - * Result of applying anchor constraints to an age estimate. - */ -case class AnchorConstrainedEstimate( - estimate: AgeEstimate, - anchors: Seq[GenealogicalAnchor], - constrained: Boolean -) diff --git a/app/services/GenomeRegionsManagementService.scala b/app/services/GenomeRegionsManagementService.scala deleted file mode 100644 index 1442841d..00000000 --- a/app/services/GenomeRegionsManagementService.scala +++ /dev/null @@ -1,188 +0,0 @@ -package services - -import config.GenomicsConfig -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.* -import models.domain.curator.AuditLogEntry -import models.domain.genomics.{GenomeRegion, RegionCoordinate} -import play.api.Logging -import play.api.cache.AsyncCacheApi -import play.api.libs.json.{Format, JsValue, Json} -import repositories.{CuratorAuditRepository, GenomeRegionsRepository} - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for managing genome regions (including cytobands). - * Provides CRUD operations with audit logging. 
- */ -@Singleton -class GenomeRegionsManagementService @Inject()( - genomeRegionsRepository: GenomeRegionsRepository, - auditRepository: CuratorAuditRepository, - genomicsConfig: GenomicsConfig, - cache: AsyncCacheApi -)(implicit ec: ExecutionContext) extends Logging { - - // System UUID for API-originated changes - private val SystemUserId: UUID = UUID.fromString("00000000-0000-0000-0000-000000000000") - - // JSON formats for domain objects - private given Format[GenomeRegion] = Json.format[GenomeRegion] - - // ============================================================================ - // GenomeRegion Operations - // ============================================================================ - - def listRegions(regionType: Option[String], build: Option[String], page: Int, pageSize: Int): Future[GenomeRegionListResponse] = { - val offset = (page - 1) * pageSize - val canonicalBuild = build.map(genomicsConfig.resolveReferenceName) - - for { - regions <- genomeRegionsRepository.findRegions(regionType, canonicalBuild, offset, pageSize) - total <- genomeRegionsRepository.countRegions(regionType, canonicalBuild) - } yield GenomeRegionListResponse( - regions = regions.map(toRegionDetailDto), - total = total, - page = page, - pageSize = pageSize - ) - } - - def getRegion(id: Int): Future[Option[GenomeRegionDetailDto]] = { - genomeRegionsRepository.findRegionById(id).map(_.map(toRegionDetailDto)) - } - - def createRegion(request: CreateGenomeRegionRequest, userId: Option[UUID]): Future[Either[String, GenomeRegionDetailDto]] = { - val region = GenomeRegion( - id = None, - regionType = request.regionType, - name = request.name, - coordinates = request.coordinates.map { case (k, v) => k -> RegionCoordinate(v.contig, v.start, v.end) }, - properties = request.properties.getOrElse(Json.obj()) - ) - - genomeRegionsRepository.createRegion(region).flatMap { id => - genomeRegionsRepository.findRegionById(id).flatMap { - case Some(createdRegion) => - logAudit(userId, 
"genome_region", id, "create", None, Some(createdRegion)).map { _ => - invalidateCache() - Right(toRegionDetailDto(createdRegion)) - } - case None => - Future.successful(Left("Failed to retrieve created region")) - } - }.recover { - case e: Exception => - logger.error(s"Failed to create genome region: ${e.getMessage}", e) - Left(s"Failed to create genome region: ${e.getMessage}") - } - } - - def updateRegion(id: Int, request: UpdateGenomeRegionRequest, userId: Option[UUID]): Future[Either[String, GenomeRegionDetailDto]] = { - genomeRegionsRepository.findRegionById(id).flatMap { - case None => Future.successful(Left("Region not found")) - case Some(oldRegion) => - val updatedRegion = oldRegion.copy( - regionType = request.regionType.getOrElse(oldRegion.regionType), - name = request.name.orElse(oldRegion.name), - coordinates = request.coordinates.map(_.map { case (k, v) => k -> RegionCoordinate(v.contig, v.start, v.end) }).getOrElse(oldRegion.coordinates), - properties = request.properties.getOrElse(oldRegion.properties) - ) - - genomeRegionsRepository.updateRegion(id, updatedRegion).flatMap { success => - if (success) { - logAudit(userId, "genome_region", id, "update", Some(oldRegion), Some(updatedRegion)).map { _ => - invalidateCache() - Right(toRegionDetailDto(updatedRegion.copy(id = Some(id)))) - } - } else { - Future.successful(Left("Failed to update region")) - } - } - }.recover { - case e: Exception => - logger.error(s"Failed to update genome region: ${e.getMessage}", e) - Left(s"Failed to update genome region: ${e.getMessage}") - } - } - - def deleteRegion(id: Int, userId: Option[UUID]): Future[Either[String, Unit]] = { - genomeRegionsRepository.findRegionById(id).flatMap { - case None => Future.successful(Left("Region not found")) - case Some(oldRegion) => - genomeRegionsRepository.deleteRegion(id).flatMap { success => - if (success) { - logAudit(userId, "genome_region", id, "delete", Some(oldRegion), None).map { _ => - invalidateCache() - Right(()) - } - } 
else { - Future.successful(Left("Failed to delete region")) - } - } - }.recover { - case e: Exception => - logger.error(s"Failed to delete genome region: ${e.getMessage}", e) - Left(s"Failed to delete genome region: ${e.getMessage}") - } - } - - def bulkCreateRegions(request: BulkCreateGenomeRegionsRequest, userId: Option[UUID]): Future[BulkOperationResponse] = { - val results = request.regions.zipWithIndex.map { case (req, idx) => - createRegion(req, userId).map { - case Right(dto) => BulkOperationResult(idx, "success", Some(dto.id), None) - case Left(error) => BulkOperationResult(idx, "error", None, Some(error)) - }.recover { - case e: Exception => BulkOperationResult(idx, "error", None, Some(e.getMessage)) - } - } - - Future.sequence(results).map { resultList => - BulkOperationResponse( - total = resultList.size, - succeeded = resultList.count(_.status == "success"), - failed = resultList.count(_.status == "error"), - results = resultList - ) - } - } - - // ============================================================================ - // Helper Methods - // ============================================================================ - - private def toRegionDetailDto(region: GenomeRegion): GenomeRegionDetailDto = { - GenomeRegionDetailDto( - id = region.id.getOrElse(0), - regionType = region.regionType, - name = region.name, - coordinates = region.coordinates.map { case (k, v) => k -> RegionCoordinateDto(v.contig, v.start, v.end) }, - properties = region.properties - ) - } - - private def logAudit[T](userId: Option[UUID], entityType: String, entityId: Int, action: String, - oldValue: Option[T], newValue: Option[T])(using Format[T]): Future[AuditLogEntry] = { - val effectiveUserId = userId.getOrElse(SystemUserId) - val entry = AuditLogEntry( - userId = effectiveUserId, - entityType = entityType, - entityId = entityId, - action = action, - oldValue = oldValue.map(Json.toJson(_)), - newValue = newValue.map(Json.toJson(_)), - comment = if (userId.isEmpty) Some("API 
system change") else None - ) - auditRepository.logAction(entry) - } - - private def invalidateCache(): Unit = { - // Invalidate all build caches as we don't know easily which builds are affected by coordinates update - genomicsConfig.supportedReferences.foreach { refGenome => - cache.remove(s"genome-regions:$refGenome") - logger.debug(s"Invalidated cache for genome-regions:$refGenome") - } - } -} \ No newline at end of file diff --git a/app/services/GenomeRegionsService.scala b/app/services/GenomeRegionsService.scala deleted file mode 100644 index 93e0b43b..00000000 --- a/app/services/GenomeRegionsService.scala +++ /dev/null @@ -1,209 +0,0 @@ -package services - -import config.GenomicsConfig -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.* -import models.domain.genomics.{GenbankContig, GenomeRegion} -import play.api.cache.AsyncCacheApi -import play.api.libs.json.{JsValue, Reads} -import repositories.{FullBuildData, GenomeRegionsRepository} - -import java.security.MessageDigest -import java.time.Instant -import java.time.format.DateTimeFormatter -import scala.concurrent.duration.* -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for the Genome Regions API. - * Provides curated genomic region metadata for reference builds. - */ -@Singleton -class GenomeRegionsService @Inject()( - genomeRegionsRepository: GenomeRegionsRepository, - genomicsConfig: GenomicsConfig, - cache: AsyncCacheApi -)(implicit ec: ExecutionContext) { - - private val CacheDuration = 7.days - - /** - * Get genome regions for a build, resolving aliases. - */ - def getRegions(buildName: String): Future[Either[GenomeRegionsError, GenomeRegionsResponse]] = { - val canonicalName = genomicsConfig.resolveReferenceName(buildName) - - if (!genomicsConfig.supportedReferences.contains(canonicalName)) { - Future.successful(Left(GenomeRegionsError( - error = "Unknown build", - message = s"Build '$buildName' is not supported. 
Supported builds: ${genomicsConfig.supportedReferences.mkString(", ")}", - supportedBuilds = genomicsConfig.supportedReferences - ))) - } else { - val cacheKey = s"genome-regions:$canonicalName" - cache.getOrElseUpdate(cacheKey, CacheDuration) { - buildResponse(canonicalName).map(Right(_)) - } - } - } - - /** - * Get the ETag for a build's data. - */ - def getETag(buildName: String): Future[Option[String]] = { - val canonicalName = genomicsConfig.resolveReferenceName(buildName) - genomeRegionsRepository.getVersion(canonicalName).map { versionOpt => - versionOpt.map { version => - generateETag(canonicalName, version.dataVersion) - } - } - } - - /** - * Generate ETag from build name and version. - */ - def generateETag(buildName: String, dataVersion: String): String = { - val input = s"$buildName:$dataVersion" - val md5 = MessageDigest.getInstance("MD5") - val hash = md5.digest(input.getBytes("UTF-8")).map("%02x".format(_)).mkString - s""""$hash"""" - } - - /** - * Get list of supported builds. - */ - def getSupportedBuilds: Seq[String] = genomicsConfig.supportedReferences - - /** - * Build the full response for a reference genome. - */ - private def buildResponse(canonicalName: String): Future[GenomeRegionsResponse] = { - genomeRegionsRepository.getFullBuildData(canonicalName).map { data => - - // Group regions by contig name for efficient lookup. - // We check for exact match or "chr"+name match to handle common naming conventions. 
- val regionsByContig = data.regions.flatMap { region => - region.coordinates.get(canonicalName).map(coord => coord.contig -> region) - }.groupBy(_._1).map { case (k, v) => k -> v.map(_._2) } - - val chromosomeMap = data.contigs.flatMap { contig => - contig.commonName.map { chromName => - // Try to find regions for this contig using common variations - val relevantRegions = regionsByContig.getOrElse(chromName, Seq.empty) ++ - regionsByContig.getOrElse("chr" + chromName, Seq.empty) ++ - regionsByContig.getOrElse(chromName.replace("chr", ""), Seq.empty) - - // Deduplicate if needed (though mapping logic usually prevents duplicate keys in map unless source has duplicates) - val uniqueRegions = relevantRegions.distinctBy(_.id) - - chromName -> buildChromosomeRegions(contig, uniqueRegions, canonicalName) - } - }.toMap - - GenomeRegionsResponse( - build = canonicalName, - version = data.version.map(_.dataVersion).getOrElse("unknown"), - generatedAt = DateTimeFormatter.ISO_INSTANT.format(Instant.now()), - chromosomes = chromosomeMap - ) - } - } - - /** - * Build the chromosome regions DTO from domain models. 
- */ - private def buildChromosomeRegions( - contig: GenbankContig, - regions: Seq[GenomeRegion], - buildName: String - ): ChromosomeRegionsDto = { - // Helper to convert to DTO with current build context - def toDto(r: GenomeRegion): Option[RegionDto] = toRegionDto(r, buildName) - - // Extract specific region types - val centromere = regions.find(_.regionType == "Centromere").flatMap(toDto) - val telomereP = regions.find(_.regionType == "Telomere_P").flatMap(toDto) - val telomereQ = regions.find(_.regionType == "Telomere_Q").flatMap(toDto) - - val telomeres = if (telomereP.isDefined || telomereQ.isDefined) { - Some(TelomeresDto(p = telomereP, q = telomereQ)) - } else None - - // Cytobands - val cytobands = regions.filter(_.regionType == "Cytoband") - .flatMap(r => toCytobandDto(r, buildName)) - .sortBy(_.start) - - // Build Y-chromosome specific regions if this is chrY - val yRegions = if (contig.commonName.exists(name => name.toLowerCase.contains("chry") || name == "Y")) { - Some(buildYRegions(regions, buildName)) - } else None - - ChromosomeRegionsDto( - length = contig.seqLength.toLong, - centromere = centromere, - telomeres = telomeres, - cytobands = cytobands, - regions = yRegions, - strMarkers = Seq.empty // STR markers handled by separate service/table now - ) - } - - /** - * Build Y-chromosome specific regions grouped by type. 
- */ - private def buildYRegions(regions: Seq[GenomeRegion], buildName: String): YChromosomeRegionsDto = { - def toDto(r: GenomeRegion) = toRegionDto(r, buildName) - def toNamedDto(r: GenomeRegion) = toNamedRegionDto(r, buildName) - - YChromosomeRegionsDto( - par1 = regions.find(_.regionType == "PAR1").flatMap(toDto), - par2 = regions.find(_.regionType == "PAR2").flatMap(toDto), - xtr = regions.filter(_.regionType == "XTR").flatMap(toDto), - ampliconic = regions.filter(_.regionType == "Ampliconic").flatMap(toDto), - palindromes = regions.filter(_.regionType == "Palindrome").flatMap(toNamedDto), - heterochromatin = regions.find(_.regionType == "Heterochromatin").flatMap(toDto), - xDegenerate = regions.filter(_.regionType == "XDegenerate").flatMap(toDto) - ) - } - - // Domain to DTO conversions - - private def getProperty[T](r: GenomeRegion, key: String)(implicit reads: Reads[T]): Option[T] = { - (r.properties \ key).asOpt[T] - } - - private def toRegionDto(r: GenomeRegion, buildName: String): Option[RegionDto] = { - r.coordinates.get(buildName).map { coord => - RegionDto( - start = coord.start, - end = coord.end, - `type` = Some(r.regionType), - modifier = getProperty[Double](r, "modifier") - ) - } - } - - private def toNamedRegionDto(r: GenomeRegion, buildName: String): Option[NamedRegionDto] = { - r.coordinates.get(buildName).map { coord => - NamedRegionDto( - name = r.name.getOrElse(""), - start = coord.start, - end = coord.end, - `type` = r.regionType, - modifier = getProperty[Double](r, "modifier") - ) - } - } - - private def toCytobandDto(r: GenomeRegion, buildName: String): Option[CytobandDto] = { - r.coordinates.get(buildName).map { coord => - CytobandDto( - name = r.name.getOrElse(""), - start = coord.start, - end = coord.end, - stain = getProperty[String](r, "stain").getOrElse("gneg") - ) - } - } -} \ No newline at end of file diff --git a/app/services/GenomicStudyService.scala b/app/services/GenomicStudyService.scala deleted file mode 100644 index 
dc800ec0..00000000 --- a/app/services/GenomicStudyService.scala +++ /dev/null @@ -1,275 +0,0 @@ -package services - -import models.domain.genomics.{Biosample, SpecimenDonor} -import models.domain.publications.{GenomicStudy, StudySource} -import play.api.Logging -import play.api.libs.ws.* -import repositories.{BiosampleRepository, SpecimenDonorRepository} -import services.ena.{EnaApiClient, EnaBiosampleData, EnaStudyData} -import services.mappers.GenomicStudyMappers -import services.ncbi.{NcbiApiClient, SraBiosampleData, SraStudyData} - -import javax.inject.* -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service responsible for retrieving and mapping genomic study details from external APIs. - * This class interacts with external resources such as ENA and NCBI to fetch and process genomic study data. - * - * @constructor Creates a new instance of `GenomicStudyService` with dependency-injected clients and execution context. - * @param ws The `WSClient` used for making HTTP requests to external APIs. - * @param ncbiApiClient The client for interacting with NCBI APIs. - * @param enaApiClient The client for interacting with ENA APIs. - * @param ec The execution context for asynchronous operations. - */ -@Singleton -class GenomicStudyService @Inject()( - ws: WSClient, - ncbiApiClient: NcbiApiClient, - enaApiClient: EnaApiClient, - specimenDonorRepository: SpecimenDonorRepository, - biosampleRepository: BiosampleRepository - )(implicit ec: ExecutionContext) extends Logging { - - // ENA Browser API for XML (often more detailed for studies) - // For JSON, use the ENA Portal API - private val ncbiEutilsBaseUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" - - private val ValidSexValues = Set("male", "female", "intersex") - - /** - * Fetches detailed information about a genomic study based on its accession. - * Determines the source of the study and retrieves the information accordingly. 
- * - * @param accession The unique accession identifier for the genomic study. - * It determines the study source (e.g., ENA, NCBI BioProject, NCBI GenBank). - * @return A Future containing an Option of GenomicStudy. The option is None if no study details are found. - * The GenomicStudy provides detailed metadata about the study, including accession, title, center name, etc. - */ - def getStudyDetails(accession: String): Future[Option[GenomicStudy]] = { - determineSource(accession) match { - case StudySource.ENA => enaApiClient.getStudyDetails(accession) - .map(_.map(GenomicStudyMappers.enaToGenomicStudy)) - case StudySource.NCBI_BIOPROJECT => ncbiApiClient.getSraStudyDetails(accession) - .map(_.map(GenomicStudyMappers.sraToGenomicStudy)) - case StudySource.NCBI_GENBANK => getGenbankDetails(accession) - } - } - - private def determineSource(accession: String): StudySource = { - val acc = accession.toUpperCase.trim - - acc match { - // NCBI BioProjects - case a if a.startsWith("PRJNA") => StudySource.NCBI_BIOPROJECT - // ENA BioProjects - case a if a.startsWith("PRJEB") => StudySource.ENA - // ENA SRA Accessions - case a if List("ERR", "ERX", "ERS", "ERA", "ERZ", "ERP").exists(a.startsWith) => StudySource.ENA - // NCBI SRA Accessions - case a if List("SRR", "SRX", "SRS", "SRP").exists(a.startsWith) => StudySource.NCBI_BIOPROJECT - // NCBI RefSeq Accessions - case a if List("NM_", "NP_", "XM_", "XP_", "NR_", "XR_", "WP_").exists(a.startsWith) => StudySource.NCBI_GENBANK - // Common NCBI GenBank patterns - case a if ( - // Single letter + 5 digits - (a.length == 6 && a.head.isLetter && a.tail.forall(_.isDigit)) || - // Two letters + 6 digits - (a.length == 8 && a.take(2).forall(_.isLetter) && a.drop(2).forall(_.isDigit)) || - // WGS pattern: four letters + "01" + 6 digits - (a.length >= 12 && - a.take(4).forall(_.isLetter) && - a.slice(4, 6) == "01" && - a.drop(6).forall(_.isDigit)) - ) => StudySource.NCBI_GENBANK - // Handle versioned accessions - case a if 
a.contains(".") => - a.split("\\.") match { - case Array(base, version) if version.forall(_.isDigit) => StudySource.NCBI_GENBANK - case _ => StudySource.ENA // Default for unrecognized patterns - } - // Default to ENA for unrecognized patterns - case _ => StudySource.ENA - } - } - - /** - * Retrieves a list of biosample metadata associated with a given study accession. - * Determines the source of the study and invokes the appropriate data retrieval method. - * - * @param studyAccession The unique accession identifier for the study. This determines the study source - * (e.g., ENA, NCBI BioProject) from which biosample data is retrieved. - * @return A Future containing a sequence of Biosample objects. If no biosamples are found or the source - * is unrecognized, the sequence will be empty. - */ - def getBiosamplesForStudy(studyAccession: String): Future[Seq[Biosample]] = { - determineSource(studyAccession) match { - case StudySource.ENA => - enaApiClient.getBiosamples(studyAccession) - .flatMap(processBiosamples(_, GenomicStudyMappers.enaToBiosample)) - case StudySource.NCBI_BIOPROJECT => - ncbiApiClient.getSraBiosamples(studyAccession) - .flatMap(processBiosamples(_, GenomicStudyMappers.sraToBiosample)) - case _ => - Future.successful(Seq.empty) - } - } - - private def processBiosamples[T]( - data: Seq[T], - mapper: T => GenomicStudyMappers.BiosampleMappingResult - ): Future[Seq[Biosample]] = { - Future.sequence( - data.map { item => - val result = mapper(item) - persistBiosampleWithDonor(result) - } - ) - } - - private def persistBiosampleWithDonor( - result: GenomicStudyMappers.BiosampleMappingResult - ): Future[Biosample] = { - result.specimenDonor match { - case Some(donor) => - // First try to find a matching donor - findMatchingDonor(donor).flatMap { - case Some(existingDonor) => - // Use existing donor - val biosampleWithDonor = result.biosample.copy( - specimenDonorId = Some(existingDonor.id.get) - ) - biosampleRepository.create(biosampleWithDonor) - - case 
None => - // Create new donor if no match found - specimenDonorRepository.create(donor).flatMap { createdDonor => - val biosampleWithDonor = result.biosample.copy( - specimenDonorId = Some(createdDonor.id.get) - ) - biosampleRepository.create(biosampleWithDonor) - } - } - - case None => - // If no donor data, just create the biosample - biosampleRepository.create(result.biosample) - } - }.recover { - case e: Exception => - logger.error(s"Error persisting biosample ${result.biosample.sampleAccession}: ${e.getMessage}") - throw e - } - - private def findMatchingDonor(donor: SpecimenDonor): Future[Option[SpecimenDonor]] = { - import scala.collection.mutable.ArrayBuffer - var conditions = ArrayBuffer.empty[(SpecimenDonor, SpecimenDonor) => Boolean] - - // Add conditions based on available donor data - if (donor.donorIdentifier.nonEmpty) { - conditions += ((existing, incoming) => existing.donorIdentifier == incoming.donorIdentifier) - } - - if (donor.sex.isDefined) { - conditions += ((existing, incoming) => existing.sex == incoming.sex) - } - - if (donor.geocoord.isDefined) { - conditions += ((existing, incoming) => existing.geocoord == incoming.geocoord) - } - - if (donor.pgpParticipantId.isDefined) { - conditions += ((existing, incoming) => existing.pgpParticipantId == incoming.pgpParticipantId) - } - - if (donor.atUri.isDefined) { - conditions += ((existing, incoming) => existing.atUri == incoming.atUri) - } - - // Get all donors with same origin biobank and type - specimenDonorRepository - .findByBiobankAndType(donor.originBiobank, donor.donorType) - .map { donors => - // Find first donor that matches all conditions - donors.find(existing => - conditions.forall(condition => condition(existing, donor)) - ) - } - } - - private def getGenbankDetails(accession: String): Future[Option[GenomicStudy]] = { - val url = s"$ncbiEutilsBaseUrl/efetch.fcgi" - - ws.url(url) - .withQueryStringParameters( - "db" -> "nucleotide", - "id" -> accession, - "rettype" -> "gb", - "retmode" 
-> "xml" - ) - .get() - .map { response => - response.status match { - case 200 => - try { - val xml = scala.xml.XML.loadString(response.body) - - // Extract GBSeq elements - val seqElement = xml \\ "GBSeq" - - seqElement.headOption.map { seq => - val references = parseReferences(seq \\ "GBReference") - - GenomicStudy( - id = None, - accession = (seq \\ "GBSeq_accession-version").text, - title = (seq \\ "GBSeq_definition").text, - centerName = (seq \\ "GBSeq_source").text, - studyName = (seq \\ "GBSeq_locus").text, - details = (seq \\ "GBSeq_comment").text, - source = StudySource.NCBI_GENBANK, - submissionDate = Some(parseGenbankDate((seq \\ "GBSeq_create-date").text)), - lastUpdate = Some(parseGenbankDate((seq \\ "GBSeq_update-date").text)), - molecule = Some((seq \\ "GBSeq_moltype").text), - topology = Some((seq \\ "GBSeq_topology").text), - taxonomyId = (seq \\ "GBSeq_taxonomy-id").headOption.map(_.text.toInt), - version = Some((seq \\ "GBSeq_accession-version").text.split("\\.")(1)) - ) - } - } catch { - case e: Exception => - logger.error(s"Error parsing GenBank XML for $accession: ${e.getMessage}") - None - } - case status => - logger.error(s"Error fetching GenBank entry $accession: $status - ${response.body}") - None - } - } - .recover { - case e: Exception => - logger.error(s"Exception during GenBank API call for $accession: $e") - None - } - } - - private def parseGenbankDate(date: String): java.time.LocalDate = { - // GenBank dates are in format "DD-MMM-YYYY" - java.time.LocalDate.parse( - date, - java.time.format.DateTimeFormatter.ofPattern("dd-MMM-yyyy", java.util.Locale.ENGLISH) - ) - } - - private def parseReferences(refs: scala.xml.NodeSeq): Seq[Map[String, String]] = { - refs.map { ref => - Map( - "authors" -> (ref \\ "GBReference_authors" \\ "GBAuthor").map(_.text).mkString(", "), - "title" -> (ref \\ "GBReference_title").text, - "journal" -> (ref \\ "GBReference_journal").text, - "pubmed" -> (ref \\ "GBReference_pubmed").text - 
).filter(_._2.nonEmpty) // Remove empty values - }.toSeq - } - -} \ No newline at end of file diff --git a/app/services/GroupProjectService.scala b/app/services/GroupProjectService.scala deleted file mode 100644 index dc4a87ea..00000000 --- a/app/services/GroupProjectService.scala +++ /dev/null @@ -1,230 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.{GroupProject, GroupProjectMember, MemberVisibility} -import play.api.Logging -import repositories.{GroupProjectMemberRepository, GroupProjectRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class GroupProjectService @Inject()( - projectRepo: GroupProjectRepository, - memberRepo: GroupProjectMemberRepository - )(implicit ec: ExecutionContext) extends Logging { - - def createProject(project: GroupProject, creatorDid: String): Future[Either[String, GroupProject]] = { - validateProject(project) match { - case Some(error) => Future.successful(Left(error)) - case None => - val toCreate = project.copy(ownerDid = creatorDid) - projectRepo.create(toCreate).flatMap { created => - val adminMember = GroupProjectMember( - groupProjectId = created.id.get, - citizenDid = creatorDid, - role = "ADMIN", - status = "ACTIVE", - joinedAt = Some(LocalDateTime.now()) - ) - memberRepo.create(adminMember).map(_ => Right(created)) - } - } - } - - def updateProject(projectId: Int, updaterDid: String, updates: GroupProject): Future[Either[String, GroupProject]] = { - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(existing) => - isAdminOrCoAdmin(projectId, updaterDid).flatMap { - case false => Future.successful(Left("Only admins and co-admins can update projects")) - case true => - val updated = existing.copy( - projectName = updates.projectName, - description = updates.description, - backgroundInfo = updates.backgroundInfo, - joinPolicy = 
updates.joinPolicy, - haplogroupRequirement = updates.haplogroupRequirement, - memberListVisibility = updates.memberListVisibility, - strPolicy = updates.strPolicy, - snpPolicy = updates.snpPolicy, - publicTreeView = updates.publicTreeView, - successionPolicy = updates.successionPolicy - ) - projectRepo.update(updated).map { - case true => Right(updated) - case false => Left("Failed to update project") - } - } - } - } - - def requestMembership( - projectId: Int, - citizenDid: String, - biosampleAtUri: Option[String] = None, - displayName: Option[String] = None - ): Future[Either[String, GroupProjectMember]] = { - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(project) => - memberRepo.findByProjectAndCitizen(projectId, citizenDid).flatMap { - case Some(existing) if existing.status == "ACTIVE" => - Future.successful(Left("Already a member")) - case Some(existing) if existing.status == "PENDING_APPROVAL" => - Future.successful(Left("Membership request already pending")) - case Some(existing) if existing.status == "REMOVED" => - Future.successful(Left("Membership was revoked by project admin")) - case _ => - val status = if (project.joinPolicy == "OPEN") "ACTIVE" else "PENDING_APPROVAL" - val joinedAt = if (status == "ACTIVE") Some(LocalDateTime.now()) else None - val member = GroupProjectMember( - groupProjectId = projectId, - citizenDid = citizenDid, - biosampleAtUri = biosampleAtUri, - status = status, - displayName = displayName, - joinedAt = joinedAt - ) - memberRepo.create(member).map(m => Right(m)) - } - } - } - - def approveMembership(memberId: Int, approverDid: String): Future[Either[String, GroupProjectMember]] = { - memberRepo.findById(memberId).flatMap { - case None => Future.successful(Left("Membership not found")) - case Some(member) if member.status != "PENDING_APPROVAL" => - Future.successful(Left(s"Cannot approve membership with status: ${member.status}")) - case Some(member) => - 
hasPermission(member.groupProjectId, approverDid, "APPROVE_MEMBERS").flatMap { - case false => Future.successful(Left("Insufficient permissions")) - case true => - val updated = member.copy(status = "ACTIVE", joinedAt = Some(LocalDateTime.now())) - memberRepo.update(updated).map { - case true => Right(updated) - case false => Left("Failed to approve membership") - } - } - } - } - - def rejectMembership(memberId: Int, rejecterDid: String): Future[Either[String, Boolean]] = { - memberRepo.findById(memberId).flatMap { - case None => Future.successful(Left("Membership not found")) - case Some(member) if member.status != "PENDING_APPROVAL" => - Future.successful(Left(s"Cannot reject membership with status: ${member.status}")) - case Some(member) => - hasPermission(member.groupProjectId, rejecterDid, "APPROVE_MEMBERS").flatMap { - case false => Future.successful(Left("Insufficient permissions")) - case true => - memberRepo.updateStatus(member.id.get, "REMOVED").map(Right(_)) - } - } - } - - def removeMember(memberId: Int, removerDid: String): Future[Either[String, Boolean]] = { - memberRepo.findById(memberId).flatMap { - case None => Future.successful(Left("Membership not found")) - case Some(member) => - hasPermission(member.groupProjectId, removerDid, "REMOVE_MEMBERS").flatMap { - case false => Future.successful(Left("Insufficient permissions")) - case true => - if (member.role == "ADMIN") Future.successful(Left("Cannot remove the project admin")) - else memberRepo.updateStatus(member.id.get, "REMOVED").map(Right(_)) - } - } - } - - def leaveProject(projectId: Int, citizenDid: String): Future[Either[String, Boolean]] = { - memberRepo.findByProjectAndCitizen(projectId, citizenDid).flatMap { - case None => Future.successful(Left("Not a member of this project")) - case Some(member) if member.role == "ADMIN" => - Future.successful(Left("Admin cannot leave. 
Transfer ownership first.")) - case Some(member) => - memberRepo.updateStatus(member.id.get, "LEFT").map(Right(_)) - } - } - - def assignRole(memberId: Int, newRole: String, assignerDid: String): Future[Either[String, Boolean]] = { - if (!GroupProjectMember.ValidRoles.contains(newRole)) - return Future.successful(Left(s"Invalid role: $newRole")) - - memberRepo.findById(memberId).flatMap { - case None => Future.successful(Left("Membership not found")) - case Some(member) if member.status != "ACTIVE" => - Future.successful(Left("Can only assign roles to active members")) - case Some(member) => - isAdminOrCoAdmin(member.groupProjectId, assignerDid).flatMap { - case false => Future.successful(Left("Only admins and co-admins can assign roles")) - case true => - if (newRole == "ADMIN") Future.successful(Left("Cannot assign ADMIN role. Use ownership transfer.")) - else memberRepo.updateRole(member.id.get, newRole).map(Right(_)) - } - } - } - - def getProjectMembers(projectId: Int, requesterDid: String): Future[Either[String, Seq[GroupProjectMember]]] = { - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(project) => - memberRepo.findByProjectAndCitizen(projectId, requesterDid).flatMap { requesterMembership => - val canView = project.memberListVisibility match { - case "PUBLIC" => true - case "MEMBERS_ONLY" => requesterMembership.exists(_.status == "ACTIVE") - case "ADMINS_ONLY" => requesterMembership.exists(m => m.status == "ACTIVE" && Set("ADMIN", "CO_ADMIN").contains(m.role)) - case "HIDDEN" => requesterMembership.exists(m => m.status == "ACTIVE" && m.role == "ADMIN") - case _ => false - } - if (!canView) Future.successful(Left("Insufficient permissions to view member list")) - else memberRepo.findByProjectAndStatus(projectId, "ACTIVE").map(Right(_)) - } - } - } - - def getPendingRequests(projectId: Int, requesterDid: String): Future[Either[String, Seq[GroupProjectMember]]] = { - hasPermission(projectId, 
requesterDid, "APPROVE_MEMBERS").flatMap { - case false => Future.successful(Left("Insufficient permissions")) - case true => memberRepo.findByProjectAndStatus(projectId, "PENDING_APPROVAL").map(Right(_)) - } - } - - private def validateProject(project: GroupProject): Option[String] = { - if (project.projectName.isBlank || project.projectName.length < 3) - Some("Project name must be at least 3 characters") - else if (project.projectName.length > 100) - Some("Project name must be at most 100 characters") - else if (!GroupProject.ValidProjectTypes.contains(project.projectType)) - Some(s"Invalid project type: ${project.projectType}") - else if (!GroupProject.ValidJoinPolicies.contains(project.joinPolicy)) - Some(s"Invalid join policy: ${project.joinPolicy}") - else if (project.joinPolicy == "HAPLOGROUP_VERIFIED" && project.haplogroupRequirement.isEmpty) - Some("Haplogroup requirement is required for HAPLOGROUP_VERIFIED join policy") - else if (project.targetLineage.exists(!GroupProject.ValidLineages.contains(_))) - Some(s"Invalid target lineage: ${project.targetLineage.get}") - else - None - } - - private[services] def isAdminOrCoAdmin(projectId: Int, citizenDid: String): Future[Boolean] = { - memberRepo.findByProjectAndCitizen(projectId, citizenDid).map { - case Some(m) => m.status == "ACTIVE" && Set("ADMIN", "CO_ADMIN").contains(m.role) - case None => false - } - } - - private[services] def hasPermission(projectId: Int, citizenDid: String, permission: String): Future[Boolean] = { - memberRepo.findByProjectAndCitizen(projectId, citizenDid).map { - case Some(m) if m.status == "ACTIVE" => - m.role match { - case "ADMIN" => true - case "CO_ADMIN" => Set("APPROVE_MEMBERS", "REMOVE_MEMBERS", "EDIT_PROJECT", "MANAGE_SUBGROUPS", "SEND_ANNOUNCEMENTS").contains(permission) - case "MODERATOR" => Set("APPROVE_MEMBERS", "REMOVE_MEMBERS", "SEND_ANNOUNCEMENTS").contains(permission) - case "CURATOR" => Set("MANAGE_SUBGROUPS").contains(permission) - case _ => false - } - case _ => 
false - } - } -} diff --git a/app/services/HaplogroupTreeMergeService.scala b/app/services/HaplogroupTreeMergeService.scala deleted file mode 100644 index c33fc298..00000000 --- a/app/services/HaplogroupTreeMergeService.scala +++ /dev/null @@ -1,278 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.api.haplogroups.* -import models.domain.haplogroups.{MergeContext, VariantIndex} -import play.api.Logging -import services.tree.{ChangeSetCallbacks, TreeMergeAlgorithmService, TreeMergePreviewService, TreeMergeProvenanceService, VariantMatchingService} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for merging external haplogroup trees into the DecodingUs baseline tree. - * - * This is the orchestrator that coordinates the merge workflow: - * - Creates and finalizes change sets for tracking - * - Delegates to TreeMergeAlgorithmService for the core algorithm - * - Delegates to TreeMergePreviewService for dry-run simulations - * - Provides callbacks to the algorithm service for change tracking - * - * == Public API == - * - * Three entry points for tree merging: - * - mergeFullTree: Replace/extend the full tree for a haplogroup type - * - mergeSubtree: Merge a subtree under a specific anchor haplogroup - * - previewMerge: Simulate merge without applying changes - * - * == Change Set Lifecycle == - * - * For non-dry-run operations: - * 1. Create change set (Draft status) - * 2. Execute algorithm with callbacks to record changes - * 3. Finalize change set (Ready for Review status) - * - * The callback pattern decouples change tracking from the algorithm, - * allowing the algorithm service to focus purely on tree merging logic. 
- */ -@Singleton -class HaplogroupTreeMergeService @Inject()( - treeVersioningService: TreeVersioningService, - algorithmService: TreeMergeAlgorithmService, - provenanceService: TreeMergeProvenanceService, - variantMatchingService: VariantMatchingService, - previewService: TreeMergePreviewService -)(implicit ec: ExecutionContext) extends Logging { - - // ============================================================================ - // Public API - // ============================================================================ - - /** - * Merge a full tree, replacing the existing tree for the given haplogroup type. - */ - def mergeFullTree(request: TreeMergeRequest): Future[TreeMergeResponse] = { - if (request.dryRun) { - previewMerge(MergePreviewRequest( - haplogroupType = request.haplogroupType, - anchorHaplogroupName = None, - sourceTree = request.sourceTree, - sourceName = request.sourceName, - priorityConfig = request.priorityConfig - )).map(preview => TreeMergeResponse( - success = true, - message = "Dry run completed successfully", - statistics = preview.statistics, - conflicts = preview.conflicts, - splits = preview.splits, - ambiguities = preview.ambiguities - )) - } else { - performMergeWithChangeSet( - haplogroupType = request.haplogroupType, - anchorId = None, - sourceTree = request.sourceTree, - sourceName = request.sourceName, - priorityConfig = request.priorityConfig.getOrElse(SourcePriorityConfig(List.empty)), - conflictStrategy = request.conflictStrategy.getOrElse(ConflictStrategy.HigherPriorityWins), - preloadedIndex = None, - stagingMode = request.stagingMode - ) - } - } - - /** - * Merge a subtree under a specific anchor haplogroup. 
- */ - def mergeSubtree(request: SubtreeMergeRequest): Future[TreeMergeResponse] = { - if (request.dryRun) { - previewMerge(MergePreviewRequest( - haplogroupType = request.haplogroupType, - anchorHaplogroupName = Some(request.anchorHaplogroupName), - sourceTree = request.sourceTree, - sourceName = request.sourceName, - priorityConfig = request.priorityConfig - )).map(preview => TreeMergeResponse( - success = true, - message = "Dry run completed successfully", - statistics = preview.statistics, - conflicts = preview.conflicts, - splits = preview.splits, - ambiguities = preview.ambiguities - )) - } else { - for { - // Find the anchor haplogroup using variant index - index <- variantMatchingService.buildVariantIndex(request.haplogroupType) - anchorOpt = index.haplogroupByName.get(request.anchorHaplogroupName.toUpperCase) - anchor = anchorOpt.getOrElse( - throw new IllegalArgumentException(s"Anchor haplogroup '${request.anchorHaplogroupName}' not found") - ) - - // Load context: Get all descendants of the anchor - descendants <- algorithmService.getDescendantsRecursive(anchor.id.get) - - // Build scoped index for the anchor and its descendants - subtreeScope = anchor +: descendants - subtreeIndex <- variantMatchingService.buildVariantIndexForScope(subtreeScope) - - // Check if the source tree root is the anchor itself - rootMatch = variantMatchingService.findExistingMatch(request.sourceTree, subtreeIndex) - rootIsAnchor = rootMatch.exists(_.id == anchor.id) - effectiveAnchorId = if (rootIsAnchor) None else anchor.id - - result <- performMergeWithChangeSet( - haplogroupType = request.haplogroupType, - anchorId = effectiveAnchorId, - sourceTree = request.sourceTree, - sourceName = request.sourceName, - priorityConfig = request.priorityConfig.getOrElse(SourcePriorityConfig(List.empty)), - conflictStrategy = request.conflictStrategy.getOrElse(ConflictStrategy.HigherPriorityWins), - preloadedIndex = Some(subtreeIndex), - stagingMode = request.stagingMode - ) - } yield 
result - } - } - - /** - * Preview merge without applying changes. - */ - def previewMerge(request: MergePreviewRequest): Future[MergePreviewResponse] = { - for { - existingIndex <- variantMatchingService.buildVariantIndex(request.haplogroupType) - preview <- previewService.simulateMerge( - sourceTree = request.sourceTree, - sourceName = request.sourceName, - existingIndex = existingIndex, - priorityConfig = request.priorityConfig.getOrElse(SourcePriorityConfig(List.empty)) - ) - } yield preview - } - - // ============================================================================ - // Private Implementation - // ============================================================================ - - /** - * Perform merge with change set tracking. - * - * This method: - * 1. Creates a change set for tracking - * 2. Builds callbacks that record changes to the change set - * 3. Delegates to the algorithm service - * 4. Finalizes the change set - */ - private def performMergeWithChangeSet( - haplogroupType: HaplogroupType, - anchorId: Option[Int], - sourceTree: PhyloNodeInput, - sourceName: String, - priorityConfig: SourcePriorityConfig, - conflictStrategy: ConflictStrategy, - preloadedIndex: Option[VariantIndex], - stagingMode: Boolean - ): Future[TreeMergeResponse] = { - val now = LocalDateTime.now() - val nodeCount = countNodes(sourceTree) - val enableChangeTracking = true - - logger.info(s"Starting merge for source '$sourceName' with $nodeCount nodes (stagingMode: $stagingMode)") - - for { - // Phase 0: Create change set for tracking (if enabled) - changeSetOpt <- if (enableChangeTracking) { - treeVersioningService.createChangeSet( - haplogroupType = haplogroupType, - sourceName = sourceName, - description = Some(s"Tree merge from $sourceName with $nodeCount nodes"), - createdBy = "system" - ).map(Some(_)).recover { - case e: IllegalStateException => - logger.warn(s"Could not create change set (one may already be active): ${e.getMessage}") - None - } - } else 
Future.successful(None) - changeSetId = changeSetOpt.flatMap(_.id) - _ = changeSetOpt.foreach(cs => logger.info(s"Created change set ${cs.id.get}: ${cs.name}")) - - // Staging mode requires a change set ID for WIP table operations - effectiveStagingMode = stagingMode && changeSetId.isDefined - _ = if (stagingMode && !effectiveStagingMode) { - logger.warn("Staging mode disabled - change set creation failed, falling back to production mode") - } - - // Build merge context - context = MergeContext( - haplogroupType = haplogroupType, - sourceName = sourceName, - priorityConfig = priorityConfig, - conflictStrategy = conflictStrategy, - timestamp = now, - changeSetId = changeSetId, - stagingMode = effectiveStagingMode - ) - - // Create callbacks for change tracking (fire-and-forget pattern) - callbacks = createCallbacks(changeSetId) - - // Delegate to algorithm service - result <- algorithmService.performMerge( - haplogroupType = haplogroupType, - anchorId = anchorId, - sourceTree = sourceTree, - sourceName = sourceName, - priorityConfig = priorityConfig, - conflictStrategy = conflictStrategy, - preloadedIndex = preloadedIndex, - context = context, - callbacks = callbacks - ) - - // Finalize change set (if one was created) - _ <- changeSetId match { - case Some(csId) => - treeVersioningService.finalizeChangeSet(csId, result.statistics, result.ambiguityReportPath).map { success => - if (success) logger.info(s"Change set $csId finalized and ready for review") - else logger.warn(s"Failed to finalize change set $csId") - }.recover { - case e: Exception => - logger.error(s"Error finalizing change set $csId: ${e.getMessage}") - } - case None => - Future.successful(()) - } - } yield result - } - - /** - * Create callbacks for change set tracking. - * - * The callbacks record changes in a fire-and-forget pattern to avoid - * slowing down the merge operation. Failures are logged but don't - * interrupt the merge. 
- */ - private def createCallbacks(changeSetId: Option[Int]): Option[ChangeSetCallbacks] = { - changeSetId.map { csId => - new ChangeSetCallbacks { - def recordCreate(haplogroupJson: String, parentId: Option[Int]): Unit = { - treeVersioningService.recordCreate(csId, haplogroupJson, parentId).recover { - case e: Exception => logger.warn(s"Failed to record CREATE change: ${e.getMessage}") - } - } - - def recordReparent(haplogroupId: Int, oldParentId: Option[Int], newParentId: Int): Unit = { - treeVersioningService.recordReparent(csId, haplogroupId, oldParentId, newParentId).recover { - case e: Exception => logger.warn(s"Failed to record REPARENT change: ${e.getMessage}") - } - } - } - } - } - - /** Count total nodes in a tree */ - private def countNodes(node: PhyloNodeInput): Int = { - 1 + node.children.map(countNodes).sum - } -} diff --git a/app/services/HaplogroupTreeService.scala b/app/services/HaplogroupTreeService.scala deleted file mode 100644 index 794a0c09..00000000 --- a/app/services/HaplogroupTreeService.scala +++ /dev/null @@ -1,310 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.HaplogroupType -import models.HaplogroupType.{MT, Y} -import models.api.* -import models.domain.genomics.VariantV2 -import models.domain.haplogroups.Haplogroup -import play.api.Logging -import play.api.libs.json.JsObject -import play.api.mvc.Call -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository} - -import java.time.ZoneId -import scala.concurrent.{ExecutionContext, Future} - -sealed trait RouteType -case object ApiRoute extends RouteType -case object FragmentRoute extends RouteType - -/** - * Service for building and managing haplogroup trees, providing capabilities for constructing tree responses, - * processing ancestral and descendant relationships, and querying haplogroups by variants. 
- */ -class HaplogroupTreeService @Inject()( - coreRepository: HaplogroupCoreRepository, - variantRepository: HaplogroupVariantRepository -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Builds a TreeDTO representation for a specified haplogroup with related breadcrumbs and subtree. - */ - def buildTreeResponse(haplogroupQuery: String, haplogroupType: HaplogroupType, routeType: RouteType): Future[TreeDTO] = { - for { - resolvedHaplogroupName <- resolveHaplogroupByNameOrVariant(haplogroupQuery, haplogroupType) - rootHaplogroupOpt <- coreRepository.getHaplogroupByName(resolvedHaplogroupName, haplogroupType) - rootHaplogroup = rootHaplogroupOpt.getOrElse(throw new IllegalArgumentException(s"Haplogroup $resolvedHaplogroupName not found after variant lookup")) - - ancestors <- coreRepository.getAncestors(rootHaplogroup.id.get) - crumbs = buildCrumbs(ancestors, haplogroupType, routeType) - - subtree <- routeType match { - case ApiRoute => buildSubtree(rootHaplogroup) - case FragmentRoute => buildSubtreeWithoutVariants(rootHaplogroup) - } - - } yield TreeDTO( - name = rootHaplogroup.name, - crumbs = crumbs, - subclade = Some(subtree) - ) - } - - /** - * Resolves a haplogroup name by either direct lookup or by finding a variant. - */ - private def resolveHaplogroupByNameOrVariant(query: String, haplogroupType: HaplogroupType): Future[String] = { - coreRepository.getHaplogroupByName(query, haplogroupType).flatMap { - case Some(haplogroup) => Future.successful(haplogroup.name) - case None => - // Haplogroup not found by direct name, try searching by variant - logger.debug(s"Haplogroup '$query' not found by direct name. 
Attempting variant lookup.") - val normalizedQuery = normalizeVariantId(query) - variantRepository.findVariants(normalizedQuery).flatMap { - case variants if variants.nonEmpty => - // Found variants, now find their defining haplogroups - val variantIds = variants.flatMap(_.variantId).map(_.toString) - Future.sequence(variantIds.map(vid => variantRepository.findHaplogroupsByDefiningVariant(vid, haplogroupType))).map { - haplogroupLists => - val definingHaplogroups = haplogroupLists.flatten - definingHaplogroups.sortBy(_.validFrom).lastOption match { - case Some(latestHaplogroup) => - logger.info(s"Resolved variant '$query' to haplogroup '${latestHaplogroup.name}'.") - latestHaplogroup.name - case None => - logger.warn(s"Variant '$query' found, but no defining haplogroups for type $haplogroupType.") - throw new IllegalArgumentException(s"Haplogroup or variant '$query' not found") - } - } - case _ => - logger.debug(s"Variant '$query' not found.") - Future.failed(new IllegalArgumentException(s"Haplogroup or variant '$query' not found")) - } - } - } - - private def getRoute(name: String, haplogroupType: HaplogroupType, routeType: RouteType): Call = { - (haplogroupType, routeType) match { - case (Y, FragmentRoute) => controllers.routes.TreeController.yTreeFragment(Some(name)) - case (MT, FragmentRoute) => controllers.routes.TreeController.mTreeFragment(Some(name)) - case (Y, ApiRoute) => controllers.routes.TreeController.apiYTree(Some(name)) - case (MT, ApiRoute) => controllers.routes.TreeController.apiMTree(Some(name)) - } - } - - private def buildCrumbs(haplogroups: Seq[Haplogroup], haplogroupType: HaplogroupType, routeType: RouteType): List[CrumbDTO] = { - haplogroups.map { haplogroup => - CrumbDTO( - label = haplogroup.name, - url = getRoute(haplogroup.name, haplogroupType, routeType).url - ) - }.toList - } - - /** - * Recursively builds a TreeNodeDTO representation of a haplogroup and its subtree. 
- */ - private def buildSubtree(haplogroup: Haplogroup): Future[TreeNodeDTO] = { - for { - // Get variants for this haplogroup (now returns Seq[VariantV2]) - variants <- variantRepository.getHaplogroupVariants(haplogroup.id.get) - variantDTOs = mapVariants(variants) - - // Get and process children - children <- coreRepository.getDirectChildren(haplogroup.id.get) - childNodes <- Future.sequence(children.map(buildSubtree)) - - } yield TreeNodeDTO( - name = haplogroup.name, - variants = variantDTOs, - children = childNodes.toList, - updated = haplogroup.validFrom.atZone(ZoneId.systemDefault()), - isBackbone = haplogroup.source == "backbone", - formedYbp = haplogroup.formedYbp, - tmrcaYbp = haplogroup.tmrcaYbp - ) - } - - private def buildSubtreeWithoutVariants(haplogroup: Haplogroup): Future[TreeNodeDTO] = { - for { - variantCount <- variantRepository.countHaplogroupVariants(haplogroup.id.get) - children <- coreRepository.getDirectChildren(haplogroup.id.get) - childNodes <- Future.sequence(children.map(buildSubtreeWithoutVariants)) - } yield TreeNodeDTO( - name = haplogroup.name, - variants = Seq.empty, - variantCount = Some(variantCount), - children = childNodes.toList, - updated = haplogroup.validFrom.atZone(ZoneId.systemDefault()), - isBackbone = haplogroup.source == "backbone", - formedYbp = haplogroup.formedYbp, - tmrcaYbp = haplogroup.tmrcaYbp - ) - } - - /** - * Maps VariantV2 instances to VariantDTO. - * With VariantV2, aliases and coordinates are embedded in JSONB. 
- */ - private def mapVariants(variants: Seq[VariantV2]): Seq[VariantDTO] = { - variants.map { variant => - // Extract coordinates from JSONB - val coordinates = extractCoordinates(variant) - - // Extract aliases from JSONB - val aliases = extractAliases(variant) - - VariantDTO( - name = variant.displayName, - coordinates = coordinates, - variantType = variant.mutationType.dbValue, - aliases = aliases - ) - } - } - - /** - * Extract coordinates from VariantV2 JSONB into Map[String, GenomicCoordinate] - */ - private def extractCoordinates(variant: VariantV2): Map[String, GenomicCoordinate] = { - variant.coordinates.asOpt[Map[String, JsObject]].map { coordsMap => - coordsMap.flatMap { case (refGenome, coords) => - for { - contig <- (coords \ "contig").asOpt[String] - position <- (coords \ "position").asOpt[Int] - ref <- (coords \ "ref").asOpt[String] - alt <- (coords \ "alt").asOpt[String] - } yield { - val coordKey = s"$contig [${shortRefGenome(refGenome)}]" - coordKey -> GenomicCoordinate( - start = position, - stop = position, - anc = ref, - der = alt - ) - } - } - }.getOrElse(Map.empty) - } - - /** - * Extract aliases from VariantV2 JSONB into Map[String, Seq[String]] - */ - private def extractAliases(variant: VariantV2): Map[String, Seq[String]] = { - val aliases = variant.aliases - val rsIds = (aliases \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) - val commonNames = (aliases \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) - - Map( - "rsId" -> rsIds, - "commonName" -> commonNames - ).filter(_._2.nonEmpty) - } - - private def shortRefGenome(ref: String): String = ref match { - case r if r.contains("GRCh37") || r.contains("hg19") => "b37" - case r if r.contains("GRCh38") || r.contains("hg38") => "b38" - case r if r.contains("T2T") || r.contains("CHM13") || r == "hs1" => "hs1" - case other => other - } - - /** - * Builds a TreeDTO representation by constructing a haplogroup tree structure - * for the haplogroup(s) defined by the given genetic 
variant. - */ - def buildTreeFromVariant(variantId: String, haplogroupType: HaplogroupType, routeType: RouteType): Future[Option[TreeDTO]] = { - for { - // First find the haplogroup(s) defined by this variant - haplogroups <- variantRepository.findHaplogroupsByDefiningVariant(variantId, haplogroupType) - - // If we found any haplogroups, build the tree from the most recent one - treeOpt <- haplogroups.sortBy(_.validFrom).lastOption match { - case Some(haplogroup) => buildTreeResponse(haplogroup.name, haplogroupType, routeType).map(Some(_)) - case None => Future.successful(None) - } - } yield treeOpt - } - - /** - * Constructs a sequence of TreeDTO objects representing tree structures for all haplogroups - * associated with a specific genetic variant. - */ - def buildTreesFromVariant(variantId: String, haplogroupType: HaplogroupType, routeType: RouteType): Future[Seq[TreeDTO]] = { - for { - // Find all haplogroups that have this variant as defining - haplogroups <- variantRepository.findHaplogroupsByDefiningVariant(variantId, haplogroupType) - - // Build trees for each haplogroup - trees <- Future.sequence( - haplogroups.map(h => buildTreeResponse(h.name, haplogroupType, routeType)) - ) - } yield trees - } - - // Helper method to search by different variant identifier formats - def findVariantTrees(query: String, haplogroupType: HaplogroupType, routeType: RouteType): Future[Seq[TreeDTO]] = { - // Normalize the query - val normalizedQuery = normalizeVariantId(query) - - for { - // Search by different formats (rsID, position-based, etc) - variants <- variantRepository.findVariants(normalizedQuery) - - // Get all trees for each variant - treeLists <- Future.sequence( - variants.map(v => buildTreesFromVariant(v.variantId.get.toString, haplogroupType, routeType)) - ) - } yield treeLists.flatten - } - - /** - * Finds and retrieves haplogroup details with all associated genomic variants. 
- */ - def findHaplogroupWithVariants(haplogroupName: String, haplogroupType: HaplogroupType): Future[(Option[Haplogroup], Seq[VariantDTO])] = { - for { - haplogroup <- coreRepository.getHaplogroupByName(haplogroupName, haplogroupType) - variants <- findVariantsForHaplogroup(haplogroupName, haplogroupType) - } yield (haplogroup, variants) - } - - /** - * Finds and retrieves all genomic variants associated with a specified haplogroup. - * Now uses VariantV2 with embedded aliases in JSONB. - */ - def findVariantsForHaplogroup(haplogroupName: String, haplogroupType: HaplogroupType): Future[Seq[VariantDTO]] = { - for { - haplogroup <- coreRepository.getHaplogroupByName(haplogroupName, haplogroupType) - variants <- variantRepository.getHaplogroupVariants(haplogroup.flatMap(_.id).getOrElse(0)) - } yield { - val variantDTOs = mapVariants(variants) - TreeNodeDTO.sortVariants(variantDTOs) - } - } - - private def normalizeVariantId(query: String): String = { - query.trim.toLowerCase match { - case rsid if rsid.startsWith("rs") => rsid - case chrPos if chrPos.contains(":") => - val parts = chrPos.split(":") - parts.length match { - case 2 => s"${parts(0)}:${parts(1)}" // chr:pos format - case 4 => s"${parts(0)}:${parts(1)}:${parts(2)}:${parts(3)}" // chr:pos:ref:alt format - case _ => query - } - case _ => query - } - } - - /** - * Transforms a recursive tree structure of TreeNodeDTO into a flat sequence of SubcladeDTO. 
- */ - def mapApiResponse(root: Option[TreeNodeDTO]): Seq[SubcladeDTO] = { - def map(node: TreeNodeDTO, parent: Option[TreeNodeDTO]): Seq[SubcladeDTO] = { - SubcladeDTO(node.name, parent.map(_.name), node.variants, node.updated, node.isBackbone) +: node.children.flatMap(c => map(c, Option(node))) - } - - root.map(x => map(x, None)) - .getOrElse(Seq()) - } -} diff --git a/app/services/InstrumentProposalService.scala b/app/services/InstrumentProposalService.scala deleted file mode 100644 index fe8bcc87..00000000 --- a/app/services/InstrumentProposalService.scala +++ /dev/null @@ -1,309 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.* -import play.api.Logging -import repositories.{InstrumentObservationRepository, InstrumentProposalRepository, SequencerInstrumentRepository, SequencingLabRepository} - -import java.time.{LocalDateTime, Duration as JavaDuration} -import scala.concurrent.{ExecutionContext, Future} - -case class InstrumentConflict( - instrumentId: String, - proposals: Seq[ConflictingLab], - dominantLabName: String, - dominantRatio: Double - ) - -case class ConflictingLab( - labName: String, - observationCount: Int, - ratio: Double - ) - -case class AggregationResult( - instrumentId: String, - dominantLabName: String, - observationCount: Int, - distinctCitizenCount: Int, - confidenceScore: Double, - conflict: Option[InstrumentConflict], - proposedManufacturer: Option[String], - proposedModel: Option[String], - earliestObservation: Option[LocalDateTime], - latestObservation: Option[LocalDateTime] - ) - -@Singleton -class InstrumentProposalService @Inject()( - observationRepo: InstrumentObservationRepository, - proposalRepo: InstrumentProposalRepository, - instrumentRepo: SequencerInstrumentRepository, - labRepo: SequencingLabRepository - )(implicit ec: ExecutionContext) extends Logging { - - val MinObservationsForProposal: Int = 2 - val ReadyForReviewThreshold: Int = 5 - val AutoAcceptThreshold: Int = 10 
- val MinDistinctCitizens: Int = 3 - val AgreementRatio: Double = 0.9 - - private val ObservationWeight: Double = 0.4 - private val CitizenDiversityWeight: Double = 0.3 - private val RecencyWeight: Double = 0.2 - private val ConfidenceLevelWeight: Double = 0.1 - - private val RecencyDays: Int = 30 - - def aggregateObservations(instrumentId: String): Future[Option[AggregationResult]] = { - observationRepo.findByInstrumentId(instrumentId).map { observations => - if (observations.size < MinObservationsForProposal) { - None - } else { - Some(buildAggregation(instrumentId, observations)) - } - } - } - - private[services] def buildAggregation(instrumentId: String, observations: Seq[InstrumentObservation]): AggregationResult = { - val labGroups = observations.groupBy(_.labName) - val totalObs = observations.size - - val dominantLab = labGroups.maxBy(_._2.size) - val dominantLabName = dominantLab._1 - val dominantCount = dominantLab._2.size - val dominantRatio = dominantCount.toDouble / totalObs - - val distinctCitizens = observations.map(_.biosampleRef).distinct.size - - val conflict = if (labGroups.size > 1) { - val conflictingLabs = labGroups.map { case (name, obs) => - ConflictingLab(name, obs.size, obs.size.toDouble / totalObs) - }.toSeq.sortBy(-_.observationCount) - - Some(InstrumentConflict(instrumentId, conflictingLabs, dominantLabName, dominantRatio)) - } else None - - val confidenceScore = calculateConfidence( - observations, totalObs, distinctCitizens - ) - - val platforms = dominantLab._2.flatMap(_.platform).groupBy(identity) - val models = dominantLab._2.flatMap(_.instrumentModel).groupBy(identity) - val proposedManufacturer = if (platforms.nonEmpty) Some(platforms.maxBy(_._2.size)._1) else None - val proposedModel = if (models.nonEmpty) Some(models.maxBy(_._2.size)._1) else None - - val timestamps = observations.flatMap(o => Option(o.createdAt)) - val earliest = if (timestamps.nonEmpty) Some(timestamps.min) else None - val latest = if (timestamps.nonEmpty) 
Some(timestamps.max) else None - - AggregationResult( - instrumentId = instrumentId, - dominantLabName = dominantLabName, - observationCount = totalObs, - distinctCitizenCount = distinctCitizens, - confidenceScore = confidenceScore, - conflict = conflict, - proposedManufacturer = proposedManufacturer, - proposedModel = proposedModel, - earliestObservation = earliest, - latestObservation = latest - ) - } - - private[services] def calculateConfidence( - observations: Seq[InstrumentObservation], - observationCount: Int, - distinctCitizens: Int - ): Double = { - val obsScore = math.min(observationCount.toDouble / AutoAcceptThreshold, 1.0) - val citizenScore = math.min(distinctCitizens.toDouble / MinDistinctCitizens, 1.0) - val recencyScore = calculateRecencyScore(observations) - val confidenceLevelScore = calculateAvgConfidenceLevel(observations) - - val raw = ObservationWeight * obsScore + - CitizenDiversityWeight * citizenScore + - RecencyWeight * recencyScore + - ConfidenceLevelWeight * confidenceLevelScore - - math.min(math.max(raw, 0.0), 1.0) - } - - private[services] def calculateRecencyScore(observations: Seq[InstrumentObservation]): Double = { - if (observations.isEmpty) return 0.0 - val now = LocalDateTime.now() - val mostRecent = observations.map(_.createdAt).max - val daysSince = JavaDuration.between(mostRecent, now).toDays - if (daysSince <= RecencyDays) 1.0 - else math.max(0.0, 1.0 - (daysSince - RecencyDays).toDouble / (RecencyDays * 3)) - } - - private[services] def calculateAvgConfidenceLevel(observations: Seq[InstrumentObservation]): Double = { - if (observations.isEmpty) return 0.0 - val weights = observations.map { obs => - obs.confidence match { - case ObservationConfidence.Known => 1.0 - case ObservationConfidence.Inferred => 0.7 - case ObservationConfidence.Guessed => 0.3 - } - } - weights.sum / weights.size - } - - def createOrUpdateProposal(instrumentId: String): Future[Option[InstrumentAssociationProposal]] = { - for { - aggregationOpt <- 
aggregateObservations(instrumentId) - result <- aggregationOpt match { - case None => Future.successful(None) - case Some(agg) => - if (agg.conflict.exists(_.dominantRatio < 0.7)) { - logger.warn(s"Instrument $instrumentId has conflicting lab associations " + - s"(dominant ratio: ${agg.conflict.map(_.dominantRatio).getOrElse(0.0)})") - } - upsertProposal(agg) - } - } yield result - } - - private def upsertProposal(agg: AggregationResult): Future[Option[InstrumentAssociationProposal]] = { - proposalRepo.findActiveByInstrumentId(agg.instrumentId).flatMap { - case Some(existing) => - val newStatus = evaluateThreshold(agg, existing.status) - val updated = existing.copy( - proposedLabName = agg.dominantLabName, - proposedManufacturer = agg.proposedManufacturer, - proposedModel = agg.proposedModel, - observationCount = agg.observationCount, - distinctCitizenCount = agg.distinctCitizenCount, - confidenceScore = agg.confidenceScore, - earliestObservation = agg.earliestObservation, - latestObservation = agg.latestObservation, - status = newStatus - ) - proposalRepo.update(updated).map(_ => Some(updated)) - - case None => - val status = if (agg.observationCount >= ReadyForReviewThreshold) ProposalStatus.ReadyForReview - else ProposalStatus.Pending - val proposal = InstrumentAssociationProposal( - instrumentId = agg.instrumentId, - proposedLabName = agg.dominantLabName, - proposedManufacturer = agg.proposedManufacturer, - proposedModel = agg.proposedModel, - observationCount = agg.observationCount, - distinctCitizenCount = agg.distinctCitizenCount, - confidenceScore = agg.confidenceScore, - earliestObservation = agg.earliestObservation, - latestObservation = agg.latestObservation, - status = status - ) - proposalRepo.create(proposal).map(Some(_)) - } - } - - private[services] def evaluateThreshold(agg: AggregationResult, currentStatus: ProposalStatus): ProposalStatus = { - currentStatus match { - case ProposalStatus.UnderReview | ProposalStatus.Accepted | 
ProposalStatus.Rejected | ProposalStatus.Superseded => - currentStatus - case _ => - if (agg.observationCount >= ReadyForReviewThreshold) ProposalStatus.ReadyForReview - else ProposalStatus.Pending - } - } - - def evaluateAllPendingProposals(): Future[Seq[InstrumentAssociationProposal]] = { - proposalRepo.findPending().flatMap { proposals => - Future.sequence(proposals.map { proposal => - aggregateObservations(proposal.instrumentId).flatMap { - case Some(agg) => - val newStatus = evaluateThreshold(agg, proposal.status) - if (newStatus != proposal.status || agg.observationCount != proposal.observationCount) { - val updated = proposal.copy( - observationCount = agg.observationCount, - distinctCitizenCount = agg.distinctCitizenCount, - confidenceScore = agg.confidenceScore, - status = newStatus - ) - proposalRepo.update(updated).map(_ => updated) - } else { - Future.successful(proposal) - } - case None => Future.successful(proposal) - } - }) - } - } - - def acceptProposal( - proposalId: Int, - curatorId: String, - labName: String, - manufacturer: Option[String], - model: Option[String], - notes: Option[String] - ): Future[Either[String, InstrumentAssociationProposal]] = { - proposalRepo.findById(proposalId).flatMap { - case None => - Future.successful(Left(s"Proposal $proposalId not found")) - case Some(proposal) if proposal.status == ProposalStatus.Accepted => - Future.successful(Left(s"Proposal $proposalId is already accepted")) - case Some(proposal) if proposal.status == ProposalStatus.Rejected => - Future.successful(Left(s"Proposal $proposalId is already rejected")) - case Some(proposal) => - for { - assocResponse <- instrumentRepo.associateLabWithInstrument( - proposal.instrumentId, labName, manufacturer, model - ) - accepted = proposal.copy( - status = ProposalStatus.Accepted, - reviewedAt = Some(LocalDateTime.now()), - reviewedBy = Some(curatorId), - reviewNotes = notes, - acceptedLabId = Some(assocResponse.labId) - ) - _ <- proposalRepo.update(accepted) - } 
yield Right(accepted) - } - } - - def rejectProposal( - proposalId: Int, - curatorId: String, - reason: String - ): Future[Either[String, InstrumentAssociationProposal]] = { - proposalRepo.findById(proposalId).flatMap { - case None => - Future.successful(Left(s"Proposal $proposalId not found")) - case Some(proposal) if proposal.status == ProposalStatus.Accepted => - Future.successful(Left(s"Proposal $proposalId is already accepted")) - case Some(proposal) if proposal.status == ProposalStatus.Rejected => - Future.successful(Left(s"Proposal $proposalId is already rejected")) - case Some(proposal) => - val rejected = proposal.copy( - status = ProposalStatus.Rejected, - reviewedAt = Some(LocalDateTime.now()), - reviewedBy = Some(curatorId), - reviewNotes = Some(reason) - ) - proposalRepo.update(rejected).map(_ => Right(rejected)) - } - } - - def detectConflicts(): Future[Seq[InstrumentConflict]] = { - proposalRepo.findPending().flatMap { proposals => - Future.sequence(proposals.map { proposal => - observationRepo.findByInstrumentId(proposal.instrumentId).map { observations => - val labGroups = observations.groupBy(_.labName) - if (labGroups.size > 1) { - val total = observations.size - val conflictingLabs = labGroups.map { case (name, obs) => - ConflictingLab(name, obs.size, obs.size.toDouble / total) - }.toSeq.sortBy(-_.observationCount) - val dominant = conflictingLabs.head - Some(InstrumentConflict(proposal.instrumentId, conflictingLabs, dominant.labName, dominant.ratio)) - } else None - } - }).map(_.flatten) - } - } -} diff --git a/app/services/LoggingEmailService.scala b/app/services/LoggingEmailService.scala deleted file mode 100644 index 9f98b4a0..00000000 --- a/app/services/LoggingEmailService.scala +++ /dev/null @@ -1,43 +0,0 @@ -package services - -import play.api.{Logger, Logging} - -import javax.inject.Singleton - -/** - * A concrete implementation of the EmailService trait that logs email details instead of actually sending emails. 
- * This class is useful for debugging or testing purposes where sending emails is not required. - * - * The email details such as recipients, sender, subject, and body are logged using the provided logger from the Logging trait. - */ -@Singleton -class LoggingEmailService extends EmailService with Logging { - - /** - * Sends an email with the specified details and logs the email information. - * - * @param to the list of recipient email addresses - * @param from the sender's email address - * @param subject the subject of the email - * @param body the body content of the email - * @return Either a String containing an error message in case of failure, or a Unit upon successful email delivery - */ - def sendEmail( - to: Seq[String], - from: String, - subject: String, - body: String - ): Either[String, Unit] = { - logger.info( - s""" - |New email: - |To: ${to.mkString(", ")} - |From: $from - |Subject: $subject - | - |$body - |""".stripMargin - ) - Right(()) - } -} \ No newline at end of file diff --git a/app/services/LoginRateLimiter.scala b/app/services/LoginRateLimiter.scala deleted file mode 100644 index b902745a..00000000 --- a/app/services/LoginRateLimiter.scala +++ /dev/null @@ -1,45 +0,0 @@ -package services - -import java.time.Instant -import java.util.concurrent.ConcurrentHashMap -import javax.inject.Singleton - -/** - * Simple in-memory rate limiter for login attempts. - * Tracks failed attempts per IP and blocks after threshold. - */ -@Singleton -class LoginRateLimiter { - - private val MaxAttempts = 10 - private val WindowSeconds = 900L // 15 minutes - private val attempts = new ConcurrentHashMap[String, (Int, Instant)]() - - /** Returns true if the IP is allowed to attempt login. */ - def isAllowed(ip: String): Boolean = { - pruneExpired() - val entry = attempts.get(ip) - entry == null || entry._1 < MaxAttempts - } - - /** Record a failed login attempt for the given IP. 
*/ - def recordFailure(ip: String): Unit = { - attempts.compute(ip, (_, existing) => { - if (existing == null || existing._2.plusSeconds(WindowSeconds).isBefore(Instant.now())) { - (1, Instant.now()) - } else { - (existing._1 + 1, existing._2) - } - }) - } - - /** Clear attempts for an IP on successful login. */ - def recordSuccess(ip: String): Unit = { - attempts.remove(ip) - } - - private def pruneExpired(): Unit = { - val cutoff = Instant.now().minusSeconds(WindowSeconds) - attempts.entrySet().removeIf(e => e.getValue._2.isBefore(cutoff)) - } -} diff --git a/app/services/MemberVisibilityService.scala b/app/services/MemberVisibilityService.scala deleted file mode 100644 index 9502c76a..00000000 --- a/app/services/MemberVisibilityService.scala +++ /dev/null @@ -1,177 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.* -import play.api.Logging -import repositories.{GroupProjectMemberRepository, GroupProjectRepository} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class MemberVisibilityService @Inject()( - projectRepo: GroupProjectRepository, - memberRepo: GroupProjectMemberRepository - )(implicit ec: ExecutionContext) extends Logging { - - def updateVisibility( - memberId: Int, - requesterDid: String, - newVisibility: MemberVisibility - ): Future[Either[String, GroupProjectMember]] = { - memberRepo.findById(memberId).flatMap { - case None => Future.successful(Left("Membership not found")) - case Some(member) if member.citizenDid != requesterDid => - Future.successful(Left("Only the member can update their own visibility")) - case Some(member) if member.status != "ACTIVE" => - Future.successful(Left("Can only update visibility for active memberships")) - case Some(member) => - validateVisibility(newVisibility) match { - case Some(error) => Future.successful(Left(error)) - case None => - val updated = member.copy(visibility = newVisibility, updatedAt = 
LocalDateTime.now()) - memberRepo.update(updated).map { - case true => Right(updated) - case false => Left("Failed to update visibility") - } - } - } - } - - def getEffectiveVisibility(memberId: Int): Future[Option[EffectiveVisibility]] = { - memberRepo.findById(memberId).flatMap { - case None => Future.successful(None) - case Some(member) => - projectRepo.findById(member.groupProjectId).map { - case None => None - case Some(project) => Some(EffectiveVisibility.compute(project, member.visibility)) - } - } - } - - def getFilteredMemberView( - memberId: Int, - viewerDid: String, - haplogroup: Option[String] = None, - lineagePath: Option[Seq[String]] = None, - privateVariantCount: Option[Int] = None, - ancestor: AncestorData = AncestorData() - ): Future[Option[FilteredMemberView]] = { - memberRepo.findById(memberId).flatMap { - case None => Future.successful(None) - case Some(member) if member.status != "ACTIVE" => Future.successful(None) - case Some(member) => - projectRepo.findById(member.groupProjectId).flatMap { - case None => Future.successful(None) - case Some(project) => - val effective = EffectiveVisibility.compute(project, member.visibility) - resolveViewerContext(project, member.groupProjectId, viewerDid).map { context => - Some(buildFilteredView(member, effective, context, haplogroup, lineagePath, privateVariantCount, ancestor)) - } - } - } - } - - def getFilteredMembersForProject( - projectId: Int, - viewerDid: String, - memberData: Map[Int, MemberSupplementalData] - ): Future[Either[String, Seq[FilteredMemberView]]] = { - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(project) => - for { - viewerContext <- resolveViewerContext(project, projectId, viewerDid) - members <- memberRepo.findByProjectAndStatus(projectId, "ACTIVE") - } yield { - if (!canViewMemberList(project, viewerContext)) - Left("Insufficient permissions to view member list") - else { - val views = members.flatMap { member => 
- val effective = EffectiveVisibility.compute(project, member.visibility) - if (!effective.showInMemberList && !viewerContext.isAdmin) None - else { - val data = memberData.getOrElse(member.id.getOrElse(-1), MemberSupplementalData()) - Some(buildFilteredView(member, effective, viewerContext, - data.haplogroup, data.lineagePath, data.privateVariantCount, data.ancestor)) - } - } - Right(views) - } - } - } - } - - private def buildFilteredView( - member: GroupProjectMember, - effective: EffectiveVisibility, - context: ViewerContext, - haplogroup: Option[String], - lineagePath: Option[Seq[String]], - privateVariantCount: Option[Int], - ancestor: AncestorData - ): FilteredMemberView = { - FilteredMemberView( - memberId = member.id.getOrElse(0), - kitId = member.kitId.orElse(Some(s"KIT-${member.id.getOrElse(0)}")), - displayName = if (effective.showDisplayName) member.displayName else None, - role = member.role, - contributionLevel = member.contributionLevel, - terminalHaplogroup = if (effective.shareTerminalHaplogroup) haplogroup else None, - lineagePath = if (effective.shareFullLineagePath) lineagePath else None, - privateVariantCount = if (effective.sharePrivateVariants) privateVariantCount else None, - ancestor = AncestorData.filter(ancestor, effective.ancestorVisibility), - strVisibility = effective.strVisibility, - allowDirectContact = effective.allowDirectContact && context.isMember, - subgroupIds = member.subgroupIds, - joinedAt = member.joinedAt - ) - } - - private def canViewMemberList(project: GroupProject, context: ViewerContext): Boolean = { - project.memberListVisibility match { - case "PUBLIC" => true - case "MEMBERS_ONLY" => context.isMember - case "ADMINS_ONLY" => context.isAdmin - case "HIDDEN" => context.isAdmin && context.role == "ADMIN" - case _ => false - } - } - - private[services] def resolveViewerContext(project: GroupProject, projectId: Int, viewerDid: String): Future[ViewerContext] = { - memberRepo.findByProjectAndCitizen(projectId, 
viewerDid).map { - case Some(m) if m.status == "ACTIVE" => - ViewerContext( - isMember = true, - isAdmin = Set("ADMIN", "CO_ADMIN").contains(m.role), - role = m.role, - viewerDid = viewerDid - ) - case _ => - ViewerContext(isMember = false, isAdmin = false, role = "NONE", viewerDid = viewerDid) - } - } - - private def validateVisibility(v: MemberVisibility): Option[String] = { - if (!MemberVisibility.ValidAncestorVisibility.contains(v.ancestorVisibility)) - Some(s"Invalid ancestor visibility: ${v.ancestorVisibility}") - else if (!MemberVisibility.ValidStrVisibility.contains(v.strVisibility)) - Some(s"Invalid STR visibility: ${v.strVisibility}") - else - None - } -} - -case class ViewerContext( - isMember: Boolean, - isAdmin: Boolean, - role: String, - viewerDid: String - ) - -case class MemberSupplementalData( - haplogroup: Option[String] = None, - lineagePath: Option[Seq[String]] = None, - privateVariantCount: Option[Int] = None, - ancestor: AncestorData = AncestorData() - ) diff --git a/app/services/OpenAlexService.scala b/app/services/OpenAlexService.scala deleted file mode 100644 index fb0524ed..00000000 --- a/app/services/OpenAlexService.scala +++ /dev/null @@ -1,93 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.domain.publications.Publication -import play.api.libs.json.{JsArray, JsValue} -import play.api.libs.ws.WSClient -import play.api.{Configuration, Logging} -import services.mappers.OpenAlexMapper - -import java.time.LocalDate -import java.time.format.DateTimeParseException -import javax.inject.Singleton -import scala.concurrent.{ExecutionContext, Future} - - -/** - * A service that interacts with OpenAlex API to fetch comprehensive publication data. - * - * @param configuration The application's configuration settings, used to retrieve the 'openalex.mailToEmail' value. - * @param ws The WSClient instance for making HTTP requests. 
- * @param ec An ExecutionContext, which provides context for executing code in a separate thread or reactor. - */ -@Singleton -class OpenAlexService @Inject()( - configuration: Configuration, - ws: WSClient) - (implicit ec: ExecutionContext) extends Logging { - - private val mailToEmail: String = configuration.get[String]("openalex.mailToEmail") - private val openAlexBaseUrl: String = "https://api.openalex.org" - - /** - * Fetches comprehensive publication data from OpenAlex for a given DOI - * by making a single direct API call and parsing the JSON response. - * - * @param doi The Digital Object Identifier of the publication. - * @return A Future containing an Option[Publication]. Returns None if the publication cannot be found or an error occurs. - */ - def fetchAndMapPublicationByDOI(doi: String): Future[Option[Publication]] = { - // Single API URL for the full work details - val apiUrl = s"$openAlexBaseUrl/works/https://doi.org/$doi?mailto=$mailToEmail" - - logger.info(s"Fetching: $apiUrl") - - ws.url(apiUrl).get().map { response => - if (response.status == 200) { - logger.debug(s"Successfully fetched JSON for DOI '$doi'") - Some(OpenAlexMapper.jsonToPublication(response.json, doi)) - } else { - logger.warn(s"OpenAlex API returned non-200 status for DOI '$doi': ${response.status}, Body: ${response.body}") - None - } - }.recover { - case e: Exception => - logger.error(s"Exception during OpenAlex API call for DOI '$doi': ${e.getMessage}", e) - None - } - } - - /** - * Searches for works in OpenAlex matching the given query string and filters. - * - * @param searchQuery The search term (maps to the 'search' query parameter). - * @param filters Optional filters (map of field -> value). - * @return A Future containing a sequence of PublicationCandidate objects. 
- */ - def searchWorks(searchQuery: String, filters: Map[String, String] = Map.empty): Future[Seq[models.domain.publications.PublicationCandidate]] = { - val apiUrl = s"$openAlexBaseUrl/works" - - val queryParams = filters.toSeq :+ ("search" -> searchQuery) :+ ("mailto" -> mailToEmail) - - logger.info(s"Searching OpenAlex: $apiUrl with params $queryParams") - - ws.url(apiUrl).withQueryStringParameters(queryParams*).get().map { response => - if (response.status == 200) { - (response.json \ "results").asOpt[Seq[JsValue]] match { - case Some(results) => - results.map(OpenAlexMapper.jsonToPublicationCandidate) - case None => - logger.warn("OpenAlex search response did not contain 'results' array.") - Seq.empty - } - } else { - logger.warn(s"OpenAlex search failed: ${response.status}, Body: ${response.body}") - Seq.empty - } - }.recover { - case e: Exception => - logger.error(s"Exception during OpenAlex search: ${e.getMessage}", e) - Seq.empty - } - } -} diff --git a/app/services/PDSRegistrationService.scala b/app/services/PDSRegistrationService.scala deleted file mode 100644 index e24038a4..00000000 --- a/app/services/PDSRegistrationService.scala +++ /dev/null @@ -1,109 +0,0 @@ -package services - -import models.PDSRegistration -import play.api.Logging -import repositories.PDSRegistrationRepository - -import java.time.ZonedDateTime -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class PDSRegistrationService @Inject()( - atProtocolClient: ATProtocolClient, - pdsRegistrationRepository: PDSRegistrationRepository - )(implicit ec: ExecutionContext) extends Logging { - - /** - * Registers a new PDS, performing server-side verification with the AT Protocol. - * - * @param did The Decentralized Identifier (DID) of the PDS. - * @param handle The handle associated with the PDS. - * @param pdsUrl The base URL of the PDS. - * @param rToken The AT Protocol authentication token provided by the Researcher Edge App. 
- * @return A Future indicating success or failure of the registration. - */ - def registerPDS(did: String, handle: String, pdsUrl: String, rToken: String): Future[Either[String, PDSRegistration]] = { - // 1. Check if PDS already registered - pdsRegistrationRepository.findByDid(did).flatMap { - case Some(existingRegistration) => - Future.successful(Left(s"PDS with DID $did is already registered.")) - case None => - // 2. Perform server-side verification with the AT Protocol - atProtocolClient.getLatestCommit(pdsUrl, did, rToken).flatMap { - case Some(commitResponse) => - // 3. Validation: Confirm DID is valid and PDS is responsive (implicitly done by successful commit fetch) - // 4. Write New DID Record - val newRegistration = PDSRegistration( - did = did, - pdsUrl = pdsUrl, - handle = handle, - lastCommitCid = Some(commitResponse.cid), - lastCommitSeq = Some(commitResponse.seq), - cursor = 0L, - createdAt = ZonedDateTime.now(), - updatedAt = ZonedDateTime.now() - ) - pdsRegistrationRepository.create(newRegistration).map { res => - logger.info(s"Internal Notification: PDS Registered successfully for DID $did. Rust Sync Cluster will detect this via DB poll.") - Right(res) - } - case None => - Future.successful(Left(s"Failed to verify PDS $pdsUrl for DID $did. Could not get latest commit.")) - } - } recover { - case e: Exception => - logger.error(s"Error during PDS registration for DID $did: ${e.getMessage}", e) - Left("An unexpected error occurred during PDS registration.") - } - } - - /** - * Retrieves a PDS registration by its DID. - */ - def getPDSByDid(did: String): Future[Option[PDSRegistration]] = { - pdsRegistrationRepository.findByDid(did) - } - - /** - * Retrieves a PDS registration by its handle. - */ - def getPDSByHandle(handle: String): Future[Option[PDSRegistration]] = { - pdsRegistrationRepository.findByHandle(handle) - } - - /** - * Lists all registered PDS entries. 
- */ - def listAllPDS(): Future[Seq[PDSRegistration]] = { - pdsRegistrationRepository.listAll - } - - /** - * Updates the cursor (last commit CID and sequence) for a registered PDS. - */ - def updatePDSCursor(did: String, lastCommitCid: String, newCursor: Long): Future[Either[String, Unit]] = { - pdsRegistrationRepository.updateCursor(did, lastCommitCid, newCursor).map { affectedRows => - if (affectedRows > 0) Right(()) - else Left(s"PDS with DID $did not found or cursor update failed.") - } recover { - case e: Exception => - logger.error(s"Error updating PDS cursor for DID $did: ${e.getMessage}", e) - Left("An unexpected error occurred during PDS cursor update.") - } - } - - /** - * Deletes a PDS registration. - */ - def deletePDS(did: String): Future[Either[String, Unit]] = { - pdsRegistrationRepository.delete(did).map { affectedRows => - if (affectedRows > 0) Right(()) - else Left(s"PDS with DID $did not found or deletion failed.") - } recover { - case e: Exception => - logger.error(s"Error deleting PDS for DID $did: ${e.getMessage}", e) - Left("An unexpected error occurred during PDS deletion.") - } - } -} diff --git a/app/services/PatronageService.scala b/app/services/PatronageService.scala deleted file mode 100644 index 0df53fb4..00000000 --- a/app/services/PatronageService.scala +++ /dev/null @@ -1,143 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.billing.{PatronSubscription, PatronSummary, PatronTier} -import play.api.Logging -import repositories.PatronSubscriptionRepository - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class PatronageService @Inject()( - subscriptionRepo: PatronSubscriptionRepository - )(implicit ec: ExecutionContext) extends Logging { - - def createSubscription( - userId: UUID, - tier: String, - billingInterval: String, - paymentProvider: String, - providerSubscriptionId: Option[String] = None, - providerCustomerId: 
Option[String] = None - ): Future[Either[String, PatronSubscription]] = { - if (!PatronSubscription.ValidTiers.contains(tier)) - return Future.successful(Left(s"Invalid patron tier: $tier")) - if (!PatronSubscription.ValidIntervals.contains(billingInterval)) - return Future.successful(Left(s"Invalid billing interval: $billingInterval")) - if (!PatronSubscription.ValidProviders.contains(paymentProvider)) - return Future.successful(Left(s"Invalid payment provider: $paymentProvider")) - - subscriptionRepo.findActiveByUserId(userId).flatMap { - case Some(existing) => - Future.successful(Left(s"User already has an active subscription (tier: ${existing.patronTier})")) - case None => - val amountCents = PatronTier.amountCents(tier, billingInterval) - val now = LocalDateTime.now() - val periodEnd = billingInterval match { - case "MONTHLY" => now.plusMonths(1) - case "YEARLY" => now.plusYears(1) - } - - val subscription = PatronSubscription( - userId = userId, - patronTier = tier, - paymentProvider = paymentProvider, - providerSubscriptionId = providerSubscriptionId, - providerCustomerId = providerCustomerId, - amountCents = amountCents, - billingInterval = billingInterval, - currentPeriodStart = Some(now), - currentPeriodEnd = Some(periodEnd) - ) - subscriptionRepo.create(subscription).map(Right(_)) - } - } - - def cancelSubscription(subscriptionId: Int, userId: UUID): Future[Either[String, Boolean]] = { - subscriptionRepo.findById(subscriptionId).flatMap { - case None => - Future.successful(Left("Subscription not found")) - case Some(sub) if sub.userId != userId => - Future.successful(Left("Not authorized to cancel this subscription")) - case Some(sub) if sub.status != "ACTIVE" => - Future.successful(Left(s"Cannot cancel subscription with status: ${sub.status}")) - case Some(_) => - subscriptionRepo.cancel(subscriptionId).map(Right(_)) - } - } - - def getActiveSubscription(userId: UUID): Future[Option[PatronSubscription]] = - subscriptionRepo.findActiveByUserId(userId) - 
- def getUserSubscriptions(userId: UUID): Future[Seq[PatronSubscription]] = - subscriptionRepo.findByUserId(userId) - - def handlePaymentWebhook(event: WebhookEvent, provider: String): Future[Either[String, Boolean]] = { - subscriptionRepo.findByProviderSubscriptionId(provider, event.providerSubscriptionId).flatMap { - case None => - logger.warn(s"Webhook for unknown subscription: ${event.providerSubscriptionId}") - Future.successful(Left("Subscription not found")) - case Some(sub) => - event.eventType match { - case "subscription.renewed" | "invoice.paid" => - val start = event.periodStart.getOrElse(LocalDateTime.now()) - val end = event.periodEnd.getOrElse( - if (sub.billingInterval == "MONTHLY") start.plusMonths(1) else start.plusYears(1) - ) - for { - _ <- subscriptionRepo.updateStatus(sub.id.get, "ACTIVE") - _ <- subscriptionRepo.updatePeriod(sub.id.get, start, end) - } yield Right(true) - - case "subscription.cancelled" | "subscription.deleted" => - subscriptionRepo.cancel(sub.id.get).map(Right(_)) - - case "invoice.payment_failed" => - subscriptionRepo.updateStatus(sub.id.get, "PAST_DUE").map(Right(_)) - - case other => - logger.debug(s"Unhandled webhook event type: $other") - Future.successful(Right(true)) - } - } - } - - def expireOverdueSubscriptions(): Future[Int] = { - subscriptionRepo.findByStatus("ACTIVE").flatMap { active => - val now = LocalDateTime.now() - val expired = active.filter { sub => - sub.currentPeriodEnd.exists(_.isBefore(now)) - } - - Future.sequence(expired.map { sub => - subscriptionRepo.updateStatus(sub.id.get, "EXPIRED") - }).map(_.count(_ == true)) - } - } - - def getPatronSummary: Future[PatronSummary] = { - for { - activeCount <- subscriptionRepo.countActive() - tierCounts <- subscriptionRepo.countByTier() - } yield { - val monthlyRevenue = tierCounts.map { case (tier, count) => - val monthlyAmount = PatronTier.amountCents(tier, "MONTHLY") - monthlyAmount * count - }.sum - - PatronSummary( - activePatrons = activeCount, - 
tierCounts = tierCounts, - monthlyRevenueCents = monthlyRevenue - ) - } - } - - def isPatron(userId: UUID): Future[Boolean] = - subscriptionRepo.findActiveByUserId(userId).map(_.isDefined) - - def getPatronTier(userId: UUID): Future[Option[String]] = - subscriptionRepo.findActiveByUserId(userId).map(_.map(_.patronTier)) -} diff --git a/app/services/PaymentGateway.scala b/app/services/PaymentGateway.scala deleted file mode 100644 index bf74f686..00000000 --- a/app/services/PaymentGateway.scala +++ /dev/null @@ -1,35 +0,0 @@ -package services - -import scala.concurrent.Future - -case class CheckoutRequest( - tier: String, - billingInterval: String, - amountCents: Int, - currency: String = "USD", - customerEmail: Option[String] = None, - successUrl: String, - cancelUrl: String - ) - -case class CheckoutResult( - sessionId: String, - checkoutUrl: String, - providerCustomerId: Option[String] = None - ) - -case class WebhookEvent( - eventType: String, - providerSubscriptionId: String, - providerCustomerId: Option[String] = None, - status: Option[String] = None, - periodStart: Option[java.time.LocalDateTime] = None, - periodEnd: Option[java.time.LocalDateTime] = None - ) - -trait PaymentGateway { - def providerName: String - def createCheckoutSession(request: CheckoutRequest): Future[CheckoutResult] - def cancelSubscription(providerSubscriptionId: String): Future[Boolean] - def parseWebhookEvent(payload: String, signature: String): Future[Option[WebhookEvent]] -} diff --git a/app/services/PdsFleetService.scala b/app/services/PdsFleetService.scala deleted file mode 100644 index be6322e3..00000000 --- a/app/services/PdsFleetService.scala +++ /dev/null @@ -1,176 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.pds.* -import play.api.Logging -import play.api.libs.json.{JsValue, Json} -import repositories.{PdsFleetConfigRepository, PdsHeartbeatLogRepository, PdsNodeRepository} - -import java.time.LocalDateTime -import 
scala.concurrent.{ExecutionContext, Future} - -case class HeartbeatRequest( - did: String, - pdsUrl: String, - handle: Option[String] = None, - nodeName: Option[String] = None, - softwareVersion: Option[String] = None, - status: String = "ONLINE", - capabilities: Option[JsValue] = None, - loadMetrics: Option[JsValue] = None, - processingQueueSize: Option[Int] = None, - lastCommitCid: Option[String] = None, - lastCommitRev: Option[String] = None, - ipAddress: Option[String] = None, - osInfo: Option[String] = None, - errorMessage: Option[String] = None - ) - -@Singleton -class PdsFleetService @Inject()( - nodeRepo: PdsNodeRepository, - heartbeatRepo: PdsHeartbeatLogRepository, - configRepo: PdsFleetConfigRepository - )(implicit ec: ExecutionContext) extends Logging { - - def processHeartbeat(request: HeartbeatRequest): Future[Either[String, PdsNode]] = { - if (!PdsNode.ValidStatuses.contains(request.status)) - return Future.successful(Left(s"Invalid status: ${request.status}")) - - nodeRepo.findByDid(request.did).flatMap { - case Some(existing) => - for { - _ <- nodeRepo.updateHeartbeat( - existing.id.get, request.status, request.softwareVersion, - request.lastCommitCid, request.lastCommitRev - ) - _ <- updateNodeFields(existing, request) - _ <- recordHeartbeat(existing.id.get, request) - updated <- nodeRepo.findById(existing.id.get) - } yield Right(updated.getOrElse(existing)) - - case None => - val newNode = PdsNode( - did = request.did, - pdsUrl = request.pdsUrl, - handle = request.handle, - nodeName = request.nodeName, - softwareVersion = request.softwareVersion, - status = request.status, - capabilities = request.capabilities.getOrElse(Json.obj()), - lastHeartbeat = Some(LocalDateTime.now()), - lastCommitCid = request.lastCommitCid, - lastCommitRev = request.lastCommitRev, - ipAddress = request.ipAddress, - osInfo = request.osInfo - ) - for { - created <- nodeRepo.create(newNode) - _ <- recordHeartbeat(created.id.get, request) - } yield Right(created) - } - } - 
- def getNode(did: String): Future[Option[PdsNode]] = - nodeRepo.findByDid(did) - - def getNodeById(id: Int): Future[Option[PdsNode]] = - nodeRepo.findById(id) - - def listNodes(statusFilter: Option[String] = None): Future[Seq[PdsNode]] = - statusFilter match { - case Some(status) => nodeRepo.findByStatus(status) - case None => nodeRepo.findAll() - } - - def getFleetSummary: Future[PdsFleetSummary] = { - for { - statusCounts <- nodeRepo.countByStatus() - targetVersion <- configRepo.findByKey("target_software_version") - allNodes <- nodeRepo.findAll() - } yield { - val target = targetVersion.map(_.configValue) - val onTarget = target match { - case Some(tv) => allNodes.count(_.softwareVersion.contains(tv)) - case None => 0 - } - val total = statusCounts.values.sum - - PdsFleetSummary( - totalNodes = total, - onlineNodes = statusCounts.getOrElse("ONLINE", 0), - offlineNodes = statusCounts.getOrElse("OFFLINE", 0), - busyNodes = statusCounts.getOrElse("BUSY", 0), - errorNodes = statusCounts.getOrElse("ERROR", 0), - unknownNodes = statusCounts.getOrElse("UNKNOWN", 0), - targetVersion = target, - nodesOnTargetVersion = onTarget, - nodesOutdated = total - onTarget - ) - } - } - - def markStaleNodesOffline(): Future[Int] = { - configRepo.findByKey("offline_threshold_seconds").flatMap { configOpt => - val thresholdSeconds = configOpt.map(_.configValue.toLong).getOrElse(900L) - val cutoff = LocalDateTime.now().minusSeconds(thresholdSeconds) - nodeRepo.findStaleNodes(cutoff).flatMap { staleNodes => - Future.sequence(staleNodes.flatMap(_.id).map { nodeId => - nodeRepo.updateStatus(nodeId, "OFFLINE") - }).map(_.count(identity)) - } - } - } - - def getNodeHeartbeatHistory(nodeId: Int, limit: Int = 100): Future[Seq[PdsHeartbeatLog]] = - heartbeatRepo.findByNode(nodeId, limit) - - def getFleetConfig: Future[Seq[PdsFleetConfig]] = - configRepo.findAll() - - def updateFleetConfig(key: String, value: String, updatedBy: Option[String] = None): Future[Either[String, Boolean]] = { - 
configRepo.upsert(key, value, updatedBy).map { success => - if (success) Right(true) - else Left("Failed to update fleet config") - } - } - - def removeNode(did: String): Future[Either[String, Boolean]] = { - nodeRepo.findByDid(did).flatMap { - case None => Future.successful(Left("Node not found")) - case Some(node) => - nodeRepo.delete(node.id.get).map(Right(_)) - } - } - - def pruneHeartbeatLogs(retentionDays: Int = 30): Future[Int] = { - val cutoff = LocalDateTime.now().minusDays(retentionDays) - heartbeatRepo.deleteOlderThan(cutoff) - } - - private def updateNodeFields(existing: PdsNode, request: HeartbeatRequest): Future[Boolean] = { - val updated = existing.copy( - pdsUrl = request.pdsUrl, - handle = request.handle.orElse(existing.handle), - nodeName = request.nodeName.orElse(existing.nodeName), - softwareVersion = request.softwareVersion.orElse(existing.softwareVersion), - status = request.status, - capabilities = request.capabilities.getOrElse(existing.capabilities), - ipAddress = request.ipAddress.orElse(existing.ipAddress), - osInfo = request.osInfo.orElse(existing.osInfo) - ) - nodeRepo.update(updated) - } - - private def recordHeartbeat(nodeId: Int, request: HeartbeatRequest): Future[PdsHeartbeatLog] = { - val log = PdsHeartbeatLog( - pdsNodeId = nodeId, - status = request.status, - softwareVersion = request.softwareVersion, - loadMetrics = request.loadMetrics, - processingQueueSize = request.processingQueueSize, - errorMessage = request.errorMessage - ) - heartbeatRepo.create(log) - } -} diff --git a/app/services/PdsSignatureVerifier.scala b/app/services/PdsSignatureVerifier.scala deleted file mode 100644 index 568eb085..00000000 --- a/app/services/PdsSignatureVerifier.scala +++ /dev/null @@ -1,171 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import play.api.{Configuration, Logging} -import utils.Base58 - -import java.nio.charset.StandardCharsets -import java.security.* -import java.security.spec.X509EncodedKeySpec -import 
java.time.{Duration, Instant} -import java.util.Base64 -import java.util.concurrent.ConcurrentHashMap -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class PdsSignatureVerifier @Inject()( - atProtoClient: ATProtocolClient, - configuration: Configuration - )(implicit ec: ExecutionContext) extends Logging { - - private val timestampWindowSeconds: Long = - configuration.getOptional[Long]("pds.auth.timestamp.window.seconds").getOrElse(300) - - private val usedNonces = new ConcurrentHashMap[String, Instant]() - - def isTimestampValid(timestamp: String): Boolean = { - try { - val requestTime = Instant.parse(timestamp) - val now = Instant.now() - Duration.between(requestTime, now).abs().getSeconds <= timestampWindowSeconds - } catch { - case _: Exception => false - } - } - - def checkAndRecordNonce(nonce: String): Boolean = { - pruneExpiredNonces() - usedNonces.putIfAbsent(nonce, Instant.now()) == null - } - - def hashBody[A](request: play.api.mvc.Request[A]): String = { - val bodyBytes = request.body match { - case json: play.api.libs.json.JsValue => - json.toString.getBytes(StandardCharsets.UTF_8) - case anyContent: play.api.mvc.AnyContent => - anyContent.asJson.map(_.toString.getBytes(StandardCharsets.UTF_8)) - .orElse(anyContent.asRaw.flatMap(_.asBytes().map(_.toArray))) - .orElse(anyContent.asText.map(_.getBytes(StandardCharsets.UTF_8))) - .getOrElse(Array.empty[Byte]) - case text: String => - text.getBytes(StandardCharsets.UTF_8) - case _ => Array.empty[Byte] - } - val digest = MessageDigest.getInstance("SHA-256").digest(bodyBytes) - Base64.getEncoder.encodeToString(digest) - } - - def buildSigningInput( - method: String, - path: String, - timestamp: String, - bodyHash: String, - nonce: Option[String] - ): String = { - val parts = Seq(method.toUpperCase, path, timestamp, bodyHash) ++ nonce.toSeq - parts.mkString("\n") - } - - def verifySignature(did: String, signingInput: String, signatureBase64: String): Future[Boolean] = { - 
atProtoClient.resolveDid(did).map { - case None => - logger.warn(s"Could not resolve DID document for $did") - false - case Some(doc) => - doc.verificationMethod.flatMap(_.headOption) match { - case None => - logger.warn(s"No verification method found in DID document for $did") - false - case Some(vm) => - vm.publicKeyMultibase match { - case None => - logger.warn(s"No publicKeyMultibase in verification method for $did") - false - case Some(multibaseKey) => - try { - val publicKey = parseMultibaseKey(multibaseKey) - val signatureBytes = Base64.getDecoder.decode(signatureBase64) - val inputBytes = signingInput.getBytes(StandardCharsets.UTF_8) - verifyWithKey(publicKey, inputBytes, signatureBytes) - } catch { - case e: Exception => - logger.error(s"Signature verification failed for $did: ${e.getMessage}", e) - false - } - } - } - } - } - - private[services] def parseMultibaseKey(multibaseKey: String): PublicKey = { - require(multibaseKey.startsWith("z"), "Only base58btc (z prefix) multibase keys are supported") - - val decoded = Base58.decode(multibaseKey.substring(1)) - require(decoded.length >= 2, "Key too short to contain multicodec prefix") - - val (prefix0, prefix1) = (decoded(0) & 0xff, decoded(1) & 0xff) - val rawKey = decoded.drop(2) - - (prefix0, prefix1) match { - case (0xed, 0x01) => buildEd25519PublicKey(rawKey) - case (0x80, 0x24) => buildP256PublicKey(rawKey) - case _ => - throw new IllegalArgumentException( - f"Unsupported multicodec key type: 0x${prefix0}%02x 0x${prefix1}%02x") - } - } - - private[services] def buildEd25519PublicKey(rawKey: Array[Byte]): PublicKey = { - require(rawKey.length == 32, s"Ed25519 public key must be 32 bytes, got ${rawKey.length}") - // ASN.1 DER prefix for Ed25519 public key (OID 1.3.101.112) - val derPrefix: Array[Byte] = Array( - 0x30, 0x2a, 0x30, 0x05, 0x06, 0x03, 0x2b, 0x65, - 0x70, 0x03, 0x21, 0x00 - ).map(_.toByte) - val encoded = derPrefix ++ rawKey - val keyFactory = KeyFactory.getInstance("Ed25519") - 
keyFactory.generatePublic(new X509EncodedKeySpec(encoded)) - } - - private[services] def buildP256PublicKey(rawKey: Array[Byte]): PublicKey = { - val derPrefix: Array[Byte] = if (rawKey.length == 33) { - // Compressed P-256 point - Array( - 0x30, 0x39, 0x30, 0x13, 0x06, 0x07, 0x2a, 0x86, - 0x48, 0xce, 0x3d, 0x02, 0x01, 0x06, 0x08, 0x2a, - 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07, 0x03, - 0x22, 0x00 - ).map(_.toByte) - } else if (rawKey.length == 65) { - // Uncompressed P-256 point - Array( - 0x30, 0x59, 0x30, 0x13, 0x06, 0x07, 0x2a, 0x86, - 0x48, 0xce, 0x3d, 0x02, 0x01, 0x06, 0x08, 0x2a, - 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07, 0x03, - 0x42, 0x00 - ).map(_.toByte) - } else { - throw new IllegalArgumentException(s"Invalid P-256 key length: ${rawKey.length}") - } - val encoded = derPrefix ++ rawKey - val keyFactory = KeyFactory.getInstance("EC") - keyFactory.generatePublic(new X509EncodedKeySpec(encoded)) - } - - private[services] def verifyWithKey(publicKey: PublicKey, data: Array[Byte], signature: Array[Byte]): Boolean = { - val algorithm = publicKey.getAlgorithm match { - case "Ed25519" | "EdDSA" => "Ed25519" - case "EC" => "SHA256withECDSA" - case other => throw new IllegalArgumentException(s"Unsupported key algorithm: $other") - } - val sig = Signature.getInstance(algorithm) - sig.initVerify(publicKey) - sig.update(data) - sig.verify(signature) - } - - private def pruneExpiredNonces(): Unit = { - val cutoff = Instant.now().minusSeconds(timestampWindowSeconds * 2) - usedNonces.entrySet().removeIf(e => e.getValue.isBefore(cutoff)) - } -} diff --git a/app/services/PgpBiosampleService.scala b/app/services/PgpBiosampleService.scala deleted file mode 100644 index a95ff0ae..00000000 --- a/app/services/PgpBiosampleService.scala +++ /dev/null @@ -1,86 +0,0 @@ -package services - -import com.vividsolutions.jts.geom.Point -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.{BiologicalSex, Biosample, BiosampleType, SpecimenDonor} -import 
repositories.* - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for creating and managing PGP biosamples with associated metadata and data. - * - * @constructor Constructs the service with the provided biosample repository and accession generator. - * @param biosampleRepository Repository interface for managing biosample data. - * @param accessionGenerator Generator for creating unique accession numbers for biosamples. - * @param ec Implicit execution context for handling asynchronous operations. - */ -@Singleton -class PgpBiosampleService @Inject()( - biosampleRepository: BiosampleRepository, - accessionGenerator: AccessionNumberGenerator, - biosampleService: BiosampleService - )(implicit ec: ExecutionContext) { - - def createPgpBiosample( - participantId: String, - description: String, - centerName: String, - sex: Option[BiologicalSex] = None, - latitude: Option[Double] = None, - longitude: Option[Double] = None - ): Future[UUID] = { - val sampleGuid = UUID.randomUUID() - - // First check for existing participant - biosampleRepository.findByAliasOrAccession(participantId).flatMap { - case Some((existing, _)) => - Future.failed(DuplicateParticipantException( - s"Participant $participantId already has a biosample with accession ${existing.sampleAccession}" - )) - - case None => - for { - donorId <- { - val shouldCreatePgpDonor = sex.isDefined || latitude.isDefined || longitude.isDefined - if (shouldCreatePgpDonor) { - biosampleService.createOrUpdateSpecimenDonor( - donorIdentifier = participantId, - originBiobank = centerName, - donorType = BiosampleType.PGP, - sex = sex, - latitude = latitude, - longitude = longitude, - pgpParticipantId = Some(participantId) - ) - } else { - Future.successful(None) - } - } - metadata = AccessionMetadata( - pgpParticipantId = Some(participantId), - citizenBiosampleDid = None - ) - accession <- accessionGenerator.generateAccession(BiosampleType.PGP, metadata) - createdBiosample <- 
biosampleService.createBiosample( - sampleGuid = sampleGuid, - sampleAccession = accession, - description = description, - alias = Some(participantId), - centerName = centerName, - specimenDonorId = donorId, - sourcePlatform = Some(utils.GenomicsConstants.PGP_SOURCE_PLATFORM) - ) - } yield createdBiosample.sampleGuid - }.recoverWith { - case e: Exception if e.getMessage.contains("duplicate key") => - Future.failed(DuplicateParticipantException( - s"Participant $participantId already exists (caught at database level)" - )) - case e: BiosampleServiceException => Future.failed(e) - case e: Exception => Future.failed(new RuntimeException( - s"Failed to create PGP biosample: ${e.getMessage}", e)) - } - } -} \ No newline at end of file diff --git a/app/services/PrivateVariantExtractionService.scala b/app/services/PrivateVariantExtractionService.scala deleted file mode 100644 index 06639f33..00000000 --- a/app/services/PrivateVariantExtractionService.scala +++ /dev/null @@ -1,193 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.HaplogroupType -import models.atmosphere.VariantCall -import models.domain.discovery.* -import models.domain.genomics.{MutationType, NamingStatus, VariantV2} -import play.api.Logging -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, PrivateVariantRepository} - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Extracts private variants from biosample haplogroup results and records them - * for the discovery pipeline. Private variants are mutations beyond the terminal - * haplogroup that may indicate new branches when shared across multiple biosamples. - */ -class PrivateVariantExtractionService @Inject()( - privateVariantRepo: PrivateVariantRepository, - variantRepo: HaplogroupVariantRepository, - coreRepo: HaplogroupCoreRepository -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Extract and store private variants from a Citizen biosample. 
- */ - def extractFromCitizenBiosample( - citizenBiosampleId: Int, - sampleGuid: UUID, - terminalHaplogroupName: String, - haplogroupType: HaplogroupType, - variantCalls: Seq[VariantCall] - ): Future[Seq[BiosamplePrivateVariant]] = { - extractPrivateVariants( - SampleReference(BiosampleSourceType.Citizen, citizenBiosampleId, sampleGuid), - terminalHaplogroupName, - haplogroupType, - variantCalls - ) - } - - /** - * Extract and store private variants from an External (publication) biosample. - */ - def extractFromExternalBiosample( - biosampleId: Int, - sampleGuid: UUID, - terminalHaplogroupName: String, - haplogroupType: HaplogroupType, - variantCalls: Seq[VariantCall] - ): Future[Seq[BiosamplePrivateVariant]] = { - extractPrivateVariants( - SampleReference(BiosampleSourceType.External, biosampleId, sampleGuid), - terminalHaplogroupName, - haplogroupType, - variantCalls - ) - } - - /** - * Core extraction logic. Resolves variant calls to variant IDs, - * filters to only private (non-tree) variants, and stores them. 
- */ - private def extractPrivateVariants( - sampleRef: SampleReference, - terminalHaplogroupName: String, - haplogroupType: HaplogroupType, - variantCalls: Seq[VariantCall] - ): Future[Seq[BiosamplePrivateVariant]] = { - if (variantCalls.isEmpty) { - return Future.successful(Seq.empty) - } - - for { - // Resolve terminal haplogroup - terminalHgOpt <- coreRepo.getHaplogroupByName(terminalHaplogroupName, haplogroupType) - terminalHg = terminalHgOpt.getOrElse { - logger.warn(s"Terminal haplogroup '$terminalHaplogroupName' not found for sample ${sampleRef.sampleGuid}") - throw new IllegalArgumentException(s"Terminal haplogroup '$terminalHaplogroupName' not found") - } - - // Get the set of variant IDs already on the tree for this haplogroup's lineage - ancestors <- coreRepo.getAncestors(terminalHg.id.get) - allLineageIds = (ancestors.flatMap(_.id) :+ terminalHg.id.get).toSet - treeVariants <- Future.sequence(allLineageIds.toSeq.map(id => variantRepo.getHaplogroupVariants(id))) - treeVariantPositions = treeVariants.flatten.flatMap(extractPosition).toSet - - // Resolve variant calls to variant_v2 records - resolvedVariants <- resolveOrCreateVariants(variantCalls, terminalHg.id.get) - - // Filter to only private variants (not already on the tree) - privateEntries = resolvedVariants.filter { case (call, _) => - val pos = positionKey(call) - !treeVariantPositions.contains(pos) - } - - // Create private variant records - pvRecords = privateEntries.map { case (_, variantId) => - BiosamplePrivateVariant( - sampleType = sampleRef.sampleType, - sampleId = sampleRef.sampleId, - sampleGuid = sampleRef.sampleGuid, - variantId = variantId, - haplogroupType = haplogroupType, - terminalHaplogroupId = terminalHg.id.get - ) - }.toSeq - - saved <- if (pvRecords.nonEmpty) { - logger.info(s"Extracted ${pvRecords.size} private variants for sample ${sampleRef.sampleGuid} " + - s"(${sampleRef.sampleType}) beyond terminal ${terminalHaplogroupName}") - privateVariantRepo.createAll(pvRecords) - 
} else { - Future.successful(Seq.empty) - } - } yield saved - } - - /** - * Resolve variant calls to existing variant_v2 records or create new ones. - * Returns a map of VariantCall -> variant_id. - */ - private[services] def resolveOrCreateVariants( - variantCalls: Seq[VariantCall], - terminalHaplogroupId: Int - ): Future[Map[VariantCall, Int]] = { - Future.sequence(variantCalls.map { call => - resolveVariant(call, terminalHaplogroupId).map(vid => call -> vid) - }).map(_.toMap) - } - - /** - * Resolve a single variant call to a variant_v2 ID. - * Looks up by name first, then by position, creating new if needed. - */ - private def resolveVariant(call: VariantCall, terminalHaplogroupId: Int): Future[Int] = { - call.variantName match { - case Some(name) => - // Named variant: search by name - variantRepo.findVariants(name).flatMap { - case variants if variants.nonEmpty => - // Use the first match — parallel mutation detection is deferred to DISC-2 - Future.successful(variants.head.variantId.get) - case _ => - // Not found by name, search by position - resolveByPosition(call, terminalHaplogroupId) - } - case None => - // Unnamed variant: search by position - resolveByPosition(call, terminalHaplogroupId) - } - } - - /** - * Resolve a variant by genomic position coordinates. - */ - private def resolveByPosition(call: VariantCall, terminalHaplogroupId: Int): Future[Int] = { - val posQuery = s"${call.contigAccession}:${call.position}" - variantRepo.findVariants(posQuery).map { - case variants if variants.nonEmpty => variants.head.variantId.get - case _ => - // Variant not found — for now, log and skip. - // Full variant creation requires DISC-2 (proposed branch engine) context. 
- logger.debug(s"Variant at ${call.contigAccession}:${call.position} not found in variant_v2, skipping") - -1 - } - } - - private def extractPosition(v: VariantV2): Option[String] = { - val coords = v.coordinates - val grch38 = (coords \ "GRCh38").asOpt[play.api.libs.json.JsObject] - .orElse((coords \ "GRCh38.p14").asOpt[play.api.libs.json.JsObject]) - grch38.flatMap { c => - for { - contig <- (c \ "contig").asOpt[String] - pos <- (c \ "position").asOpt[Int] - } yield s"$contig:$pos" - } - } - - private def positionKey(call: VariantCall): String = - s"${call.contigAccession}:${call.position}" -} - -/** - * Value object representing a reference to any biosample type. - */ -case class SampleReference( - sampleType: BiosampleSourceType, - sampleId: Int, - sampleGuid: UUID -) diff --git a/app/services/ProjectService.scala b/app/services/ProjectService.scala deleted file mode 100644 index 57312b37..00000000 --- a/app/services/ProjectService.scala +++ /dev/null @@ -1,75 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.api.{ProjectRequest, ProjectResponse} -import models.domain.Project -import repositories.ProjectRepository - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class ProjectService @Inject()( - projectRepository: ProjectRepository - )(implicit ec: ExecutionContext) { - - def createProject(request: ProjectRequest): Future[ProjectResponse] = { - val project = Project( - id = None, - projectGuid = UUID.randomUUID(), - name = request.name, - description = request.description, - ownerDid = "did:example:owner", // Placeholder until auth provides owner DID - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now(), - deleted = false, - atUri = request.atUri, - atCid = Some(UUID.randomUUID().toString) - ) - - projectRepository.create(project).map(toResponse) - } - - def updateProject(atUri: String, request: ProjectRequest): Future[ProjectResponse] = 
{ - projectRepository.findByAtUri(atUri).flatMap { - case Some(existing) => - if (request.atCid.isDefined && request.atCid != existing.atCid) { - Future.failed(new IllegalStateException(s"Optimistic locking failure: atCid mismatch.")) - } else { - val updatedProject = existing.copy( - name = request.name, - description = request.description, - atUri = request.atUri, - updatedAt = LocalDateTime.now(), - atCid = Some(UUID.randomUUID().toString) - ) - projectRepository.update(updatedProject, request.atCid).flatMap { success => - if (success) { - Future.successful(toResponse(updatedProject)) - } else { - Future.failed(new RuntimeException("Update failed")) - } - } - } - case None => - Future.failed(new NoSuchElementException(s"Project not found for atUri: $atUri")) - } - } - - def deleteProject(atUri: String): Future[Boolean] = { - projectRepository.softDeleteByAtUri(atUri) - } - - private def toResponse(p: Project): ProjectResponse = { - ProjectResponse( - projectGuid = p.projectGuid, - name = p.name, - description = p.description, - ownerDid = p.ownerDid, - createdAt = p.createdAt, - updatedAt = p.updatedAt, - atCid = p.atCid - ) - } -} diff --git a/app/services/ProjectStrComparisonService.scala b/app/services/ProjectStrComparisonService.scala deleted file mode 100644 index 66143bcb..00000000 --- a/app/services/ProjectStrComparisonService.scala +++ /dev/null @@ -1,380 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.dal.domain.genomics.StrMutationRate -import models.domain.{EffectiveVisibility, GroupProject, GroupProjectMember} -import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import repositories.* - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -case class StrDistanceResult( - memberId1: Int, - memberId2: Int, - geneticDistance: Int, - markerCount: Int, - normalizedDistance: Double - ) - -object StrDistanceResult { - implicit val format: 
OFormat[StrDistanceResult] = Json.format[StrDistanceResult] -} - -case class ModalComparisonResult( - memberId: Int, - distanceFromModal: Int, - markerCount: Int, - normalizedDistance: Double - ) - -object ModalComparisonResult { - implicit val format: OFormat[ModalComparisonResult] = Json.format[ModalComparisonResult] -} - -case class ProjectModalHaplotype( - projectId: Int, - markerModals: Seq[MarkerModal], - sampleCount: Int, - computedAt: LocalDateTime = LocalDateTime.now() - ) - -object ProjectModalHaplotype { - implicit val format: OFormat[ProjectModalHaplotype] = Json.format[ProjectModalHaplotype] -} - -case class MarkerModal( - markerName: String, - modalValue: Int, - variance: Double, - sampleCount: Int - ) - -object MarkerModal { - implicit val format: OFormat[MarkerModal] = Json.format[MarkerModal] -} - -@Singleton -class ProjectStrComparisonService @Inject()( - projectRepo: GroupProjectRepository, - memberRepo: GroupProjectMemberRepository, - biosampleRepo: CitizenBiosampleRepository, - biosampleMainRepo: BiosampleRepository, - variantCallRepo: BiosampleVariantCallRepository, - strMutationRateRepo: StrMutationRateRepository - )(implicit ec: ExecutionContext) extends Logging { - - def getProjectModalHaplotype( - projectId: Int, - viewerDid: String - ): Future[Either[String, ProjectModalHaplotype]] = { - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(project) => - resolveViewerPermission(project, projectId, viewerDid, "MODAL_COMPARISON_ONLY").flatMap { - case Left(err) => Future.successful(Left(err)) - case Right(_) => - computeProjectModal(project).map(Right(_)) - } - } - } - - def getMemberDistanceFromModal( - projectId: Int, - memberId: Int, - viewerDid: String - ): Future[Either[String, ModalComparisonResult]] = { - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(project) => - resolveViewerPermission(project, projectId, 
viewerDid, "DISTANCE_CALCULATION_ONLY").flatMap { - case Left(err) => Future.successful(Left(err)) - case Right(_) => - memberRepo.findById(memberId).flatMap { - case None => Future.successful(Left("Member not found")) - case Some(member) if member.groupProjectId != projectId => - Future.successful(Left("Member not in this project")) - case Some(member) if member.status != "ACTIVE" => - Future.successful(Left("Member is not active")) - case Some(member) => - val effective = EffectiveVisibility.compute(project, member.visibility) - if (effective.strVisibility == "NONE") - Future.successful(Left("Member STR data is not shared")) - else - computeDistanceFromModal(project, member) - } - } - } - } - - def getMemberPairDistance( - projectId: Int, - memberId1: Int, - memberId2: Int, - viewerDid: String - ): Future[Either[String, StrDistanceResult]] = { - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(project) => - resolveViewerPermission(project, projectId, viewerDid, "DISTANCE_CALCULATION_ONLY").flatMap { - case Left(err) => Future.successful(Left(err)) - case Right(_) => - for { - m1Opt <- memberRepo.findById(memberId1) - m2Opt <- memberRepo.findById(memberId2) - result <- (m1Opt, m2Opt) match { - case (None, _) => Future.successful(Left("Member 1 not found")) - case (_, None) => Future.successful(Left("Member 2 not found")) - case (Some(m1), Some(m2)) => - validateMembersForComparison(project, m1, m2, projectId) match { - case Some(err) => Future.successful(Left(err)) - case None => computePairDistance(m1, m2) - } - } - } yield result - } - } - } - - def getDistanceMatrix( - projectId: Int, - viewerDid: String - ): Future[Either[String, Seq[StrDistanceResult]]] = { - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(project) => - resolveViewerPermission(project, projectId, viewerDid, "DISTANCE_CALCULATION_ONLY").flatMap { - case Left(err) 
=> Future.successful(Left(err)) - case Right(_) => - memberRepo.findByProjectAndStatus(projectId, "ACTIVE").flatMap { members => - val eligible = members.filter { m => - val effective = EffectiveVisibility.compute(project, m.visibility) - effective.strVisibility != "NONE" - } - computeDistanceMatrix(eligible) - } - } - } - } - - private[services] def computeProjectModal(project: GroupProject): Future[ProjectModalHaplotype] = { - for { - members <- memberRepo.findByProjectAndStatus(project.id.get, "ACTIVE") - eligible = members.filter { m => - val effective = EffectiveVisibility.compute(project, m.visibility) - effective.strVisibility != "NONE" - } - strDataByMember <- collectStrData(eligible) - } yield { - val allObservations = strDataByMember.values.flatten.toSeq - val byMarker = allObservations.groupBy(_._1) - - val markerModals = byMarker.map { case (markerName, observations) => - val values = observations.map(_._2) - val mode = values.groupBy(identity).maxByOption(_._2.size).map(_._1).getOrElse(0) - val mean = if (values.nonEmpty) values.sum.toDouble / values.size else 0.0 - val variance = if (values.size > 1) { - values.map(v => math.pow(v - mean, 2)).sum / (values.size - 1) - } else 0.0 - - MarkerModal(markerName, mode, variance, values.size) - }.toSeq.sortBy(_.markerName) - - ProjectModalHaplotype( - projectId = project.id.get, - markerModals = markerModals, - sampleCount = strDataByMember.size - ) - } - } - - private def computeDistanceFromModal( - project: GroupProject, - member: GroupProjectMember - ): Future[Either[String, ModalComparisonResult]] = { - for { - modal <- computeProjectModal(project) - memberStr <- resolveMemberStrData(member) - } yield { - memberStr match { - case None => Left("No STR data available for member") - case Some(memberValues) => - val modalMap = modal.markerModals.map(m => m.markerName -> m.modalValue).toMap - val commonMarkers = modalMap.keySet.intersect(memberValues.keySet) - if (commonMarkers.isEmpty) - Left("No common STR 
markers between member and project modal") - else { - val distance = commonMarkers.toSeq.map { marker => - math.abs(modalMap(marker) - memberValues(marker)) - }.sum - val normalized = if (commonMarkers.nonEmpty) distance.toDouble / commonMarkers.size else 0.0 - Right(ModalComparisonResult( - memberId = member.id.getOrElse(0), - distanceFromModal = distance, - markerCount = commonMarkers.size, - normalizedDistance = normalized - )) - } - } - } - } - - private def computePairDistance( - m1: GroupProjectMember, - m2: GroupProjectMember - ): Future[Either[String, StrDistanceResult]] = { - for { - str1 <- resolveMemberStrData(m1) - str2 <- resolveMemberStrData(m2) - } yield { - (str1, str2) match { - case (None, _) => Left("No STR data available for member 1") - case (_, None) => Left("No STR data available for member 2") - case (Some(values1), Some(values2)) => - val commonMarkers = values1.keySet.intersect(values2.keySet) - if (commonMarkers.isEmpty) - Left("No common STR markers between members") - else { - val distance = commonMarkers.toSeq.map { marker => - math.abs(values1(marker) - values2(marker)) - }.sum - val normalized = if (commonMarkers.nonEmpty) distance.toDouble / commonMarkers.size else 0.0 - Right(StrDistanceResult( - memberId1 = m1.id.getOrElse(0), - memberId2 = m2.id.getOrElse(0), - geneticDistance = distance, - markerCount = commonMarkers.size, - normalizedDistance = normalized - )) - } - } - } - } - - private def computeDistanceMatrix(members: Seq[GroupProjectMember]): Future[Either[String, Seq[StrDistanceResult]]] = { - collectStrData(members).map { strDataByMember => - val memberIds = strDataByMember.keys.toSeq.sorted - val results = for { - i <- memberIds.indices - j <- (i + 1) until memberIds.size - id1 = memberIds(i) - id2 = memberIds(j) - values1 = strDataByMember(id1).toMap - values2 = strDataByMember(id2).toMap - commonMarkers = values1.keySet.intersect(values2.keySet) - if commonMarkers.nonEmpty - } yield { - val distance = 
commonMarkers.toSeq.map(m => math.abs(values1(m) - values2(m))).sum - val normalized = distance.toDouble / commonMarkers.size - StrDistanceResult(id1, id2, distance, commonMarkers.size, normalized) - } - Right(results) - } - } - - private[services] def resolveMemberStrData(member: GroupProjectMember): Future[Option[Map[String, Int]]] = { - member.biosampleAtUri match { - case None => Future.successful(None) - case Some(atUri) => - biosampleRepo.findByAtUri(atUri).flatMap { - case None => Future.successful(None) - case Some(citizenBiosample) => - biosampleMainRepo.findByGuid(citizenBiosample.sampleGuid).flatMap { - case None => Future.successful(None) - case Some((biosample, _)) => - biosample.id match { - case None => Future.successful(None) - case Some(biosampleId) => - for { - calls <- variantCallRepo.findByBiosample(biosampleId) - rates <- strMutationRateRepo.findAll() - } yield { - val strMarkerNames = rates.map(_.markerName).toSet - val strCalls = calls.filter(c => - c.observedState.forall(_.isDigit) && c.observedState.nonEmpty - ) - if (strCalls.isEmpty) None - else { - val markerMap = strCalls.flatMap { call => - call.observedState.toIntOption.map(v => call.variantId.toString -> v) - }.toMap - if (markerMap.nonEmpty) Some(markerMap) else None - } - } - } - } - } - } - } - - private def collectStrData(members: Seq[GroupProjectMember]): Future[Map[Int, Seq[(String, Int)]]] = { - val futures = members.flatMap { m => - m.id.map { memberId => - resolveMemberStrData(m).map { - case None => None - case Some(data) => Some(memberId -> data.toSeq) - } - } - } - Future.sequence(futures).map(_.flatten.toMap) - } - - private def validateMembersForComparison( - project: GroupProject, - m1: GroupProjectMember, - m2: GroupProjectMember, - projectId: Int - ): Option[String] = { - if (m1.groupProjectId != projectId) Some("Member 1 not in this project") - else if (m2.groupProjectId != projectId) Some("Member 2 not in this project") - else if (m1.status != "ACTIVE") 
Some("Member 1 is not active") - else if (m2.status != "ACTIVE") Some("Member 2 is not active") - else { - val eff1 = EffectiveVisibility.compute(project, m1.visibility) - val eff2 = EffectiveVisibility.compute(project, m2.visibility) - if (eff1.strVisibility == "NONE") Some("Member 1 STR data is not shared") - else if (eff2.strVisibility == "NONE") Some("Member 2 STR data is not shared") - else None - } - } - - private def resolveViewerPermission( - project: GroupProject, - projectId: Int, - viewerDid: String, - requiredLevel: String - ): Future[Either[String, Unit]] = { - val projectStrLevel = project.strPolicy match { - case "HIDDEN" => "NONE" - case "DISTANCE_ONLY" => "DISTANCE_CALCULATION_ONLY" - case "MODAL_COMPARISON" => "MODAL_COMPARISON_ONLY" - case "MEMBERS_ONLY_RAW" => "FULL_TO_MEMBERS" - case "PUBLIC_RAW" => "FULL_PUBLIC" - case _ => "NONE" - } - - if (projectStrLevel == "NONE") - return Future.successful(Left("Project STR policy does not allow STR operations")) - - val strRank = Map( - "NONE" -> 0, "DISTANCE_CALCULATION_ONLY" -> 1, "MODAL_COMPARISON_ONLY" -> 2, - "FULL_TO_MEMBERS" -> 3, "FULL_PUBLIC" -> 4 - ) - val projectRank = strRank.getOrElse(projectStrLevel, 0) - val requiredRank = strRank.getOrElse(requiredLevel, 0) - - if (projectRank < requiredRank) - return Future.successful(Left("Project STR policy does not allow this operation")) - - if (projectStrLevel == "FULL_PUBLIC") - Future.successful(Right(())) - else { - memberRepo.findByProjectAndCitizen(projectId, viewerDid).map { - case Some(m) if m.status == "ACTIVE" => Right(()) - case _ => Left("Only active project members can access STR data") - } - } - } -} diff --git a/app/services/ProjectTreeAggregationService.scala b/app/services/ProjectTreeAggregationService.scala deleted file mode 100644 index 12e80fb1..00000000 --- a/app/services/ProjectTreeAggregationService.scala +++ /dev/null @@ -1,243 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import 
models.HaplogroupType -import models.domain.{EffectiveVisibility, GroupProject, GroupProjectMember} -import models.domain.haplogroups.Haplogroup -import play.api.Logging -import play.api.libs.json.{Json, OFormat} -import repositories.{BiosampleHaplogroupRepository, CitizenBiosampleRepository, GroupProjectMemberRepository, GroupProjectRepository, HaplogroupCoreRepository} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -case class AggregatedTreeNode( - haplogroupId: Int, - haplogroupName: String, - memberCount: Int, - cumulativeCount: Int, - children: Seq[AggregatedTreeNode], - formedYbp: Option[Int] = None, - tmrcaYbp: Option[Int] = None - ) - -object AggregatedTreeNode { - implicit val format: OFormat[AggregatedTreeNode] = Json.format[AggregatedTreeNode] -} - -case class ProjectTreeSummary( - projectId: Int, - projectName: String, - lineageType: String, - rootNodes: Seq[AggregatedTreeNode], - totalMembers: Int, - membersWithHaplogroup: Int, - generatedAt: LocalDateTime = LocalDateTime.now() - ) - -object ProjectTreeSummary { - implicit val format: OFormat[ProjectTreeSummary] = Json.format[ProjectTreeSummary] -} - -@Singleton -class ProjectTreeAggregationService @Inject()( - projectRepo: GroupProjectRepository, - memberRepo: GroupProjectMemberRepository, - biosampleRepo: CitizenBiosampleRepository, - biosampleHaplogroupRepo: BiosampleHaplogroupRepository, - haplogroupRepo: HaplogroupCoreRepository - )(implicit ec: ExecutionContext) extends Logging { - - def getAggregatedTree( - projectId: Int, - lineageType: String, - viewerDid: String - ): Future[Either[String, ProjectTreeSummary]] = { - val haplogroupType = lineageType match { - case "Y_DNA" => Some(HaplogroupType.Y) - case "MT_DNA" => Some(HaplogroupType.MT) - case _ => None - } - - haplogroupType match { - case None => Future.successful(Left(s"Invalid lineage type: $lineageType")) - case Some(hgType) => - projectRepo.findById(projectId).flatMap { - case None => 
Future.successful(Left("Project not found")) - case Some(project) => - canViewTree(project, projectId, viewerDid).flatMap { - case false => Future.successful(Left("Insufficient permissions to view project tree")) - case true => - buildAggregatedTree(project, hgType).map(Right(_)) - } - } - } - } - - def getBranchMemberCount( - projectId: Int, - haplogroupId: Int, - lineageType: String - ): Future[Either[String, Int]] = { - val haplogroupType = lineageType match { - case "Y_DNA" => Some(HaplogroupType.Y) - case "MT_DNA" => Some(HaplogroupType.MT) - case _ => None - } - - haplogroupType match { - case None => Future.successful(Left(s"Invalid lineage type: $lineageType")) - case Some(hgType) => - projectRepo.findById(projectId).flatMap { - case None => Future.successful(Left("Project not found")) - case Some(project) => - resolveHaplogroupAssignments(project, hgType).flatMap { assignments => - haplogroupRepo.getDescendants(haplogroupId).map { descendants => - val descendantIds = descendants.flatMap(_.id).toSet + haplogroupId - val count = assignments.count(hgId => descendantIds.contains(hgId)) - Right(count) - } - } - } - } - } - - private def buildAggregatedTree(project: GroupProject, hgType: HaplogroupType): Future[ProjectTreeSummary] = { - for { - members <- memberRepo.findByProjectAndStatus(project.id.get, "ACTIVE") - treeEligibleMembers = members.filter { m => - val effective = EffectiveVisibility.compute(project, m.visibility) - effective.showInTree - } - assignments <- resolveHaplogroupAssignments(project, hgType, treeEligibleMembers) - haplogroupCounts = assignments.groupBy(identity).map { case (hgId, occurrences) => hgId -> occurrences.size } - rootNodes <- buildTreeFromCounts(haplogroupCounts, hgType) - } yield ProjectTreeSummary( - projectId = project.id.get, - projectName = project.projectName, - lineageType = if (hgType == HaplogroupType.Y) "Y_DNA" else "MT_DNA", - rootNodes = rootNodes, - totalMembers = treeEligibleMembers.size, - membersWithHaplogroup = 
assignments.size - ) - } - - private[services] def resolveHaplogroupAssignments( - project: GroupProject, - hgType: HaplogroupType, - members: Seq[GroupProjectMember] - ): Future[Seq[Int]] = { - val biosampleFutures = members.flatMap(_.biosampleAtUri).map { atUri => - biosampleRepo.findByAtUri(atUri).flatMap { - case None => Future.successful(None) - case Some(biosample) => - biosampleHaplogroupRepo.findBySampleGuid(biosample.sampleGuid).map { - case None => None - case Some(bh) => - hgType match { - case HaplogroupType.Y => bh.yHaplogroupId - case HaplogroupType.MT => bh.mtHaplogroupId - } - } - } - } - Future.sequence(biosampleFutures).map(_.flatten) - } - - private def resolveHaplogroupAssignments( - project: GroupProject, - hgType: HaplogroupType - ): Future[Seq[Int]] = { - for { - members <- memberRepo.findByProjectAndStatus(project.id.get, "ACTIVE") - treeEligible = members.filter { m => - EffectiveVisibility.compute(project, m.visibility).showInTree - } - assignments <- resolveHaplogroupAssignments(project, hgType, treeEligible) - } yield assignments - } - - private[services] def buildTreeFromCounts( - haplogroupCounts: Map[Int, Int], - hgType: HaplogroupType - ): Future[Seq[AggregatedTreeNode]] = { - if (haplogroupCounts.isEmpty) return Future.successful(Seq.empty) - - for { - // Get ancestor paths for all haplogroups - pathsWithHaplogroups <- Future.sequence( - haplogroupCounts.keys.toSeq.map { hgId => - for { - haplogroup <- haplogroupRepo.findById(hgId) - ancestors <- haplogroupRepo.getAncestors(hgId) - } yield (hgId, haplogroup, ancestors) - } - ) - - // Build a set of all haplogroup IDs on any path (ancestors + direct) - allHaplogroupIds = pathsWithHaplogroups.flatMap { case (hgId, hg, ancestors) => - ancestors.flatMap(_.id) :+ hgId - }.distinct - - // Get all relationships for the haplogroup type - relationships <- haplogroupRepo.getAllRelationships(hgType) - - // Filter relationships to only include relevant nodes - relevantRelationships = 
relationships.filter { case (childId, parentId) => - allHaplogroupIds.contains(childId) && allHaplogroupIds.contains(parentId) - } - - // Build parent->children map - childrenMap = relevantRelationships.groupBy(_._2).map { case (parentId, rels) => - parentId -> rels.map(_._1) - } - - // Build haplogroup lookup from paths data - haplogroupLookup = pathsWithHaplogroups.flatMap { case (_, hg, ancestors) => - hg.map(h => h.id.get -> h).toSeq ++ ancestors.flatMap(a => a.id.map(_ -> a)) - }.toMap - - // Find root nodes (those in our set that have no parent in our set) - childIds = relevantRelationships.map(_._1).toSet - rootIds = allHaplogroupIds.filterNot(childIds.contains) - } yield { - rootIds.flatMap(rootId => buildNode(rootId, haplogroupCounts, childrenMap, haplogroupLookup)) - } - } - - private def buildNode( - haplogroupId: Int, - directCounts: Map[Int, Int], - childrenMap: Map[Int, Seq[Int]], - haplogroupLookup: Map[Int, Haplogroup] - ): Option[AggregatedTreeNode] = { - haplogroupLookup.get(haplogroupId).map { haplogroup => - val childNodes = childrenMap.getOrElse(haplogroupId, Seq.empty) - .flatMap(childId => buildNode(childId, directCounts, childrenMap, haplogroupLookup)) - .sortBy(_.haplogroupName) - - val directCount = directCounts.getOrElse(haplogroupId, 0) - val cumulativeCount = directCount + childNodes.map(_.cumulativeCount).sum - - AggregatedTreeNode( - haplogroupId = haplogroupId, - haplogroupName = haplogroup.name, - memberCount = directCount, - cumulativeCount = cumulativeCount, - children = childNodes, - formedYbp = haplogroup.formedYbp, - tmrcaYbp = haplogroup.tmrcaYbp - ) - } - } - - private def canViewTree(project: GroupProject, projectId: Int, viewerDid: String): Future[Boolean] = { - if (project.publicTreeView) Future.successful(true) - else { - memberRepo.findByProjectAndCitizen(projectId, viewerDid).map { - case Some(m) if m.status == "ACTIVE" => true - case _ => false - } - } - } -} diff --git a/app/services/ProposalEngine.scala 
b/app/services/ProposalEngine.scala deleted file mode 100644 index adb75dfd..00000000 --- a/app/services/ProposalEngine.scala +++ /dev/null @@ -1,262 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.discovery.* -import play.api.Logging -import repositories.{PrivateVariantRepository, ProposedBranchRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Creates and updates proposed branches based on shared private variants - * across biosamples. Uses Jaccard similarity to match incoming variant sets - * against existing proposals under the same parent haplogroup. - */ -class ProposalEngine @Inject()( - proposedBranchRepo: ProposedBranchRepository, - privateVariantRepo: PrivateVariantRepository -)(implicit ec: ExecutionContext) extends Logging { - - private val DefaultJaccardMatchThreshold = 0.8 - private val DefaultJaccardSplitLower = 0.5 - - /** - * Process a biosample's private variants and create/update proposals. - * Groups variants by parent haplogroup, then matches against existing proposals. - */ - def processDiscovery( - sampleRef: SampleReference, - privateVariants: Seq[BiosamplePrivateVariant] - ): Future[Seq[ProposedBranch]] = { - if (privateVariants.isEmpty) return Future.successful(Seq.empty) - - // Group by (terminalHaplogroupId, haplogroupType) since a sample could have - // private variants under different terminals (unlikely but possible) - val grouped = privateVariants.groupBy(pv => (pv.terminalHaplogroupId, pv.haplogroupType)) - - Future.sequence(grouped.toSeq.map { case ((parentHgId, hgType), pvs) => - val variantIds = pvs.map(_.variantId).toSet - findOrCreateProposal(parentHgId, hgType, variantIds, sampleRef) - }) - } - - /** - * Find an existing proposal matching the variant set (Jaccard >= 0.8), - * or create a new one. Adds evidence for the biosample either way. 
- */ - def findOrCreateProposal( - parentHaplogroupId: Int, - haplogroupType: HaplogroupType, - variantIds: Set[Int], - sampleRef: SampleReference - ): Future[ProposedBranch] = { - for { - // Get configurable thresholds - matchThreshold <- getConfigDouble(haplogroupType, "jaccard_match_threshold", DefaultJaccardMatchThreshold) - splitLower <- getConfigDouble(haplogroupType, "jaccard_split_threshold", DefaultJaccardSplitLower) - - // Find all active proposals under this parent - existingProposals <- proposedBranchRepo.findByParentAndType(parentHaplogroupId, haplogroupType) - - // Calculate Jaccard similarity for each - scored <- Future.sequence(existingProposals.map { proposal => - proposedBranchRepo.getVariantIds(proposal.id.get).map { proposalVariantIds => - val jaccard = jaccardSimilarity(variantIds, proposalVariantIds) - (proposal, proposalVariantIds, jaccard) - } - }) - - // Find best match - bestMatch = scored.filter(_._3 >= matchThreshold).sortBy(-_._3).headOption - splitCandidates = scored.filter(s => s._3 >= splitLower && s._3 < matchThreshold) - - result <- bestMatch match { - case Some((proposal, proposalVariantIds, jaccard)) => - // Match found — add evidence and update - addEvidenceToProposal(proposal, variantIds, proposalVariantIds, sampleRef) - - case None => - // No match — create new proposal - createNewProposal(parentHaplogroupId, haplogroupType, variantIds, sampleRef) - } - - // Flag split candidates (update notes but don't change status) - _ <- Future.sequence(splitCandidates.map { case (proposal, _, jaccard) => - val splitNote = s"Partial match (Jaccard=${"%.2f".format(jaccard)}) with sample ${sampleRef.sampleGuid} — may need split review" - val existingNotes = proposal.notes.map(_ + "; ").getOrElse("") - proposedBranchRepo.update(proposal.copy(notes = Some(existingNotes + splitNote))) - }) - } yield result - } - - /** - * Add a biosample as evidence to an existing proposal. - * Updates variant evidence counts and recalculates consensus. 
- */ - private def addEvidenceToProposal( - proposal: ProposedBranch, - sampleVariantIds: Set[Int], - proposalVariantIds: Set[Int], - sampleRef: SampleReference - ): Future[ProposedBranch] = { - val matchCount = (sampleVariantIds intersect proposalVariantIds).size - val mismatchCount = (sampleVariantIds diff proposalVariantIds).size - - for { - // Add evidence record - _ <- proposedBranchRepo.addEvidence(ProposedBranchEvidence( - proposedBranchId = proposal.id.get, - sampleType = sampleRef.sampleType, - sampleId = sampleRef.sampleId, - sampleGuid = sampleRef.sampleGuid, - variantMatchCount = matchCount, - variantMismatchCount = mismatchCount - )) - - // Add any new variants from this sample to the proposal - newVariants = sampleVariantIds diff proposalVariantIds - _ <- Future.sequence(newVariants.toSeq.map { vid => - proposedBranchRepo.addVariant(ProposedBranchVariant( - proposedBranchId = proposal.id.get, - variantId = vid, - isDefining = false, - evidenceCount = 1 - )) - }) - - // Update evidence counts for existing variants - sharedVariants = sampleVariantIds intersect proposalVariantIds - _ <- Future.sequence(sharedVariants.toSeq.map { vid => - proposedBranchRepo.getVariants(proposal.id.get).flatMap { variants => - variants.find(_.variantId == vid) match { - case Some(pbv) => - proposedBranchRepo.updateVariantEvidence(proposal.id.get, vid, pbv.evidenceCount + 1) - case None => - Future.successful(false) - } - } - }) - - // Recalculate consensus - evidenceCount <- proposedBranchRepo.countEvidence(proposal.id.get) - confidence = calculateConfidenceScore(evidenceCount, matchCount, mismatchCount) - - // Check thresholds - updated <- updateConsensusAndThresholds(proposal, evidenceCount, confidence) - } yield updated - } - - /** - * Create a new proposal from a set of private variants. 
- */ - private def createNewProposal( - parentHaplogroupId: Int, - haplogroupType: HaplogroupType, - variantIds: Set[Int], - sampleRef: SampleReference - ): Future[ProposedBranch] = { - for { - // Create proposal - proposal <- proposedBranchRepo.create(ProposedBranch( - parentHaplogroupId = parentHaplogroupId, - haplogroupType = haplogroupType, - consensusCount = 1, - confidenceScore = 0.0 - )) - - // Add defining variants - _ <- Future.sequence(variantIds.toSeq.map { vid => - proposedBranchRepo.addVariant(ProposedBranchVariant( - proposedBranchId = proposal.id.get, - variantId = vid, - isDefining = true, - evidenceCount = 1 - )) - }) - - // Add evidence - _ <- proposedBranchRepo.addEvidence(ProposedBranchEvidence( - proposedBranchId = proposal.id.get, - sampleType = sampleRef.sampleType, - sampleId = sampleRef.sampleId, - sampleGuid = sampleRef.sampleGuid, - variantMatchCount = variantIds.size, - variantMismatchCount = 0 - )) - - _ = logger.info(s"Created new proposal ${proposal.id.get} under haplogroup $parentHaplogroupId " + - s"with ${variantIds.size} defining variants from sample ${sampleRef.sampleGuid}") - } yield proposal - } - - /** - * Update consensus count and check against thresholds. - * Transitions: PENDING -> READY_FOR_REVIEW when consensus_threshold met. 
- */ - private def updateConsensusAndThresholds( - proposal: ProposedBranch, - newConsensusCount: Int, - newConfidence: Double - ): Future[ProposedBranch] = { - for { - consensusThreshold <- getConfigInt(proposal.haplogroupType, "consensus_threshold", 3) - autoPromoteThreshold <- getConfigInt(proposal.haplogroupType, "auto_promote_threshold", 10) - confidenceThreshold <- getConfigDouble(proposal.haplogroupType, "confidence_threshold", 0.95) - - newStatus = proposal.status match { - case ProposedBranchStatus.Pending if newConsensusCount >= consensusThreshold => - logger.info(s"Proposal ${proposal.id.get} reached consensus threshold ($newConsensusCount >= $consensusThreshold)") - ProposedBranchStatus.ReadyForReview - case other => other - } - - _ <- proposedBranchRepo.updateConsensus(proposal.id.get, newConsensusCount, newConfidence) - _ <- if (newStatus != proposal.status) { - proposedBranchRepo.updateStatus(proposal.id.get, newStatus) - } else { - Future.successful(true) - } - } yield proposal.copy( - consensusCount = newConsensusCount, - confidenceScore = newConfidence, - status = newStatus - ) - } - - /** - * Calculate Jaccard similarity: |A ∩ B| / |A ∪ B| - */ - private[services] def jaccardSimilarity(a: Set[Int], b: Set[Int]): Double = { - if (a.isEmpty && b.isEmpty) return 1.0 - val intersection = (a intersect b).size.toDouble - val union = (a union b).size.toDouble - if (union == 0) 0.0 else intersection / union - } - - /** - * Calculate a confidence score based on evidence strength. - * Simplified version using sample count and variant consistency. 
- */ - private[services] def calculateConfidenceScore( - evidenceCount: Int, - matchCount: Int, - mismatchCount: Int - ): Double = { - val totalVariants = matchCount + mismatchCount - val matchRatio = if (totalVariants > 0) matchCount.toDouble / totalVariants else 0.0 - - // Weight: 60% sample count (capped at 10), 40% variant consistency - val sampleScore = Math.min(evidenceCount / 10.0, 1.0) - val score = 0.6 * sampleScore + 0.4 * matchRatio - Math.min(score, 1.0) - } - - private def getConfigDouble(hgType: HaplogroupType, key: String, default: Double): Future[Double] = - proposedBranchRepo.getConfig(hgType, key).map(_.flatMap(_.toDoubleOption).getOrElse(default)) - - private def getConfigInt(hgType: HaplogroupType, key: String, default: Int): Future[Int] = - proposedBranchRepo.getConfig(hgType, key).map(_.flatMap(_.toIntOption).getOrElse(default)) -} diff --git a/app/services/PublicationDiscoveryService.scala b/app/services/PublicationDiscoveryService.scala deleted file mode 100644 index e18bb762..00000000 --- a/app/services/PublicationDiscoveryService.scala +++ /dev/null @@ -1,130 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.publications.{PublicationCandidate, PublicationSearchRun} -import play.api.Logging -import repositories.{PublicationCandidateRepository, PublicationRepository, PublicationSearchConfigRepository, PublicationSearchRunRepository} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class PublicationDiscoveryService @Inject()( - searchConfigRepository: PublicationSearchConfigRepository, - candidateRepository: PublicationCandidateRepository, - runRepository: PublicationSearchRunRepository, - publicationRepository: PublicationRepository, - publicationService: PublicationService, - openAlexService: OpenAlexService, - relevanceScoringService: RelevanceScoringService, - scoringFeedbackService: ScoringFeedbackService - )(implicit ec: ExecutionContext) 
extends Logging { - - def acceptCandidate(candidateId: Int, reviewedBy: java.util.UUID): Future[Option[models.domain.publications.Publication]] = { - candidateRepository.findById(candidateId).flatMap { - case Some(candidate) => - // 1. Mark candidate as accepted - candidateRepository.updateStatus(candidateId, "accepted", Some(reviewedBy), None).flatMap { success => - if (success) { - // 2. Create Publication from Candidate - // We assume the DOI is present if we are accepting it. If not, we can't import it easily via existing flow. - candidate.doi match { - case Some(doi) => - publicationService.processPublication(doi, forceRefresh = true) - case None => - logger.warn(s"Candidate $candidateId has no DOI, cannot auto-import.") - Future.successful(None) - } - } else { - Future.successful(None) - } - } - case None => Future.successful(None) - } - } - - def rejectCandidate(candidateId: Int, reviewedBy: java.util.UUID, reason: Option[String]): Future[Boolean] = { - candidateRepository.updateStatus(candidateId, "rejected", Some(reviewedBy), reason) - } - - def deferCandidate(candidateId: Int, reviewedBy: java.util.UUID): Future[Boolean] = { - candidateRepository.updateStatus(candidateId, "deferred", Some(reviewedBy), None) - } - - def bulkAcceptCandidates(candidateIds: Seq[Int], reviewedBy: java.util.UUID): Future[Seq[Option[models.domain.publications.Publication]]] = { - Future.sequence(candidateIds.map(id => acceptCandidate(id, reviewedBy))) - } - - def bulkRejectCandidates(candidateIds: Seq[Int], reviewedBy: java.util.UUID, reason: Option[String]): Future[Int] = { - candidateRepository.bulkUpdateStatus(candidateIds, "rejected", reviewedBy, reason) - } - - def bulkDeferCandidates(candidateIds: Seq[Int], reviewedBy: java.util.UUID): Future[Int] = { - candidateRepository.bulkUpdateStatus(candidateIds, "deferred", reviewedBy, None) - } - - def refreshLearnedWeights(): Future[Option[LearnedWeights]] = { - scoringFeedbackService.computeLearnedWeights().map { - case 
Some(weights) => - relevanceScoringService.applyLearnedWeights(weights) - Some(weights) - case None => - relevanceScoringService.clearLearnedWeights() - None - } - } - - def runDiscovery(): Future[Unit] = { - logger.info("Starting publication discovery run...") - - // Refresh learned weights from curator feedback before scoring new candidates - refreshLearnedWeights().flatMap { learnedWeights => - learnedWeights.foreach(w => logger.info(s"Using learned weights from ${w.sampleSize} reviewed candidates.")) - - searchConfigRepository.getEnabledConfigs().flatMap { configs => - logger.info(s"Found ${configs.size} enabled search configurations.") - - val runs = configs.map { config => - val startTime = System.currentTimeMillis() - - openAlexService.searchWorks(config.searchQuery).flatMap { rawCandidates => - val existingDoisFuture = publicationRepository.getAllDois.map(_.toSet) - - existingDoisFuture.flatMap { existingDois => - val newCandidates = rawCandidates.filterNot { c => - c.doi.exists(existingDois.contains) - } - - val scoredCandidates = relevanceScoringService.scoreCandidates(newCandidates) - - candidateRepository.saveCandidates(scoredCandidates).flatMap { savedCandidates => - val endTime = System.currentTimeMillis() - val duration = (endTime - startTime).toInt - - val run = PublicationSearchRun( - id = None, - configId = config.id.get, - runAt = LocalDateTime.now(), - candidatesFound = rawCandidates.size, - newCandidates = savedCandidates.size, - queryUsed = Some(config.searchQuery), - durationMs = Some(duration) - ) - - for { - _ <- runRepository.create(run) - _ <- searchConfigRepository.updateLastRun(config.id.get, LocalDateTime.now()) - } yield () - } - } - }.recover { - case e: Exception => - logger.error(s"Error running discovery for config '${config.name}' (ID: ${config.id}): ${e.getMessage}", e) - } - } - - Future.sequence(runs).map(_ => logger.info("Publication discovery run completed.")) - } - } - } -} \ No newline at end of file diff --git 
a/app/services/PublicationService.scala b/app/services/PublicationService.scala deleted file mode 100644 index a8992485..00000000 --- a/app/services/PublicationService.scala +++ /dev/null @@ -1,107 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.api.{PaginatedResult, PublicationWithEnaStudiesAndSampleCount} -import models.domain.publications.Publication -import org.apache.pekko.actor.ActorRef -import org.apache.pekko.pattern.ask -import org.apache.pekko.util.Timeout -import play.api.Logging -import repositories.PublicationRepository - -import javax.inject.Named -import scala.concurrent.duration.DurationInt -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service that handles business logic related to publications and their associated details. - * - * @constructor Creates a new instance of the PublicationService class. - * @param publicationRepository Repository interface for fetching publication-related data. - * @param ec Implicit execution context for managing asynchronous operations. - */ -class PublicationService @Inject()( - publicationRepository: PublicationRepository, - @Named("publication-update-actor") publicationUpdateActor: ActorRef - )(implicit ec: ExecutionContext) extends Logging { - /** - * Retrieves a paginated list of publications along with their associated ENA studies and sample counts. - * - * @param page The page number to retrieve (1-based index). - * @param pageSize The maximum number of publications to include in each page. - * @return A Future containing a PaginatedResult with the list of publications, their associated details, - * the pagination information, and the total number of publications. 
- */ - def getPaginatedPublicationsWithDetails(page: Int, pageSize: Int): Future[PaginatedResult[PublicationWithEnaStudiesAndSampleCount]] = { - for { - totalItems <- publicationRepository.countAllPublications() - publicationsWithDetails <- publicationRepository.findPublicationsWithDetailsPaginated(page, pageSize) - } yield PaginatedResult(publicationsWithDetails, page, pageSize, totalItems) - } - - /** - * Retrieves all publications along with their associated ENA studies and sample counts. - * - * This method fetches all records by using a very large page size to ensure all publications are retrieved in one operation. - * - * @return A Future containing a sequence of PublicationWithEnaStudiesAndSampleCount objects, - * each representing a publication with its associated ENA studies and sample count. - */ - def getAllPublicationsWithDetails: Future[Seq[PublicationWithEnaStudiesAndSampleCount]] = { - publicationRepository.findPublicationsWithDetailsPaginated(1, Int.MaxValue) // Fetch all by using a very large pageSize - } - - /** - * Searches publications by a query string and returns paginated results. - * - * @param query The search query to match against title, authors, and abstract - * @param page The page number to retrieve (1-based index) - * @param pageSize The number of items per page - * @return A Future containing a PaginatedResult with matching publications - */ - def searchPublications(query: String, page: Int, pageSize: Int): Future[PaginatedResult[PublicationWithEnaStudiesAndSampleCount]] = { - publicationRepository.searchPublications(query, page, pageSize).map { case (publications, totalCount) => - PaginatedResult(publications, page, pageSize, totalCount) - } - } - - /** - * Processes a publication by DOI, optionally forcing a refresh of the data. 
- * - * @param doi The DOI of the publication to process - * @param forceRefresh Whether to force a refresh of the publication data - * @return A Future containing an Option[Publication] - */ - def processPublication(doi: String, forceRefresh: Boolean): Future[Option[Publication]] = { - import actors.PublicationUpdateActor.{UpdateResult, UpdateSinglePublication} - - if (forceRefresh) { - implicit val timeout: Timeout = Timeout(30.seconds) - (publicationUpdateActor ? UpdateSinglePublication(doi)) - .mapTo[UpdateResult] - .flatMap { - case UpdateResult(_, true, _) => publicationRepository.findByDoi(doi) - case UpdateResult(_, false, msg) => - logger.error(s"Failed to update publication: $msg") - Future.successful(None) - } - } else { - // First try to find existing publication - publicationRepository.findByDoi(doi).flatMap { - case Some(pub) => Future.successful(Some(pub)) - case None => - // If not found, fetch it fresh - implicit val timeout: Timeout = Timeout(30.seconds) - (publicationUpdateActor ? UpdateSinglePublication(doi)) - .mapTo[UpdateResult] - .flatMap { - case UpdateResult(_, true, _) => publicationRepository.findByDoi(doi) - case UpdateResult(_, false, msg) => - logger.error(s"Failed to update publication: $msg") - Future.successful(None) - } - } - } - } - -} diff --git a/app/services/RelevanceScoringService.scala b/app/services/RelevanceScoringService.scala deleted file mode 100644 index ab569541..00000000 --- a/app/services/RelevanceScoringService.scala +++ /dev/null @@ -1,236 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.domain.publications.PublicationCandidate -import play.api.libs.json.{JsArray, JsValue} -import play.api.{Configuration, Logging} - -import java.util.concurrent.atomic.AtomicReference -import scala.concurrent.ExecutionContext - -/** - * Calculates relevance scores for publication candidates using multiple signals: - * - * 1. Keyword matching — title/abstract matched against domain-relevant terms - * 2. 
OpenAlex concept weighting — concepts with scores from OpenAlex taxonomy - * 3. Citation impact — normalized citation percentile and raw count - * 4. Journal relevance — bonus for known high-value journals - * - * Final score is a weighted combination normalized to [0, 1]. - */ -class RelevanceScoringService @Inject()( - configuration: Configuration -)(implicit ec: ExecutionContext) extends Logging { - - // Default weights for each scoring component (should sum to 1.0) - private val defaultKeywordWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.keywordWeight").getOrElse(0.35) - private val defaultConceptWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.conceptWeight").getOrElse(0.25) - private val defaultCitationWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.citationWeight").getOrElse(0.20) - private val defaultJournalWeight: Double = configuration.getOptional[Double]("publication-discovery.scoring.journalWeight").getOrElse(0.20) - - // Learned weights from feedback loop (overrides defaults when set) - private val learnedWeightsRef: AtomicReference[Option[LearnedWeights]] = new AtomicReference(None) - - def applyLearnedWeights(weights: LearnedWeights): Unit = { - learnedWeightsRef.set(Some(weights)) - logger.info(s"Applied learned weights from ${weights.sampleSize} samples: " + - s"keyword=${f"${weights.keywordWeight}%.3f"}, concept=${f"${weights.conceptWeight}%.3f"}, " + - s"citation=${f"${weights.citationWeight}%.3f"}, journal=${f"${weights.journalWeight}%.3f"}") - } - - def clearLearnedWeights(): Unit = { - learnedWeightsRef.set(None) - logger.info("Cleared learned weights, reverting to defaults.") - } - - def getActiveWeights: (Double, Double, Double, Double) = { - learnedWeightsRef.get() match { - case Some(lw) => (lw.keywordWeight, lw.conceptWeight, lw.citationWeight, lw.journalWeight) - case None => (defaultKeywordWeight, defaultConceptWeight, 
defaultCitationWeight, defaultJournalWeight) - } - } - - private def keywordWeight: Double = getActiveWeights._1 - private def conceptWeight: Double = getActiveWeights._2 - private def citationWeight: Double = getActiveWeights._3 - private def journalWeight: Double = getActiveWeights._4 - - // High-value keywords for genomics/phylogenetics domain - private[services] val primaryKeywords: Set[String] = Set( - "haplogroup", "y-dna", "y-chromosome", "mtdna", "mitochondrial dna", - "phylogenetic", "phylogeny", "ancient dna", "adna", - "y-str", "snp", "whole genome sequencing", "population genetics", - "human migration", "coalescent", "tmrca", "molecular clock" - ) - - private[services] val secondaryKeywords: Set[String] = Set( - "genetic genealogy", "paternal lineage", "maternal lineage", - "uniparental", "non-recombining", "nry", - "demographic history", "founder effect", "genetic drift", - "admixture", "archaeogenetics", "paleogenomics", - "short tandem repeat", "microsatellite" - ) - - // OpenAlex concepts that indicate high relevance - private[services] val highValueConcepts: Set[String] = Set( - "haplogroup", "y chromosome", "mitochondrial dna", "human y-chromosome dna haplogroup", - "phylogenetics", "ancient dna", "population genetics", - "genetic genealogy", "molecular phylogenetics" - ) - - private[services] val mediumValueConcepts: Set[String] = Set( - "genetics", "genomics", "human genetics", "molecular biology", - "single-nucleotide polymorphism", "dna sequencing", - "biological anthropology", "archaeogenetics" - ) - - // Known high-value journals for this domain - private[services] val highValueJournals: Set[String] = Set( - "nature", "science", "nature genetics", "nature communications", - "molecular biology and evolution", "american journal of human genetics", - "european journal of human genetics", "genome research", - "human genetics", "human mutation", "genome biology", - "plos genetics", "current biology", "cell", - "proceedings of the national academy 
of sciences", - "annals of human genetics", "genes", "forensic science international: genetics" - ).map(_.toLowerCase) - - /** - * Calculate the composite relevance score for a candidate. - */ - def score(candidate: PublicationCandidate): Double = { - val keywordScore = calculateKeywordScore(candidate) - val conceptScore = calculateConceptScore(candidate.rawMetadata) - val citationScore = calculateCitationScore(candidate.rawMetadata) - val journalScore = calculateJournalScore(candidate.journalName) - - val composite = keywordWeight * keywordScore + - conceptWeight * conceptScore + - citationWeight * citationScore + - journalWeight * journalScore - - // Clamp to [0, 1] - math.max(0.0, math.min(1.0, composite)) - } - - /** - * Score a batch of candidates, returning them with updated relevance scores. - */ - def scoreCandidates(candidates: Seq[PublicationCandidate]): Seq[PublicationCandidate] = { - candidates.map { c => - val newScore = score(c) - c.copy(relevanceScore = Some(newScore)) - } - } - - /** - * Keyword-based scoring: check title and abstract for domain-relevant terms. - * Primary keywords score higher than secondary keywords. - */ - private[services] def calculateKeywordScore(candidate: PublicationCandidate): Double = { - val text = (candidate.title + " " + candidate.`abstract`.getOrElse("")).toLowerCase - - val primaryHits = primaryKeywords.count(kw => text.contains(kw)) - val secondaryHits = secondaryKeywords.count(kw => text.contains(kw)) - - // Each primary keyword contributes 0.15, each secondary 0.08, capped at 1.0 - val rawScore = primaryHits * 0.15 + secondaryHits * 0.08 - math.min(1.0, rawScore) - } - - /** - * OpenAlex concept-based scoring: extract concepts from raw metadata - * and weight by concept relevance and OpenAlex-assigned score. - * - * OpenAlex concepts have structure: [{display_name: "...", score: 0.8, ...}, ...] 
- */ - private[services] def calculateConceptScore(rawMetadata: Option[JsValue]): Double = { - rawMetadata.flatMap { json => - // Try both "concepts" (older API) and "topics" (newer API) - val concepts = (json \ "concepts").asOpt[JsArray] - .orElse((json \ "topics").asOpt[JsArray]) - .map(_.value.toSeq) - .getOrElse(Seq.empty) - - if (concepts.isEmpty) None - else { - var totalScore = 0.0 - - for (concept <- concepts) { - val name = (concept \ "display_name").asOpt[String].getOrElse("").toLowerCase - val apiScore = (concept \ "score").asOpt[Double].getOrElse(0.0) - - if (highValueConcepts.exists(hvc => name.contains(hvc))) { - totalScore += apiScore * 1.0 // Full weight for high-value - } else if (mediumValueConcepts.exists(mvc => name.contains(mvc))) { - totalScore += apiScore * 0.4 // Reduced weight for medium-value - } - } - - Some(math.min(1.0, totalScore)) - } - }.getOrElse(0.0) - } - - /** - * Citation-based scoring using OpenAlex citation metrics. - * - * Uses citation_normalized_percentile (0-1) if available, - * otherwise falls back to cited_by_count with logarithmic scaling. - */ - private[services] def calculateCitationScore(rawMetadata: Option[JsValue]): Double = { - rawMetadata.flatMap { json => - // Prefer normalized percentile (already 0-1) - val percentile = (json \ "citation_normalized_percentile" \ "value").asOpt[Double] - .orElse((json \ "cited_by_percentile_year" \ "max").asOpt[Double].map(_ / 100.0)) - - percentile.orElse { - // Fallback: logarithmic scaling of raw citation count - (json \ "cited_by_count").asOpt[Int].map { count => - if (count <= 0) 0.0 - else math.min(1.0, math.log10(count.toDouble + 1) / 3.0) // log10(1001)/3 ≈ 1.0 - } - } - }.getOrElse(0.0) - } - - /** - * Journal-based scoring: bonus for publications in known high-value journals. 
- */ - private[services] def calculateJournalScore(journalName: Option[String]): Double = { - journalName.map(_.toLowerCase) match { - case Some(name) if highValueJournals.exists(j => name.contains(j)) => 1.0 - case Some(_) => 0.3 // Known journal, but not high-value - case None => 0.0 - } - } - - /** - * Get a breakdown of scoring components for debugging/display. - */ - def scoreBreakdown(candidate: PublicationCandidate): ScoringBreakdown = { - ScoringBreakdown( - keywordScore = calculateKeywordScore(candidate), - conceptScore = calculateConceptScore(candidate.rawMetadata), - citationScore = calculateCitationScore(candidate.rawMetadata), - journalScore = calculateJournalScore(candidate.journalName), - compositeScore = score(candidate), - keywordWeight = keywordWeight, - conceptWeight = conceptWeight, - citationWeight = citationWeight, - journalWeight = journalWeight - ) - } -} - -case class ScoringBreakdown( - keywordScore: Double, - conceptScore: Double, - citationScore: Double, - journalScore: Double, - compositeScore: Double, - keywordWeight: Double, - conceptWeight: Double, - citationWeight: Double, - journalWeight: Double -) diff --git a/app/services/ScoringFeedbackService.scala b/app/services/ScoringFeedbackService.scala deleted file mode 100644 index 79b1000b..00000000 --- a/app/services/ScoringFeedbackService.scala +++ /dev/null @@ -1,170 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.publications.PublicationCandidate -import play.api.Logging -import repositories.PublicationCandidateRepository - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Learns from curator accept/reject decisions to improve relevance scoring. - * - * Analyzes historical decisions by computing per-component score distributions - * for accepted vs rejected candidates, then derives adjusted weights that - * emphasize components with higher discriminative power. 
- */ -@Singleton -class ScoringFeedbackService @Inject()( - candidateRepository: PublicationCandidateRepository, - relevanceScoringService: RelevanceScoringService -)(implicit ec: ExecutionContext) extends Logging { - - val MinSamplesForFeedback: Int = 10 - - /** - * Analyze all reviewed candidates and compute learned weight adjustments. - * Returns None if insufficient data (< MinSamplesForFeedback reviewed candidates). - */ - def computeLearnedWeights(): Future[Option[LearnedWeights]] = { - candidateRepository.listReviewed().map { reviewed => - if (reviewed.size < MinSamplesForFeedback) { - logger.info(s"Insufficient reviewed candidates (${reviewed.size}/$MinSamplesForFeedback) for feedback learning.") - None - } else { - val accepted = reviewed.filter(_.status == "accepted") - val rejected = reviewed.filter(_.status == "rejected") - - if (accepted.isEmpty || rejected.isEmpty) { - logger.info("Need both accepted and rejected candidates for feedback learning.") - None - } else { - Some(deriveWeights(accepted, rejected)) - } - } - } - } - - /** - * Compute a feedback analysis report with per-component statistics. 
- */ - def analyzeFeedback(): Future[Option[FeedbackAnalysis]] = { - candidateRepository.listReviewed().map { reviewed => - val accepted = reviewed.filter(_.status == "accepted") - val rejected = reviewed.filter(_.status == "rejected") - - if (accepted.isEmpty && rejected.isEmpty) None - else { - val acceptedBreakdowns = accepted.map(relevanceScoringService.scoreBreakdown) - val rejectedBreakdowns = rejected.map(relevanceScoringService.scoreBreakdown) - - Some(FeedbackAnalysis( - totalReviewed = reviewed.size, - acceptedCount = accepted.size, - rejectedCount = rejected.size, - acceptedMeans = computeMeans(acceptedBreakdowns), - rejectedMeans = computeMeans(rejectedBreakdowns), - componentDiscriminativePower = computeDiscriminativePower(acceptedBreakdowns, rejectedBreakdowns) - )) - } - } - } - - private[services] def deriveWeights( - accepted: Seq[PublicationCandidate], - rejected: Seq[PublicationCandidate] - ): LearnedWeights = { - val acceptedBreakdowns = accepted.map(relevanceScoringService.scoreBreakdown) - val rejectedBreakdowns = rejected.map(relevanceScoringService.scoreBreakdown) - - val discriminativePower = computeDiscriminativePower(acceptedBreakdowns, rejectedBreakdowns) - - // Compute new weights proportional to discriminative power, - // blended with original weights for stability (70% original, 30% learned) - val blendRatio = 0.3 - val originalWeights = Map( - "keyword" -> relevanceScoringService.scoreBreakdown(accepted.head).keywordWeight, - "concept" -> relevanceScoringService.scoreBreakdown(accepted.head).conceptWeight, - "citation" -> relevanceScoringService.scoreBreakdown(accepted.head).citationWeight, - "journal" -> relevanceScoringService.scoreBreakdown(accepted.head).journalWeight - ) - - // Normalize discriminative power to sum to 1.0 for use as weights - val totalPower = discriminativePower.values.sum - val learnedRaw = if (totalPower > 0) { - discriminativePower.view.mapValues(_ / totalPower).toMap - } else { - originalWeights - } - - // 
Blend: new_weight = (1 - blend) * original + blend * learned - val blended = originalWeights.map { case (component, origWeight) => - val learnedWeight = learnedRaw.getOrElse(component, origWeight) - component -> ((1.0 - blendRatio) * origWeight + blendRatio * learnedWeight) - } - - // Normalize blended weights to sum to 1.0 - val blendedTotal = blended.values.sum - val normalized = blended.view.mapValues(_ / blendedTotal).toMap - - logger.info(s"Learned weights from ${accepted.size + rejected.size} reviewed candidates: $normalized") - - LearnedWeights( - keywordWeight = normalized("keyword"), - conceptWeight = normalized("concept"), - citationWeight = normalized("citation"), - journalWeight = normalized("journal"), - sampleSize = accepted.size + rejected.size, - discriminativePower = discriminativePower - ) - } - - /** - * Discriminative power = |mean_accepted - mean_rejected| for each component. - * Higher values mean the component better separates accepted from rejected. - */ - private[services] def computeDiscriminativePower( - acceptedBreakdowns: Seq[ScoringBreakdown], - rejectedBreakdowns: Seq[ScoringBreakdown] - ): Map[String, Double] = { - val acceptedMeans = computeMeans(acceptedBreakdowns) - val rejectedMeans = computeMeans(rejectedBreakdowns) - - Map( - "keyword" -> math.abs(acceptedMeans.getOrElse("keyword", 0.0) - rejectedMeans.getOrElse("keyword", 0.0)), - "concept" -> math.abs(acceptedMeans.getOrElse("concept", 0.0) - rejectedMeans.getOrElse("concept", 0.0)), - "citation" -> math.abs(acceptedMeans.getOrElse("citation", 0.0) - rejectedMeans.getOrElse("citation", 0.0)), - "journal" -> math.abs(acceptedMeans.getOrElse("journal", 0.0) - rejectedMeans.getOrElse("journal", 0.0)) - ) - } - - private[services] def computeMeans(breakdowns: Seq[ScoringBreakdown]): Map[String, Double] = { - if (breakdowns.isEmpty) return Map("keyword" -> 0.0, "concept" -> 0.0, "citation" -> 0.0, "journal" -> 0.0) - - val n = breakdowns.size.toDouble - Map( - "keyword" -> 
breakdowns.map(_.keywordScore).sum / n, - "concept" -> breakdowns.map(_.conceptScore).sum / n, - "citation" -> breakdowns.map(_.citationScore).sum / n, - "journal" -> breakdowns.map(_.journalScore).sum / n - ) - } -} - -case class LearnedWeights( - keywordWeight: Double, - conceptWeight: Double, - citationWeight: Double, - journalWeight: Double, - sampleSize: Int, - discriminativePower: Map[String, Double] -) - -case class FeedbackAnalysis( - totalReviewed: Int, - acceptedCount: Int, - rejectedCount: Int, - acceptedMeans: Map[String, Double], - rejectedMeans: Map[String, Double], - componentDiscriminativePower: Map[String, Double] -) diff --git a/app/services/StrAgeService.scala b/app/services/StrAgeService.scala deleted file mode 100644 index b97f2f24..00000000 --- a/app/services/StrAgeService.scala +++ /dev/null @@ -1,291 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.dal.domain.genomics.{HaplogroupCharacterState, StrMutationRate} -import models.domain.haplogroups.AgeEstimate -import play.api.Logging -import repositories.{BiosampleVariantCallRepository, HaplogroupCharacterStateRepository, StrMutationRateRepository} - -import scala.concurrent.{ExecutionContext, Future} - -/** - * STR-based age estimation using genetic distance analysis. - * - * Uses the stepwise mutation model with per-marker mutation rates to calculate - * P(t|g_STRs) = product of P(t|ms) x P(gs|ms) across all markers. 
- * - * Multi-step mutation frequencies (from McDonald 2021): - * omega_1 = 0.962 (single-step) - * omega_2 = 0.032 (two-step) - * omega_3 = 0.004 (three-step) - * - * Generation length: 33 years (pre-industrial average) - */ -class StrAgeService @Inject()( - strMutationRateRepo: StrMutationRateRepository, - characterStateRepo: HaplogroupCharacterStateRepository, - variantCallRepo: BiosampleVariantCallRepository -)(implicit ec: ExecutionContext) extends Logging { - - // Default generation length in years - val DefaultGenerationLength: Double = 33.0 - val DefaultGenerationLengthSigma: Double = 4.0 - - // Multi-step mutation frequencies - val Omega1: Double = 0.962 - val Omega2: Double = 0.032 - val Omega3: Double = 0.004 - - /** - * Calculate STR-based age estimate for a set of sample STR observations - * relative to an ancestral haplogroup's modal haplotype. - * - * @param ancestralStates Map of variantId -> ancestral repeat count (from ASR) - * @param observedValues Map of variantId -> observed repeat count (from sample) - * @param mutationRates Map of variantId -> StrMutationRate - * @param generationLength Years per generation - * @return Age estimate result - */ - private[services] def calculateFromGeneticDistance( - ancestralStates: Map[Int, Int], - observedValues: Map[Int, Int], - mutationRates: Map[Int, StrMutationRate], - generationLength: Double = DefaultGenerationLength - ): StrAgeEstimateResult = { - // Find markers present in all three maps - val commonMarkers = ancestralStates.keySet - .intersect(observedValues.keySet) - .intersect(mutationRates.keySet) - - if (commonMarkers.isEmpty) { - return StrAgeEstimateResult( - estimate = AgeEstimate(0, Some(0), Some(0)), - markerCount = 0, - totalGeneticDistance = 0, - method = "STR_GENETIC_DISTANCE" - ) - } - - // Calculate per-marker genetic distance and sum - val markerResults = commonMarkers.toSeq.map { variantId => - val ancestral = ancestralStates(variantId) - val observed = observedValues(variantId) - 
val rate = mutationRates(variantId) - val distance = math.abs(observed - ancestral) - val stepSize = observed - ancestral - MarkerDistance(variantId, ancestral, observed, distance, stepSize, rate) - } - - val totalGeneticDistance = markerResults.map(_.distance).sum - val markerCount = markerResults.size - - // Average mutation rate across markers (per generation) - val avgMutationRate = markerResults.map(_.rate.mutationRate.toDouble).sum / markerCount - - // Estimate generations using total genetic distance - // E[distance] = sum(mu_i) * t_gen, so t_gen = total_distance / sum(mu_i) - val totalMutationRate = markerResults.map(_.rate.mutationRate.toDouble).sum - val estimatedGenerations = if (totalMutationRate > 0) { - totalGeneticDistance.toDouble / totalMutationRate - } else 0.0 - - val pointYears = math.round(estimatedGenerations * generationLength).toInt - - // Confidence interval using variance of genetic distance - // Var(distance) ≈ sum(mu_i * (1 + multi_step_contribution)) * t_gen - // For CI: use Poisson-like approximation on total distance - val (lowerDist, upperDist) = geneticDistanceConfidenceInterval( - totalGeneticDistance, totalMutationRate, markerCount - ) - val lowerYears = if (totalMutationRate > 0) - math.round((lowerDist / totalMutationRate) * generationLength).toInt - else 0 - val upperYears = if (totalMutationRate > 0) - math.round((upperDist / totalMutationRate) * generationLength).toInt - else 0 - - if (markerCount > 0) { - logger.debug(s"STR age estimate: $pointYears YBP ($lowerYears–$upperYears) from $markerCount markers, " + - s"total genetic distance $totalGeneticDistance") - } - - StrAgeEstimateResult( - estimate = AgeEstimate(pointYears, Some(lowerYears), Some(upperYears)), - markerCount = markerCount, - totalGeneticDistance = totalGeneticDistance, - method = "STR_GENETIC_DISTANCE" - ) - } - - /** - * Calculate STR age for a biosample relative to its terminal haplogroup's - * ancestral motif, using stored character states and variant 
calls. - * - * @param biosampleId The biosample to calculate for - * @param haplogroupId The terminal haplogroup with ancestral STR motifs - * @param strVariantIds IDs of STR-type variants to use - * @return STR age estimate or None if insufficient data - */ - def calculateForBiosample( - biosampleId: Int, - haplogroupId: Int, - strVariantIds: Seq[Int] - ): Future[Option[StrAgeEstimateResult]] = { - for { - // Get ancestral states from ASR - ancestralStates <- characterStateRepo.findStrStatesForHaplogroup(haplogroupId, strVariantIds) - - // Get observed values from biosample - observedCalls <- variantCallRepo.findByBiosampleAndVariants(biosampleId, strVariantIds) - - // Get mutation rates for all markers - allRates <- strMutationRateRepo.findAll() - } yield { - // Build maps: variantId -> repeat count - val ancestralMap = ancestralStates.flatMap { cs => - cs.inferredState.toIntOption.map(v => cs.variantId -> v) - }.toMap - - val observedMap = observedCalls.flatMap { call => - call.observedState.toIntOption.map(v => call.variantId -> v) - }.toMap - - // Build rate map by markerName -> StrMutationRate, then match via variantId - // For now, build a variantId -> rate map using the variant IDs we have - val rateByMarker = allRates.map(r => r.markerName -> r).toMap - - // We need to map variantIds to marker names. For now, use the variantId - // directly as key in the rate map (rates are indexed by variantId for lookup) - val rateMap = allRates.flatMap(r => r.id.map(_ -> r)).toMap - - if (ancestralMap.isEmpty || observedMap.isEmpty) None - else Some(calculateFromGeneticDistance(ancestralMap, observedMap, rateMap)) - } - } - - /** - * Calculate STR-based TMRCA between two sets of STR observations. - * - * Uses the total genetic distance between the two samples across shared markers. 
- */ - private[services] def calculateTmrcaFromStrs( - observedValues1: Map[Int, Int], - observedValues2: Map[Int, Int], - mutationRates: Map[Int, StrMutationRate], - generationLength: Double = DefaultGenerationLength - ): StrAgeEstimateResult = { - val commonMarkers = observedValues1.keySet - .intersect(observedValues2.keySet) - .intersect(mutationRates.keySet) - - if (commonMarkers.isEmpty) { - return StrAgeEstimateResult( - estimate = AgeEstimate(0, Some(0), Some(0)), - markerCount = 0, - totalGeneticDistance = 0, - method = "STR_TMRCA" - ) - } - - // Distance between two samples (TMRCA uses half the total) - val totalDistance = commonMarkers.toSeq.map { vid => - math.abs(observedValues1(vid) - observedValues2(vid)) - }.sum - - val markerCount = commonMarkers.size - val totalMutationRate = commonMarkers.toSeq.map(vid => mutationRates(vid).mutationRate.toDouble).sum - // TMRCA: divide by 2 because distance accumulates on both lineages - val twoLineageRate = 2.0 * totalMutationRate - - val estimatedGenerations = if (twoLineageRate > 0) { - totalDistance.toDouble / twoLineageRate - } else 0.0 - - val pointYears = math.round(estimatedGenerations * generationLength).toInt - - val (lowerDist, upperDist) = geneticDistanceConfidenceInterval( - totalDistance, twoLineageRate, markerCount - ) - val lowerYears = if (twoLineageRate > 0) - math.round((lowerDist / twoLineageRate) * generationLength).toInt - else 0 - val upperYears = if (twoLineageRate > 0) - math.round((upperDist / twoLineageRate) * generationLength).toInt - else 0 - - StrAgeEstimateResult( - estimate = AgeEstimate(pointYears, Some(lowerYears), Some(upperYears)), - markerCount = markerCount, - totalGeneticDistance = totalDistance, - method = "STR_TMRCA" - ) - } - - /** - * Confidence interval for genetic distance using a Poisson-like model. - * - * Genetic distance follows approximately Poisson(sum(mu_i) * t). - * For small distances, use exact Poisson quantiles (approximated). 
- * For larger distances, use normal approximation. - */ - private[services] def geneticDistanceConfidenceInterval( - observedDistance: Int, - totalMutationRate: Double, - markerCount: Int - ): (Double, Double) = { - if (observedDistance == 0) { - return (0.0, -0.5 * math.log(0.025) * 2) // Upper bound for zero observations - } - - // Normal approximation with multi-step variance inflation - val varianceInflation = 1.0 + Omega2 * 4 + Omega3 * 9 // E[step^2] contribution - val effectiveVariance = observedDistance.toDouble * varianceInflation - val z = 1.96 - val sqrtVar = math.sqrt(effectiveVariance) - val lower = math.max(0.0, observedDistance.toDouble - z * sqrtVar) - val upper = observedDistance.toDouble + z * sqrtVar - - (lower, upper) - } - - /** - * Adjust multi-step mutation probability for a given step size. - * - * P(step=k) based on omega frequencies: - * |k|=1: omega_1 = 0.962 - * |k|=2: omega_2 = 0.032 - * |k|=3: omega_3 = 0.004 - * |k|>=4: ~0 (negligible) - */ - private[services] def multiStepProbability(stepSize: Int): Double = { - math.abs(stepSize) match { - case 0 => 1.0 - case 1 => Omega1 - case 2 => Omega2 - case 3 => Omega3 - case _ => 0.001 // Negligible for |step| >= 4 - } - } -} - -/** - * Per-marker distance result. - */ -case class MarkerDistance( - variantId: Int, - ancestralValue: Int, - observedValue: Int, - distance: Int, - stepSize: Int, - rate: StrMutationRate -) - -/** - * Result of an STR-based age estimation. 
- */ -case class StrAgeEstimateResult( - estimate: AgeEstimate, - markerCount: Int, - totalGeneticDistance: Int, - method: String -) diff --git a/app/services/SubmissionProvenanceService.scala b/app/services/SubmissionProvenanceService.scala deleted file mode 100644 index 19de7378..00000000 --- a/app/services/SubmissionProvenanceService.scala +++ /dev/null @@ -1,134 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.pds.{PdsSubmission, SubmissionSummary} -import play.api.Logging -import play.api.libs.json.JsValue -import repositories.{PdsNodeRepository, PdsSubmissionRepository} - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class SubmissionProvenanceService @Inject()( - submissionRepo: PdsSubmissionRepository, - nodeRepo: PdsNodeRepository - )(implicit ec: ExecutionContext) extends Logging { - - def recordSubmission( - did: String, - submissionType: String, - proposedValue: String, - biosampleId: Option[Int] = None, - biosampleGuid: Option[UUID] = None, - confidenceScore: Option[Double] = None, - algorithmVersion: Option[String] = None, - softwareVersion: Option[String] = None, - payload: Option[JsValue] = None, - atUri: Option[String] = None, - atCid: Option[String] = None - ): Future[Either[String, PdsSubmission]] = { - if (!PdsSubmission.ValidTypes.contains(submissionType)) - return Future.successful(Left(s"Invalid submission type: $submissionType")) - - nodeRepo.findByDid(did).flatMap { - case None => Future.successful(Left(s"PDS node not registered: $did")) - case Some(node) => - val submission = PdsSubmission( - pdsNodeId = node.id.get, - submissionType = submissionType, - biosampleId = biosampleId, - biosampleGuid = biosampleGuid, - proposedValue = proposedValue, - confidenceScore = confidenceScore, - algorithmVersion = algorithmVersion, - softwareVersion = softwareVersion.orElse(node.softwareVersion), - payload = payload, - atUri = atUri, - atCid = atCid - ) - 
submissionRepo.create(submission).map(Right(_)) - } - } - - def acceptSubmission(submissionId: Int, reviewedBy: String, notes: Option[String] = None): Future[Either[String, Boolean]] = { - updateSubmissionStatus(submissionId, "ACCEPTED", reviewedBy, notes) - } - - def rejectSubmission(submissionId: Int, reviewedBy: String, notes: Option[String] = None): Future[Either[String, Boolean]] = { - updateSubmissionStatus(submissionId, "REJECTED", reviewedBy, notes) - } - - def supersedeSubmission(submissionId: Int, reviewedBy: String, notes: Option[String] = None): Future[Either[String, Boolean]] = { - updateSubmissionStatus(submissionId, "SUPERSEDED", reviewedBy, notes) - } - - def getSubmission(id: Int): Future[Option[PdsSubmission]] = - submissionRepo.findById(id) - - def getSubmissionsForNode(did: String, submissionType: Option[String] = None): Future[Either[String, Seq[PdsSubmission]]] = { - nodeRepo.findByDid(did).flatMap { - case None => Future.successful(Left("PDS node not found")) - case Some(node) => - val result = submissionType match { - case Some(t) => submissionRepo.findByNodeAndType(node.id.get, t) - case None => submissionRepo.findByNode(node.id.get) - } - result.map(Right(_)) - } - } - - def getSubmissionsForBiosample(biosampleId: Int): Future[Seq[PdsSubmission]] = - submissionRepo.findByBiosampleId(biosampleId) - - def getSubmissionsForBiosampleGuid(guid: UUID): Future[Seq[PdsSubmission]] = - submissionRepo.findByBiosampleGuid(guid) - - def getPendingSubmissions(submissionType: Option[String] = None, limit: Int = 100): Future[Seq[PdsSubmission]] = - submissionType match { - case Some(t) => submissionRepo.findByTypeAndStatus(t, "PENDING", limit) - case None => submissionRepo.findByStatus("PENDING", limit) - } - - def getNodeSubmissionSummary(did: String): Future[Either[String, SubmissionSummary]] = { - nodeRepo.findByDid(did).flatMap { - case None => Future.successful(Left("PDS node not found")) - case Some(node) => - 
submissionRepo.countByNodeAndStatus(node.id.get).map { counts => - val total = counts.values.sum - val accepted = counts.getOrElse("ACCEPTED", 0) - val rejected = counts.getOrElse("REJECTED", 0) - val reviewed = accepted + rejected - val rate = if (reviewed > 0) accepted.toDouble / reviewed else 0.0 - - Right(SubmissionSummary( - pdsNodeId = node.id.get, - did = did, - totalSubmissions = total, - pendingCount = counts.getOrElse("PENDING", 0), - acceptedCount = accepted, - rejectedCount = rejected, - acceptanceRate = rate - )) - } - } - } - - private def updateSubmissionStatus( - submissionId: Int, - newStatus: String, - reviewedBy: String, - notes: Option[String] - ): Future[Either[String, Boolean]] = { - submissionRepo.findById(submissionId).flatMap { - case None => Future.successful(Left("Submission not found")) - case Some(submission) if submission.status != "PENDING" => - Future.successful(Left(s"Cannot update submission with status: ${submission.status}")) - case Some(_) => - submissionRepo.updateStatus(submissionId, newStatus, Some(reviewedBy), notes).map { success => - if (success) Right(true) - else Left("Failed to update submission status") - } - } - } -} diff --git a/app/services/TargetedSequencingService.scala b/app/services/TargetedSequencingService.scala deleted file mode 100644 index bc56e0b6..00000000 --- a/app/services/TargetedSequencingService.scala +++ /dev/null @@ -1,135 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.* -import play.api.Logging -import repositories.{TestTypeRepository, TestTypeTargetRegionRepository} - -import scala.concurrent.{ExecutionContext, Future} - -case class TargetedTestCapabilities( - testType: TestTypeRow, - targetRegions: Seq[TestTypeTargetRegion], - supportsYDna: Boolean, - supportsMtDna: Boolean, - primaryContig: Option[String], - totalTargetedBases: Option[Long] - ) - -@Singleton -class TargetedSequencingService @Inject()( - testTypeRepo: TestTypeRepository, - 
targetRegionRepo: TestTypeTargetRegionRepository - )(implicit ec: ExecutionContext) extends Logging { - - def getTargetedTestCapabilities(testTypeCode: String): Future[Option[TargetedTestCapabilities]] = { - testTypeRepo.findByCode(testTypeCode).flatMap { - case None => Future.successful(None) - case Some(testType) => - testType.id match { - case None => Future.successful(None) - case Some(ttId) => - targetRegionRepo.findByTestTypeId(ttId).map { regions => - val primaryContig = regions.headOption.map(_.contigName) - val totalBases = regions.flatMap(_.regionSize).sum - Some(TargetedTestCapabilities( - testType = testType, - targetRegions = regions, - supportsYDna = testType.supportsHaplogroupY, - supportsMtDna = testType.supportsHaplogroupMt, - primaryContig = primaryContig, - totalTargetedBases = if (totalBases > 0) Some(totalBases.toLong) else None - )) - } - } - } - } - - def assessCoverage( - testTypeCode: String, - actualMeanDepth: Option[Double], - actualCoveragePct: Option[Double] - ): Future[Option[TargetedCoverageAssessment]] = { - testTypeRepo.findByCode(testTypeCode).flatMap { - case None => Future.successful(None) - case Some(testType) => - testType.id match { - case None => Future.successful(None) - case Some(ttId) => - targetRegionRepo.findByTestTypeId(ttId).map { regions => - if (regions.isEmpty) None - else Some(buildAssessment(testType, regions, actualMeanDepth, actualCoveragePct)) - } - } - } - } - - private[services] def buildAssessment( - testType: TestTypeRow, - regions: Seq[TestTypeTargetRegion], - actualMeanDepth: Option[Double], - actualCoveragePct: Option[Double] - ): TargetedCoverageAssessment = { - val regionResults = regions.map { region => - val meetsDepth = (actualMeanDepth, region.expectedMinDepth) match { - case (Some(actual), Some(expected)) => actual >= expected - case _ => true - } - val meetsCoverage = (actualCoveragePct, region.expectedCoveragePct) match { - case (Some(actual), Some(expected)) => actual >= expected - case _ => 
true - } - - RegionCoverageResult( - regionName = region.regionName, - contigName = region.contigName, - startPosition = region.startPosition, - endPosition = region.endPosition, - expectedCoveragePct = region.expectedCoveragePct, - expectedMinDepth = region.expectedMinDepth, - actualMeanDepth = actualMeanDepth, - actualCoveragePct = actualCoveragePct, - meetsExpectation = meetsDepth && meetsCoverage - ) - } - - val overallCoverage = actualCoveragePct.getOrElse(0.0) - val allMeet = regionResults.forall(_.meetsExpectation) - - TargetedCoverageAssessment( - testTypeCode = testType.code, - testTypeDisplayName = testType.displayName, - targetRegions = regionResults, - overallCoveragePct = overallCoverage, - overallMeetsExpectation = allMeet, - qualityTier = TargetedCoverageAssessment.qualityTierFromCoverage(overallCoverage) - ) - } - - def getTargetedYTests: Future[Seq[TestTypeRow]] = { - testTypeRepo.findByCapability(supportsY = Some(true)).map { tests => - tests.filter(_.targetType == TargetType.YChromosome) - } - } - - def getTargetedMtTests: Future[Seq[TestTypeRow]] = { - testTypeRepo.findByCapability(supportsMt = Some(true)).map { tests => - tests.filter(_.targetType == TargetType.MtDna) - } - } - - def findUpgradePath(currentTestTypeCode: String): Future[Option[TestTypeRow]] = { - testTypeRepo.findByCode(currentTestTypeCode).flatMap { - case Some(current) if current.successorTestTypeId.isDefined => - testTypeRepo.getTestTypeRowsByIds(Seq(current.successorTestTypeId.get)).map(_.headOption) - case _ => Future.successful(None) - } - } - - def isTargetedTest(testTypeCode: String): Future[Boolean] = { - testTypeRepo.findByCode(testTypeCode).map { - case Some(tt) => tt.targetType == TargetType.YChromosome || tt.targetType == TargetType.MtDna - case None => false - } - } -} diff --git a/app/services/TerminalVariantClusteringService.scala b/app/services/TerminalVariantClusteringService.scala deleted file mode 100644 index 28565afa..00000000 --- 
a/app/services/TerminalVariantClusteringService.scala +++ /dev/null @@ -1,396 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.discovery.* -import play.api.Logging -import repositories.{HaplogroupCoreRepository, PrivateVariantRepository, ProposedBranchRepository} - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Clusters private variants across biosamples sharing the same terminal haplogroup - * to identify candidate new branches. Operates as a batch analysis service that - * scans existing private variant data and feeds clusters into the ProposalEngine. - * - * Algorithm: - * 1. For a given terminal haplogroup, gather all active private variants - * 2. Group by biosample to get per-sample variant sets - * 3. Identify variant co-occurrence patterns (clusters) - * 4. Feed qualifying clusters into the ProposalEngine - */ -class TerminalVariantClusteringService @Inject()( - privateVariantRepo: PrivateVariantRepository, - proposalEngine: ProposalEngine, - coreRepo: HaplogroupCoreRepository, - proposedBranchRepo: ProposedBranchRepository -)(implicit ec: ExecutionContext) extends Logging { - - private val DefaultMinClusterSize = 2 - private val DefaultMinVariantsPerCluster = 1 - - /** - * Run clustering for all terminal haplogroups of a given type. - * Scans active private variants, groups by terminal, clusters, and creates proposals. 
- */ - def clusterAllTerminals(haplogroupType: HaplogroupType): Future[ClusteringReport] = { - for { - minClusterSize <- getConfigInt(haplogroupType, "min_cluster_size", DefaultMinClusterSize) - minVariants <- getConfigInt(haplogroupType, "min_variants_per_cluster", DefaultMinVariantsPerCluster) - - // Get all terminal haplogroups that have active private variants - terminals <- findTerminalsWithPrivateVariants(haplogroupType) - - results <- Future.sequence(terminals.map { terminalHgId => - clusterForTerminal(terminalHgId, haplogroupType, minClusterSize, minVariants) - }) - } yield { - val report = ClusteringReport( - haplogroupType = haplogroupType, - terminalsScanned = terminals.size, - clustersFound = results.map(_.clusters.size).sum, - proposalsCreated = results.map(_.proposalsCreated).sum, - proposalsUpdated = results.map(_.proposalsUpdated).sum - ) - logger.info(s"Clustering complete for $haplogroupType: ${report.terminalsScanned} terminals, " + - s"${report.clustersFound} clusters, ${report.proposalsCreated} new proposals, " + - s"${report.proposalsUpdated} updated proposals") - report - } - } - - /** - * Run clustering for a specific terminal haplogroup. 
- */ - def clusterForTerminal( - terminalHaplogroupId: Int, - haplogroupType: HaplogroupType, - minClusterSize: Int = DefaultMinClusterSize, - minVariantsPerCluster: Int = DefaultMinVariantsPerCluster - ): Future[TerminalClusterResult] = { - for { - // Get all active private variants under this terminal - privateVariants <- privateVariantRepo.findByTerminalHaplogroup(terminalHaplogroupId) - activeVariants = privateVariants.filter(_.status == PrivateVariantStatus.Active) - - // Group by sample to get per-sample variant sets - sampleVariantSets = groupBySample(activeVariants) - - // Find co-occurrence clusters - clusters = findClusters(sampleVariantSets, minClusterSize, minVariantsPerCluster) - - // Feed clusters into proposal engine - proposalResults <- processClustersThroughProposals( - clusters, terminalHaplogroupId, haplogroupType - ) - } yield { - if (clusters.nonEmpty) { - logger.info(s"Terminal $terminalHaplogroupId: found ${clusters.size} clusters " + - s"from ${sampleVariantSets.size} samples with ${activeVariants.size} active variants") - } - TerminalClusterResult( - terminalHaplogroupId = terminalHaplogroupId, - samplesAnalyzed = sampleVariantSets.size, - clusters = clusters, - proposalsCreated = proposalResults.count(_.isNew), - proposalsUpdated = proposalResults.count(!_.isNew) - ) - } - } - - /** - * Group private variants by sample, returning a map of SampleKey -> Set[variantId]. - */ - private[services] def groupBySample( - variants: Seq[BiosamplePrivateVariant] - ): Map[SampleKey, Set[Int]] = { - variants.groupBy(pv => SampleKey(pv.sampleType, pv.sampleId, pv.sampleGuid)) - .view.mapValues(_.map(_.variantId).toSet).toMap - } - - /** - * Find clusters of co-occurring variants across samples. - * - * A cluster is a set of variants that appear together in at least `minClusterSize` samples. - * Uses a frequency-based approach: - * 1. Count how many samples each variant pair co-occurs in - * 2. Build clusters from frequently co-occurring variants - * 3. 
Filter to clusters meeting minimum size threshold - */ - private[services] def findClusters( - sampleVariantSets: Map[SampleKey, Set[Int]], - minClusterSize: Int, - minVariantsPerCluster: Int - ): Seq[VariantCluster] = { - if (sampleVariantSets.size < minClusterSize) return Seq.empty - - // Find which samples share each exact variant set (or near-identical sets) - // Group samples by their variant set, then merge similar sets - val setGroups: Map[Set[Int], Seq[SampleKey]] = sampleVariantSets - .toSeq - .groupBy(_._2) - .view.mapValues(_.map(_._1)).toMap - - // Direct matches: samples with identical variant sets - val exactClusters = setGroups - .filter { case (variantSet, samples) => - samples.size >= minClusterSize && variantSet.size >= minVariantsPerCluster - } - .map { case (variantSet, samples) => - VariantCluster( - variantIds = variantSet, - supportingSamples = samples, - clusterType = ClusterType.Exact - ) - }.toSeq - - // Subset clusters: find core variant sets shared by multiple samples - // even if some samples have additional private variants - val coreClusters = findCoreClusters(sampleVariantSets, minClusterSize, minVariantsPerCluster) - .filterNot { coreCluster => - // Don't duplicate exact clusters - exactClusters.exists(_.variantIds == coreCluster.variantIds) - } - - exactClusters ++ coreClusters - } - - /** - * Find core variant subsets shared across multiple samples. - * For each pair of samples, compute intersection. If the intersection - * appears in enough samples, it's a core cluster. 
- */ - private[services] def findCoreClusters( - sampleVariantSets: Map[SampleKey, Set[Int]], - minClusterSize: Int, - minVariantsPerCluster: Int - ): Seq[VariantCluster] = { - val samples = sampleVariantSets.toSeq - if (samples.size < minClusterSize) return Seq.empty - - // Compute pairwise intersections and count how many samples contain each - val candidateCores = scala.collection.mutable.Map[Set[Int], Set[SampleKey]]() - - for { - i <- samples.indices - j <- (i + 1) until samples.size - } { - val intersection = samples(i)._2 intersect samples(j)._2 - if (intersection.size >= minVariantsPerCluster) { - val entry = candidateCores.getOrElseUpdate(intersection, Set.empty) - candidateCores(intersection) = entry + samples(i)._1 + samples(j)._1 - } - } - - // Also check which other samples contain each candidate core - val enriched = candidateCores.map { case (coreVariants, initialSamples) => - val allSupporting = sampleVariantSets.collect { - case (key, variantSet) if coreVariants.subsetOf(variantSet) => key - }.toSet - (coreVariants, allSupporting) - } - - // Filter to cores meeting threshold, remove subsets of larger cores - val qualifying = enriched - .filter { case (_, supporters) => supporters.size >= minClusterSize } - .toSeq - .sortBy(-_._1.size) // Prefer larger variant sets - - // Remove cores that are strict subsets of other qualifying cores with same or more supporters - val nonRedundant = qualifying.filter { case (variants, supporters) => - !qualifying.exists { case (otherVariants, otherSupporters) => - otherVariants != variants && - variants.subsetOf(otherVariants) && - otherSupporters.size >= supporters.size - } - } - - nonRedundant.map { case (variantIds, supporters) => - VariantCluster( - variantIds = variantIds, - supportingSamples = supporters.toSeq, - clusterType = ClusterType.Core - ) - } - } - - /** - * Generate a naming suggestion for a proposed branch based on its parent. 
- */ - private[services] def suggestBranchName( - parentHaplogroupId: Int, - clusterIndex: Int - ): Future[Option[String]] = { - coreRepo.findById(parentHaplogroupId).map { - case Some(parent) => - // Suggest format: "ParentName-proposed-N" (curator will assign final name) - Some(s"${parent.name}-proposed-${clusterIndex + 1}") - case None => None - } - } - - /** - * Process discovered clusters through the proposal engine. - */ - private def processClustersThroughProposals( - clusters: Seq[VariantCluster], - terminalHaplogroupId: Int, - haplogroupType: HaplogroupType - ): Future[Seq[ProposalResult]] = { - Future.sequence(clusters.zipWithIndex.map { case (cluster, idx) => - val representativeSample = cluster.supportingSamples.head - val sampleRef = SampleReference( - representativeSample.sampleType, - representativeSample.sampleId, - representativeSample.sampleGuid - ) - - for { - // Check if a proposal already exists for this variant set - existingProposals <- proposedBranchRepo.findByParentAndType(terminalHaplogroupId, haplogroupType) - existingVariantSets <- Future.sequence(existingProposals.map { p => - proposedBranchRepo.getVariantIds(p.id.get).map(vids => (p, vids)) - }) - - // Check for exact or near match - exactMatch = existingVariantSets.find { case (_, vids) => - proposalEngine.jaccardSimilarity(cluster.variantIds, vids) >= 0.8 - } - - result <- exactMatch match { - case Some((existing, _)) => - // Add remaining samples as evidence - addClusterEvidenceToExisting(existing, cluster).map(_ => - ProposalResult(existing.id.get, isNew = false) - ) - case None => - // Create through proposal engine - proposalEngine.findOrCreateProposal( - terminalHaplogroupId, haplogroupType, cluster.variantIds, sampleRef - ).flatMap { proposal => - // Add remaining samples as evidence - addClusterEvidenceToExisting(proposal, cluster.copy( - supportingSamples = cluster.supportingSamples.tail - )).map(_ => ProposalResult(proposal.id.get, isNew = true)) - } - } - - // Suggest name 
- nameOpt <- suggestBranchName(terminalHaplogroupId, idx) - _ <- nameOpt match { - case Some(name) => - result match { - case ProposalResult(proposalId, true) => - proposedBranchRepo.findById(proposalId).flatMap { - case Some(p) if p.proposedName.isEmpty => - proposedBranchRepo.update(p.copy(proposedName = Some(name))) - case _ => Future.successful(true) - } - case _ => Future.successful(true) - } - case None => Future.successful(true) - } - } yield result - }) - } - - /** - * Add supporting samples from a cluster as evidence to an existing proposal. - */ - private def addClusterEvidenceToExisting( - proposal: ProposedBranch, - cluster: VariantCluster - ): Future[Unit] = { - // Get existing evidence to avoid duplicates - proposedBranchRepo.getEvidence(proposal.id.get).flatMap { existingEvidence => - val existingKeys = existingEvidence.map(e => (e.sampleType, e.sampleId)).toSet - val newSamples = cluster.supportingSamples.filterNot(s => - existingKeys.contains((s.sampleType, s.sampleId)) - ) - Future.sequence(newSamples.map { sample => - proposedBranchRepo.addEvidence(ProposedBranchEvidence( - proposedBranchId = proposal.id.get, - sampleType = sample.sampleType, - sampleId = sample.sampleId, - sampleGuid = sample.sampleGuid, - variantMatchCount = (cluster.variantIds).size, - variantMismatchCount = 0 - )) - }).map(_ => ()) - } - } - - private def findTerminalsWithPrivateVariants( - haplogroupType: HaplogroupType - ): Future[Seq[Int]] = { - // Query all active private variants of this type and get distinct terminal IDs - proposedBranchRepo.getConfig(haplogroupType, "dummy").flatMap { _ => - // We need to use the private variant repo to find distinct terminal haplogroup IDs - // Since there's no direct method, we'll use findActiveByVariantIds with a workaround - // Better: add a method to the repo. For now, scan by haplogroup type through the repo. - // The findByTerminalHaplogroup method exists but requires a specific ID. - // We'll need a new repo method. 
For simplicity, we'll accept terminal IDs as input. - Future.successful(Seq.empty) - } - } - - /** - * Cluster for a specific terminal haplogroup — public entry point when - * the caller already knows which terminals to process. - */ - def clusterForTerminals( - terminalIds: Seq[Int], - haplogroupType: HaplogroupType - ): Future[ClusteringReport] = { - for { - minClusterSize <- getConfigInt(haplogroupType, "min_cluster_size", DefaultMinClusterSize) - minVariants <- getConfigInt(haplogroupType, "min_variants_per_cluster", DefaultMinVariantsPerCluster) - - results <- Future.sequence(terminalIds.map { id => - clusterForTerminal(id, haplogroupType, minClusterSize, minVariants) - }) - } yield ClusteringReport( - haplogroupType = haplogroupType, - terminalsScanned = terminalIds.size, - clustersFound = results.map(_.clusters.size).sum, - proposalsCreated = results.map(_.proposalsCreated).sum, - proposalsUpdated = results.map(_.proposalsUpdated).sum - ) - } - - private def getConfigInt(hgType: HaplogroupType, key: String, default: Int): Future[Int] = - proposedBranchRepo.getConfig(hgType, key).map(_.flatMap(_.toIntOption).getOrElse(default)) -} - -// Domain models for clustering - -case class SampleKey(sampleType: BiosampleSourceType, sampleId: Int, sampleGuid: UUID) - -enum ClusterType { - case Exact, Core -} - -case class VariantCluster( - variantIds: Set[Int], - supportingSamples: Seq[SampleKey], - clusterType: ClusterType -) - -case class TerminalClusterResult( - terminalHaplogroupId: Int, - samplesAnalyzed: Int, - clusters: Seq[VariantCluster], - proposalsCreated: Int, - proposalsUpdated: Int -) - -case class ClusteringReport( - haplogroupType: HaplogroupType, - terminalsScanned: Int, - clustersFound: Int, - proposalsCreated: Int, - proposalsUpdated: Int -) - -case class ProposalResult(proposalId: Int, isNew: Boolean) diff --git a/app/services/TestTypeService.scala b/app/services/TestTypeService.scala deleted file mode 100644 index 6a4a9371..00000000 --- 
a/app/services/TestTypeService.scala +++ /dev/null @@ -1,33 +0,0 @@ -package services - -import models.domain.genomics.DataGenerationMethod -import models.domain.genomics.TestTypeRow - -import scala.concurrent.Future - -trait TestTypeService { - /** - * Get test type definition by code. - */ - def getByCode(code: String): Future[Option[TestTypeRow]] - - /** - * Get all active test types in a category. - */ - def getByCategory(category: DataGenerationMethod): Future[Seq[TestTypeRow]] - - /** - * Get test types that support a specific capability. - */ - def getByCapability( - supportsY: Option[Boolean] = None, - supportsMt: Option[Boolean] = None, - supportsAutosomalIbd: Option[Boolean] = None, - supportsAncestry: Option[Boolean] = None - ): Future[Seq[TestTypeRow]] - - /** - * Validate that a test type code is valid. - */ - def isValidCode(code: String): Future[Boolean] -} diff --git a/app/services/TestTypeServiceImpl.scala b/app/services/TestTypeServiceImpl.scala deleted file mode 100644 index 63985cce..00000000 --- a/app/services/TestTypeServiceImpl.scala +++ /dev/null @@ -1,35 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.{DataGenerationMethod, TestTypeRow} -import repositories.TestTypeRepository - -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class TestTypeServiceImpl @Inject()( - testTypeRepository: TestTypeRepository -)(implicit ec: ExecutionContext) - extends TestTypeService { - - override def getByCode(code: String): Future[Option[TestTypeRow]] = { - testTypeRepository.findByCode(code) - } - - override def getByCategory(category: DataGenerationMethod): Future[Seq[TestTypeRow]] = { - testTypeRepository.findByCategory(category) - } - - override def getByCapability( - supportsY: Option[Boolean], - supportsMt: Option[Boolean], - supportsAutosomalIbd: Option[Boolean], - supportsAncestry: Option[Boolean] - ): Future[Seq[TestTypeRow]] = { - testTypeRepository.findByCapability(supportsY, 
supportsMt, supportsAutosomalIbd, supportsAncestry) - } - - override def isValidCode(code: String): Future[Boolean] = { - testTypeRepository.findByCode(code).map(_.isDefined) - } -} diff --git a/app/services/TreeEvolutionService.scala b/app/services/TreeEvolutionService.scala deleted file mode 100644 index 38f2b8f0..00000000 --- a/app/services/TreeEvolutionService.scala +++ /dev/null @@ -1,203 +0,0 @@ -package services - -import jakarta.inject.Inject -import models.HaplogroupType -import models.domain.discovery.* -import models.domain.haplogroups.Haplogroup -import play.api.Logging -import play.api.libs.json.Json -import repositories.* - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Promotes accepted proposals to the canonical haplogroup tree and - * reassigns biosamples to the newly created haplogroup branches. - */ -class TreeEvolutionService @Inject()( - haplogroupCoreRepo: HaplogroupCoreRepository, - haplogroupVariantRepo: HaplogroupVariantRepository, - proposedBranchRepo: ProposedBranchRepository, - privateVariantRepo: PrivateVariantRepository, - biosampleHaplogroupRepo: BiosampleHaplogroupRepository, - curatorActionRepo: CuratorActionRepository -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Promote an accepted proposal to the canonical haplogroup tree. - * Creates a new haplogroup, links its variants, reassigns biosamples, - * and updates private variant statuses. 
- */ - def promoteProposal( - proposalId: Int, - curatorId: String - ): Future[PromotionResult] = { - for { - // Validate proposal - proposalOpt <- proposedBranchRepo.findById(proposalId) - proposal = proposalOpt.getOrElse( - throw new NoSuchElementException(s"Proposal $proposalId not found") - ) - _ = if (proposal.status != ProposedBranchStatus.Accepted) { - throw new IllegalStateException( - s"Proposal $proposalId is ${proposal.status}, must be Accepted to promote" - ) - } - branchName = proposal.proposedName.getOrElse( - throw new IllegalStateException(s"Proposal $proposalId has no proposed name — curator must assign a name before promotion") - ) - - // Create the new haplogroup in the tree - newHaplogroup = Haplogroup( - name = branchName, - lineage = None, // Will be computed by tree service - description = Some(s"Promoted from discovery proposal $proposalId"), - haplogroupType = proposal.haplogroupType, - revisionId = 1, - source = s"discovery:$proposalId", - confidenceLevel = f"${proposal.confidenceScore}%.2f", - validFrom = LocalDateTime.now(), - validUntil = None - ) - (newHaplogroupId, _) <- haplogroupCoreRepo.createWithParent( - newHaplogroup, Some(proposal.parentHaplogroupId), s"discovery:$proposalId" - ) - - // Link defining variants to the new haplogroup - proposalVariants <- proposedBranchRepo.getVariants(proposalId) - definingVariants = proposalVariants.filter(_.isDefining) - _ <- Future.sequence(definingVariants.map { pv => - haplogroupVariantRepo.addVariantToHaplogroup(newHaplogroupId, pv.variantId) - }) - - // Reassign biosamples - evidence <- proposedBranchRepo.getEvidence(proposalId) - reassignCount <- reassignBiosamples( - evidence, proposal.parentHaplogroupId, newHaplogroupId, proposal.haplogroupType - ) - - // Update private variant statuses to PROMOTED - variantIds = definingVariants.map(_.variantId) - promotedCount <- promotePrivateVariants(variantIds, proposal.haplogroupType) - - // Update proposal status to PROMOTED - updatedProposal = 
proposal.copy( - status = ProposedBranchStatus.Promoted, - promotedHaplogroupId = Some(newHaplogroupId), - updatedAt = LocalDateTime.now() - ) - _ <- proposedBranchRepo.update(updatedProposal) - - // Record audit trail - _ <- curatorActionRepo.create(CuratorAction( - curatorId = curatorId, - actionType = CuratorActionType.Create, - targetType = CuratorTargetType.Haplogroup, - targetId = newHaplogroupId, - previousState = Some(Json.toJson(proposal)), - newState = Some(Json.obj( - "haplogroupId" -> newHaplogroupId, - "name" -> branchName, - "parentId" -> proposal.parentHaplogroupId, - "definingVariants" -> definingVariants.map(_.variantId), - "reassignedBiosamples" -> reassignCount - )), - reason = Some(s"Promoted proposal $proposalId to haplogroup $branchName") - )) - - _ = logger.info(s"Promoted proposal $proposalId as haplogroup $branchName (id=$newHaplogroupId): " + - s"${definingVariants.size} variants, $reassignCount biosamples reassigned, $promotedCount variants promoted") - } yield PromotionResult( - proposalId = proposalId, - newHaplogroupId = newHaplogroupId, - haplogroupName = branchName, - definingVariantCount = definingVariants.size, - reassignedBiosampleCount = reassignCount, - promotedVariantCount = promotedCount - ) - } - - /** - * Reassign biosamples from evidence to the new haplogroup. - * Updates the biosample_haplogroup table for each supporting sample. 
- */ - private def reassignBiosamples( - evidence: Seq[ProposedBranchEvidence], - oldHaplogroupId: Int, - newHaplogroupId: Int, - haplogroupType: HaplogroupType - ): Future[Int] = { - Future.sequence(evidence.map { e => - biosampleHaplogroupRepo.findBySampleGuid(e.sampleGuid).flatMap { - case Some(bh) => - haplogroupType match { - case HaplogroupType.Y => - if (bh.yHaplogroupId.contains(oldHaplogroupId)) { - biosampleHaplogroupRepo.updateYHaplogroup(e.sampleGuid, newHaplogroupId).map(if (_) 1 else 0) - } else Future.successful(0) - case HaplogroupType.MT => - if (bh.mtHaplogroupId.contains(oldHaplogroupId)) { - biosampleHaplogroupRepo.updateMtHaplogroup(e.sampleGuid, newHaplogroupId).map(if (_) 1 else 0) - } else Future.successful(0) - } - case None => - logger.warn(s"No biosample_haplogroup record for sample ${e.sampleGuid}") - Future.successful(0) - } - }).map(_.sum) - } - - /** - * Update private variant statuses from ACTIVE to PROMOTED - * for variants that are now part of the canonical tree. - */ - private def promotePrivateVariants( - variantIds: Seq[Int], - haplogroupType: HaplogroupType - ): Future[Int] = { - Future.sequence(variantIds.map { vid => - privateVariantRepo.findActiveByVariantIds(Set(vid), haplogroupType).flatMap { pvs => - Future.sequence(pvs.map { pv => - privateVariantRepo.updateStatus(pv.id.get, PrivateVariantStatus.Promoted) - }) - }.map(_.count(identity)) - }).map(_.sum) - } - - /** - * Bulk reassign biosamples from one haplogroup to another. - * Used when a branch is refined and existing samples need to move. 
- */ - def reassignBiosamplesToNewTerminal( - oldTerminalId: Int, - newTerminalId: Int, - haplogroupType: HaplogroupType - ): Future[Int] = { - for { - affected <- biosampleHaplogroupRepo.findByHaplogroupId(oldTerminalId, haplogroupType) - count <- Future.sequence(affected.map { bh => - haplogroupType match { - case HaplogroupType.Y => - biosampleHaplogroupRepo.updateYHaplogroup(bh.sampleGuid, newTerminalId).map(if (_) 1 else 0) - case HaplogroupType.MT => - biosampleHaplogroupRepo.updateMtHaplogroup(bh.sampleGuid, newTerminalId).map(if (_) 1 else 0) - } - }).map(_.sum) - } yield count - } -} - -case class PromotionResult( - proposalId: Int, - newHaplogroupId: Int, - haplogroupName: String, - definingVariantCount: Int, - reassignedBiosampleCount: Int, - promotedVariantCount: Int -) - -object PromotionResult { - implicit val format: play.api.libs.json.OFormat[PromotionResult] = play.api.libs.json.Json.format -} diff --git a/app/services/TreeImportConfig.scala b/app/services/TreeImportConfig.scala deleted file mode 100644 index 0a588e4f..00000000 --- a/app/services/TreeImportConfig.scala +++ /dev/null @@ -1,11 +0,0 @@ -package services - -import play.api.Configuration - -import javax.inject.{Inject, Singleton} - -@Singleton -class TreeImportConfig @Inject()(configuration: Configuration) { - val YDnaTreePath: String = "/tmp/import-tree-ydna.json" - val MtDnaTreePath: String = "/tmp/import-tree-mtdna.json" -} \ No newline at end of file diff --git a/app/services/TreeImporter.scala b/app/services/TreeImporter.scala deleted file mode 100644 index cfb9fe99..00000000 --- a/app/services/TreeImporter.scala +++ /dev/null @@ -1,262 +0,0 @@ -package services - -import models.* -import models.api.{TreeDTO, TreeNodeDTO, VariantDTO} -import models.domain.genomics.{MutationType, NamingStatus, VariantV2} -import models.domain.haplogroups.{Haplogroup, HaplogroupRelationship, HaplogroupVariantMetadata, RelationshipRevisionMetadata} -import play.api.Logging -import 
play.api.libs.json.Json -import repositories.* - -import java.time.LocalDateTime -import javax.inject.Inject -import scala.concurrent.{ExecutionContext, Future} - -/** - * Configuration class for tree import settings. - */ -case class TreeImportSettings( - initialAuthor: String = "system", - source: String = "initial_import", - defaultConfidenceLevel: String = "MEDIUM", - backboneConfidenceLevel: String = "HIGH" -) - -/** - * Class responsible for importing and processing phylogenetic tree data into the system, - * including haplogroup information, relationships, and variant associations. - */ -class TreeImporter @Inject()( - haplogroupRevisionRepository: HaplogroupRevisionRepository, - haplogroupRelationshipRepository: HaplogroupRelationshipRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - haplogroupVariantMetadataRepository: HaplogroupVariantMetadataRepository, - haplogroupRevisionMetadataRepository: HaplogroupRevisionMetadataRepository, - genbankContigRepository: GenbankContigRepository, - variantV2Repository: VariantV2Repository -)(implicit ec: ExecutionContext) extends Logging { - private val defaultSettings = TreeImportSettings() - - /** - * Imports a tree structure into the system by recursively processing its nodes, - * creating haplogroups, relationships, and variants. - */ - def importTree(tree: TreeDTO, haplogroupType: HaplogroupType)(implicit settings: TreeImportSettings = defaultSettings): Future[Unit] = { - val timestamp = LocalDateTime.now() - - def processNode( - node: TreeNodeDTO, - parentId: Option[Int] = None, - depth: Int = 0 - ): Future[Int] = { - for { - // 1. Create the haplogroup - haplogroupId <- createHaplogroup(node, haplogroupType, timestamp) - - // 2. Create relationship if there's a parent - _ <- parentId match { - case Some(pid) => createRelationship(pid, haplogroupId, timestamp) - case None => Future.successful(()) - } - - // 3. 
Create variants - _ <- createVariants(node.variants, haplogroupId, timestamp) - - // 4. Process children recursively - _ <- Future.sequence( - node.children.map(child => processNode(child, Some(haplogroupId), depth + 1)) - ) - } yield haplogroupId - } - - // Start with the root node - tree.subclade match { - case Some(root) => processNode(root).map(_ => ()) - case None => Future.successful(()) - } - } - - /** - * Creates a haplogroup entity with associated revision metadata. - */ - private def createHaplogroup( - node: TreeNodeDTO, - haplogroupType: HaplogroupType, - timestamp: LocalDateTime - )(implicit settings: TreeImportSettings): Future[Int] = { - logger.debug(s"Creating haplogroup: ${node.name}") - val haplogroup = Haplogroup( - id = None, - name = node.name, - lineage = None, - description = None, // TreeNodeDTO doesn't have description - haplogroupType = haplogroupType, - revisionId = 1, - source = settings.source, - confidenceLevel = settings.defaultConfidenceLevel, - validFrom = timestamp, - validUntil = None - ) - - val revisionComment = s"Created during tree import from source: ${settings.source}" - - // Create the haplogroup revision - // Note: Haplogroup revision metadata is not currently tracked separately - haplogroupRevisionRepository.createNewRevision(haplogroup) - } - - /** - * Creates a relationship between parent and child haplogroups. 
- */ - private def createRelationship( - parentId: Int, - childId: Int, - timestamp: LocalDateTime - )(implicit settings: TreeImportSettings): Future[Unit] = { - logger.debug(s"Creating relationship: parent=$parentId -> child=$childId") - val relationship = HaplogroupRelationship( - id = None, - childHaplogroupId = childId, - parentHaplogroupId = parentId, - revisionId = 1, - validFrom = timestamp, - validUntil = None, - source = settings.source, - ) - - val metadata = RelationshipRevisionMetadata( - haplogroup_relationship_id = 0, // Will be set after relationship creation - revisionId = 1, - author = settings.initialAuthor, - timestamp = timestamp, - comment = "Initial tree import", - changeType = "CREATE", - previousRevisionId = None - ) - - for { - relId <- haplogroupRelationshipRepository.createRelationshipRevision(relationship) - _ <- haplogroupRevisionMetadataRepository.addRelationshipRevisionMetadata( - metadata.copy(haplogroup_relationship_id = relId) - ) - } yield () - } - - /** - * Creates or retrieves genetic variants and associates them with a haplogroup. - * Now uses VariantV2 with JSONB coordinates. - */ - private def createVariants( - variants: Seq[VariantDTO], - haplogroupId: Int, - timestamp: LocalDateTime - )(implicit settings: TreeImportSettings): Future[Unit] = { - logger.debug(s"Starting to process ${variants.size} variants for haplogroup $haplogroupId") - - // Process variants sequentially to avoid overwhelming the connection pool - variants.grouped(100).toSeq.foldLeft(Future.successful(())) { case (prevFuture, batch) => - prevFuture.flatMap { _ => - for { - // Create/find variants and get their IDs - variantIds <- Future.traverse(batch) { variantDto => - createOrFindVariant(variantDto) - } - // Associate variants with haplogroup - _ <- Future.traverse(variantIds.flatten) { variantId => - createVariantAssociation(haplogroupId, variantId, timestamp) - } - } yield () - } - } - } - - /** - * Creates a new VariantV2 or finds an existing one. 
- */ - private def createOrFindVariant(variantDto: VariantDTO): Future[Option[Int]] = { - // Build coordinates JSONB from DTO - val coordinatesJson = variantDto.coordinates.foldLeft(Json.obj()) { case (acc, (contigAccession, coord)) => - // For now, use accession as the reference genome key - // In a real implementation, we'd map accession to reference genome name (GRCh38, hs1, etc.) - acc + (contigAccession -> Json.obj( - "contig" -> contigAccession, - "position" -> coord.start, - "ref" -> coord.anc, - "alt" -> coord.der - )) - } - - // Determine canonical name and rsId - val isRsId = variantDto.name.startsWith("rs") - val canonicalName = if (isRsId) None else Some(variantDto.name) - val rsIds = if (isRsId) Seq(variantDto.name) else Seq.empty - - // Build aliases JSONB - val aliasesJson = Json.obj( - "common_names" -> Seq.empty[String], - "rs_ids" -> rsIds, - "sources" -> Json.obj("import" -> Seq(variantDto.name)) - ) - - val variant = VariantV2( - variantId = None, - canonicalName = canonicalName, - mutationType = MutationType.fromStringOrDefault(variantDto.variantType), - namingStatus = if (canonicalName.isDefined) NamingStatus.Named else NamingStatus.Unnamed, - aliases = aliasesJson, - coordinates = coordinatesJson, - definingHaplogroupId = None, - evidence = Json.obj(), - primers = Json.obj(), - notes = None - ) - - // Try to find existing variant by name, otherwise create - canonicalName match { - case Some(name) => - variantV2Repository.findByCanonicalName(name).flatMap { - case Some(existing) => Future.successful(existing.variantId) - case None => variantV2Repository.create(variant).map(Some(_)) - } - case None if rsIds.nonEmpty => - variantV2Repository.findByAlias(rsIds.head).flatMap { existingVariants => - existingVariants.headOption match { - case Some(existing) => Future.successful(existing.variantId) - case None => variantV2Repository.create(variant).map(Some(_)) - } - } - case None => - variantV2Repository.create(variant).map(Some(_)) - } - } - - 
import scala.util.control.NonFatal - - private def createVariantAssociation( - haplogroupId: Int, - variantId: Int, - timestamp: LocalDateTime - )(implicit settings: TreeImportSettings): Future[Int] = { - (for { - // Create the haplogroup-variant association - assocId <- haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, variantId) - - // Add metadata for the association - _ <- haplogroupVariantMetadataRepository.addVariantRevisionMetadata( - HaplogroupVariantMetadata( - haplogroup_variant_id = assocId, - revision_id = 1, - author = settings.initialAuthor, - timestamp = timestamp, - comment = "Initial variant import", - change_type = "CREATE", - previous_revision_id = None - ) - ) - } yield assocId).recover { - case NonFatal(e) => - logger.error(s"Error creating variant association for haplogroupId: $haplogroupId, variantId: $variantId. Error: ${e.getMessage}") - 0 - } - } -} diff --git a/app/services/TreeInitializationService.scala b/app/services/TreeInitializationService.scala deleted file mode 100644 index a6b61526..00000000 --- a/app/services/TreeInitializationService.scala +++ /dev/null @@ -1,92 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.api.TreeDTO -import play.api.Logging -import play.api.libs.json.Json -import repositories.HaplogroupRevisionRepository - -import java.nio.file.{Files, Path, Paths} -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service responsible for initializing haplogroup trees (e.g., Y-DNA and mtDNA trees). - * This service checks and imports missing tree data from specified files, if needed. - * - * @constructor Creates an instance of TreeInitializationService with injected dependencies. - * @param haplogroupRevisionRepository Repository for accessing haplogroup revision data. - * @param treeImporter Component responsible for importing tree structures into the database. 
- * @param config Configuration object containing file paths and related settings for tree imports. - * @param ec ExecutionContext for handling asynchronous operations. - */ -@Singleton -class TreeInitializationService @Inject()( - haplogroupRevisionRepository: HaplogroupRevisionRepository, - treeImporter: TreeImporter, - config: TreeImportConfig - )(implicit ec: ExecutionContext) - extends Logging { - private val YDnaTreePath = Paths.get(config.YDnaTreePath) - private val MtDnaTreePath = Paths.get(config.MtDnaTreePath) - - /** - * Checks and initializes both Y-DNA and mtDNA trees if needed. - * - * @return Future containing a map of tree type to import status - */ - def initializeIfNeeded(): Future[Map[HaplogroupType, Boolean]] = { - for { - // Check each tree type independently - yTreeStatus <- initializeTreeType(HaplogroupType.Y, YDnaTreePath) - mtTreeStatus <- initializeTreeType(HaplogroupType.MT, MtDnaTreePath) - } yield Map( - HaplogroupType.Y -> yTreeStatus, - HaplogroupType.MT -> mtTreeStatus - ) - } - - private def initializeTreeType( - haplogroupType: HaplogroupType, - filePath: Path - ): Future[Boolean] = { - for { - // Check if this tree type exists in DB - isEmpty <- isTreeTypeEmpty(haplogroupType) - // Check if import file exists - fileExists = Files.exists(filePath) - // Perform import if conditions are met - result <- (isEmpty, fileExists) match { - case (true, true) => - logger.info(s"Importing $haplogroupType tree from $filePath") - importFromFile(filePath, haplogroupType) - case (false, _) => - logger.info(s"$haplogroupType tree already exists in database, skipping import") - Future.successful(false) - case (_, false) => - logger.warn(s"Import file not found for $haplogroupType tree at $filePath") - Future.successful(false) - } - } yield result - } - - private def isTreeTypeEmpty(haplogroupType: HaplogroupType): Future[Boolean] = { - haplogroupRevisionRepository.countByType(haplogroupType).map(_ == 0) - } - - private def importFromFile(path: 
Path, haplogroupType: HaplogroupType): Future[Boolean] = { - Future { - val content = Files.readString(path) - Json.parse(content).as[TreeDTO] - }.flatMap { tree => - treeImporter.importTree(tree, haplogroupType) - .map { _ => - logger.info(s"Successfully imported ${haplogroupType} tree") - true - } - }.recover { case ex => - logger.error(s"Failed to import ${haplogroupType} tree", ex) - false - } - } -} diff --git a/app/services/TreeLayoutService.scala b/app/services/TreeLayoutService.scala deleted file mode 100644 index e0fe44c4..00000000 --- a/app/services/TreeLayoutService.scala +++ /dev/null @@ -1,165 +0,0 @@ -package services - -import models.api.* -import models.view.{TreeLinkViewModel, TreeNodeViewModel, TreeViewModel} - -import java.time.ZonedDateTime -import java.time.temporal.ChronoUnit - -enum TreeOrientation: - case Horizontal, Vertical - -/** - * Provides services for laying out a tree structure for rendering, including calculating coordinates, - * determining node connections, and styling nodes and links based on their properties. - */ -object TreeLayoutService { - - // Configuration for layout - private val NODE_WIDTH = 150.0 - private val NODE_HEIGHT = 80.0 - private val MARGIN_TOP = 50.0 - private val MARGIN_LEFT = 120.0 - - /** - * Transforms a TreeDTO into a TreeViewModel with calculated coordinates, link paths, and node colors. - * Collapses non-backbone branches only when the absolute top-level root (e.g., "Y") is displayed. - * When re-rooting to any other node, its full subtree is displayed. - * - * @param treeDto The TreeDTO representing the tree to be laid out. treeDto.subclade is the root of the currently displayed tree. - * @param isAbsoluteTopRoot True ONLY if the current display root (treeDto.subclade) is the actual top-most root of the entire system (e.g., "Y"). - * @param orientation The orientation of the tree (Horizontal or Vertical). Defaults to Horizontal. - * @return An Option containing the TreeViewModel if a subclade exists. 
- */ - def layoutTree(treeDto: TreeDTO, isAbsoluteTopRoot: Boolean, orientation: TreeOrientation = TreeOrientation.Horizontal): Option[TreeViewModel] = { - val oneYearAgo = ZonedDateTime.now().minus(1, ChronoUnit.YEARS) - - // Determine spacing based on orientation - // Horizontal: Depth is X (Level), Breadth is Y (Stack). Nodes are 80px high. - // Vertical: Depth is Y (Level), Breadth is X (Row). Nodes are 150px wide. - val (depthSpacing, breadthSpacing) = orientation match { - case TreeOrientation.Horizontal => (200.0, 90.0) - case TreeOrientation.Vertical => (130.0, 180.0) - } - - treeDto.subclade.map { currentDisplayRootDTO => - val initialBreadthPosition = orientation match { - case TreeOrientation.Horizontal => MARGIN_TOP - case TreeOrientation.Vertical => MARGIN_LEFT - } - - val allNodes = collection.mutable.ListBuffer[TreeNodeViewModel]() - val allLinks = collection.mutable.ListBuffer[TreeLinkViewModel]() - - /** Returns (nodeViewModel, nextBreadthPosition) */ - def calculateNodePositions(nodeDTO: TreeNodeDTO, depth: Int, isCurrentDisplayRoot: Boolean, breadthPosition: Double): (TreeNodeViewModel, Double) = { - // Depth Position (Level) - // Horizontal: Left-to-Right axis (svg x). - // Vertical: Top-to-Bottom axis (svg y). 
- val depthPos = depth * depthSpacing + (if (orientation == TreeOrientation.Horizontal) MARGIN_LEFT else MARGIN_TOP) - - val isRecentlyUpdated = nodeDTO.updated.isAfter(oneYearAgo) - - val fillColor = if (nodeDTO.isBackbone) { - "#d4edda" // Soft sage green (established) - } else if (isRecentlyUpdated) { - "#ffeeba" // Warm amber/tan (recently edited) - } else { - "#f8f9fa" // Light gray (default) - } - - val childrenToProcess = if (isCurrentDisplayRoot) { - nodeDTO.children - } else if (isAbsoluteTopRoot && !nodeDTO.isBackbone) { - List.empty[TreeNodeDTO] // Collapse - } else { - nodeDTO.children - } - - val (childViewModels, nextBreadth) = childrenToProcess.sortBy(_.weight).foldLeft((List.empty[TreeNodeViewModel], breadthPosition)) { - case ((accChildren, currentBreadth), childDTO) => - val (childVm, updatedBreadth) = calculateNodePositions(childDTO, depth + 1, false, currentBreadth) - (accChildren :+ childVm, updatedBreadth) - } - - // Breadth Position (Stack/Row) - // Horizontal: Top-to-Bottom axis (svg y). - // Vertical: Left-to-Right axis (svg x). - val (breadthPos, finalBreadth) = if (childViewModels.isEmpty) { - (breadthPosition, breadthPosition + breadthSpacing) - } else { - val firstChild = childViewModels.head - val lastChild = childViewModels.last - // Children store: x = breadth, y = depth. 
- ((firstChild.x + lastChild.x) / 2, nextBreadth) - } - - // Store in ViewModel: - // x = Breadth (Vertical pos in Horizontal layout; Horizontal pos in Vertical layout) - // y = Depth (Horizontal pos in Horizontal layout; Vertical pos in Vertical layout) - // This naming is confusing but preserved for backward compatibility with haplogroup.scala.html - // haplogroup.scala.html expects: x="@(node.y...)" (Depth->SVG X), y="@(node.x...)" (Breadth->SVG Y) - val nodeViewModel = TreeNodeViewModel( - name = nodeDTO.name, - variantsCount = nodeDTO.variantCount, - children = childViewModels, - fillColor = fillColor, - isBackbone = nodeDTO.isBackbone, - isRecentlyUpdated = isRecentlyUpdated, - formedYbp = nodeDTO.formedYbp, - tmrcaYbp = nodeDTO.tmrcaYbp, - x = breadthPos, // Breadth - y = depthPos // Depth - ) - allNodes += nodeViewModel - - childViewModels.foreach { child => - // Generate path data based on orientation - val pathData = orientation match { - case TreeOrientation.Horizontal => - val sourceDepth = nodeViewModel.y + NODE_WIDTH / 2 - val sourceBreadth = nodeViewModel.x - val targetDepth = child.y - NODE_WIDTH / 2 - val targetBreadth = child.x - s"M $sourceDepth $sourceBreadth " + - s"H ${(sourceDepth + targetDepth) / 2} " + - s"V $targetBreadth " + - s"H $targetDepth" - - case TreeOrientation.Vertical => - val sourceBreadth = nodeViewModel.x - val sourceDepth = nodeViewModel.y + NODE_HEIGHT / 2 - val targetBreadth = child.x - val targetDepth = child.y - NODE_HEIGHT / 2 - s"M $sourceBreadth $sourceDepth " + - s"V ${(sourceDepth + targetDepth) / 2} " + - s"H $targetBreadth " + - s"V $targetDepth" - } - - allLinks += TreeLinkViewModel(nodeViewModel.name, child.name, pathData) - } - - (nodeViewModel, finalBreadth) - } - - val (rootViewModel, _) = calculateNodePositions(currentDisplayRootDTO, 0, true, initialBreadthPosition) - - // Calculate SVG dimensions - // x = Breadth, y = Depth - val maxBreadth = allNodes.map(_.x).maxOption.getOrElse(0.0) - val maxDepth = 
allNodes.map(_.y).maxOption.getOrElse(0.0) - - val (svgWidth, svgHeight) = orientation match { - case TreeOrientation.Horizontal => - // Width = Depth, Height = Breadth - (maxDepth + NODE_WIDTH + MARGIN_LEFT, maxBreadth + NODE_HEIGHT + MARGIN_TOP) - case TreeOrientation.Vertical => - // Width = Breadth, Height = Depth - (maxBreadth + NODE_WIDTH + MARGIN_LEFT, maxDepth + NODE_HEIGHT + MARGIN_TOP) - } - - TreeViewModel(rootViewModel, allNodes.toList, allLinks.toList, svgWidth, svgHeight) - } - } -} \ No newline at end of file diff --git a/app/services/TreeMergeStagingHelper.scala b/app/services/TreeMergeStagingHelper.scala deleted file mode 100644 index 8be53df6..00000000 --- a/app/services/TreeMergeStagingHelper.scala +++ /dev/null @@ -1,190 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.dal.domain.haplogroups.{WipHaplogroupRow, WipHaplogroupVariantRow, WipRelationshipRow, WipReparentRow} -import models.domain.haplogroups.{Haplogroup, MergeContext} -import play.api.Logging -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, WipTreeRepository} - -import java.util.concurrent.atomic.AtomicInteger -import scala.concurrent.{ExecutionContext, Future} - -/** - * Helper service for routing tree merge operations to either WIP (staging) tables - * or production tables based on the merge context's staging mode flag. - * - * This service manages: - * - Placeholder ID generation for WIP nodes (negative IDs to avoid collision) - * - Routing haplogroup creation to WIP or production tables - * - Routing variant additions to WIP or production tables - * - Routing reparent operations to WIP or production tables - * - * In staging mode, all changes are written to WIP shadow tables for curator review. - * When not in staging mode, changes are applied directly to production tables. 
- */ -@Singleton -class TreeMergeStagingHelper @Inject()( - haplogroupRepository: HaplogroupCoreRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - wipTreeRepository: WipTreeRepository -)(implicit ec: ExecutionContext) extends Logging { - - // Placeholder ID counter for WIP nodes (starts negative to avoid collision with real IDs) - // Reset per merge operation via resetPlaceholderCounter() - private val placeholderCounter = new AtomicInteger(-1) - - /** - * Reset the placeholder counter for a new merge operation. - * Should be called at the start of each merge to ensure clean placeholder IDs. - */ - def resetPlaceholderCounter(): Unit = { - placeholderCounter.set(-1) - } - - /** - * Check if an ID is a placeholder (negative = WIP node, positive = production node). - */ - def isPlaceholder(id: Int): Boolean = id < 0 - - /** - * Create a haplogroup - routes to WIP table if staging mode, else production. - * - * @param haplogroup The haplogroup to create - * @param parentId Parent ID (can be production ID or placeholder) - * @param context Merge context with staging mode flag - * @return (newId, relationshipId) - newId is placeholder in staging mode, real ID otherwise - */ - def createHaplogroupStaged( - haplogroup: Haplogroup, - parentId: Option[Int], - context: MergeContext - ): Future[(Int, Option[Int])] = { - if (context.stagingMode) { - val changeSetId = context.changeSetId.getOrElse( - throw new IllegalStateException("Staging mode requires a change set ID") - ) - val placeholderId = placeholderCounter.getAndDecrement() - - // Create WIP haplogroup row - val wipHaplogroup = WipHaplogroupRow( - id = None, - changeSetId = changeSetId, - placeholderId = placeholderId, - name = haplogroup.name, - lineage = haplogroup.lineage, - description = haplogroup.description, - haplogroupType = haplogroup.haplogroupType, - source = haplogroup.source, - confidenceLevel = haplogroup.confidenceLevel, - formedYbp = haplogroup.formedYbp, - formedYbpLower = 
haplogroup.formedYbpLower, - formedYbpUpper = haplogroup.formedYbpUpper, - tmrcaYbp = haplogroup.tmrcaYbp, - tmrcaYbpLower = haplogroup.tmrcaYbpLower, - tmrcaYbpUpper = haplogroup.tmrcaYbpUpper, - ageEstimateSource = haplogroup.ageEstimateSource, - provenance = haplogroup.provenance, - createdAt = context.timestamp - ) - - for { - _ <- wipTreeRepository.createWipHaplogroup(wipHaplogroup) - - // Create WIP relationship if parent specified - _ <- parentId match { - case Some(pid) => - val wipRelationship = WipRelationshipRow( - id = None, - changeSetId = changeSetId, - childHaplogroupId = None, // Child is WIP - childPlaceholderId = Some(placeholderId), - parentHaplogroupId = if (isPlaceholder(pid)) None else Some(pid), - parentPlaceholderId = if (isPlaceholder(pid)) Some(pid) else None, - source = context.sourceName, - createdAt = context.timestamp - ) - wipTreeRepository.createWipRelationship(wipRelationship) - case None => - Future.successful(0) - } - } yield (placeholderId, None) // Return placeholder ID - } else { - haplogroupRepository.createWithParent(haplogroup, parentId, context.sourceName) - } - } - - /** - * Add variants to a haplogroup - routes to WIP table if staging mode. 
- * - * @param haplogroupId Haplogroup ID (can be production ID or placeholder) - * @param variantIds Variant IDs to add (always production variant IDs) - * @param context Merge context - * @return IDs of created associations - */ - def addVariantsStaged( - haplogroupId: Int, - variantIds: Seq[Int], - context: MergeContext - ): Future[Seq[Int]] = { - if (variantIds.isEmpty) { - Future.successful(Seq.empty) - } else if (context.stagingMode) { - val changeSetId = context.changeSetId.getOrElse( - throw new IllegalStateException("Staging mode requires a change set ID") - ) - val rows = variantIds.map { vid => - WipHaplogroupVariantRow( - id = None, - changeSetId = changeSetId, - haplogroupId = if (isPlaceholder(haplogroupId)) None else Some(haplogroupId), - haplogroupPlaceholderId = if (isPlaceholder(haplogroupId)) Some(haplogroupId) else None, - variantId = vid, - source = Some(context.sourceName), - createdAt = context.timestamp - ) - } - // Use upsert to handle cases where the same variant is added multiple times - wipTreeRepository.upsertWipHaplogroupVariants(rows) - } else { - haplogroupVariantRepository.bulkAddVariantsToHaplogroups( - variantIds.map(vid => (haplogroupId, vid)) - ) - } - } - - /** - * Reparent an existing production haplogroup - routes to WIP table if staging mode. 
- * - * @param haplogroupId The production haplogroup to reparent - * @param oldParentId Current parent (for reference) - * @param newParentId New parent ID (can be production ID or placeholder) - * @param context Merge context - */ - def reparentStaged( - haplogroupId: Int, - oldParentId: Option[Int], - newParentId: Int, - context: MergeContext - ): Future[Unit] = { - if (context.stagingMode) { - val changeSetId = context.changeSetId.getOrElse( - throw new IllegalStateException("Staging mode requires a change set ID") - ) - val wipReparent = WipReparentRow( - id = None, - changeSetId = changeSetId, - haplogroupId = haplogroupId, // Production haplogroup being reparented - oldParentId = oldParentId, - newParentId = if (isPlaceholder(newParentId)) None else Some(newParentId), - newParentPlaceholderId = if (isPlaceholder(newParentId)) Some(newParentId) else None, - source = context.sourceName, - createdAt = context.timestamp - ) - // Use upsert to handle cases where the same node is reparented multiple times - // (e.g., once by SUBTREE_LOOK_AHEAD and again by DEPTH_GRAFT) - wipTreeRepository.upsertWipReparent(wipReparent).map(_ => ()) - } else { - haplogroupRepository.updateParent(haplogroupId, newParentId, context.sourceName).map(_ => ()) - } - } -} diff --git a/app/services/TreeRestructuringService.scala b/app/services/TreeRestructuringService.scala deleted file mode 100644 index cc49fc67..00000000 --- a/app/services/TreeRestructuringService.scala +++ /dev/null @@ -1,210 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.VariantV2 -import models.domain.haplogroups.Haplogroup -import play.api.Logging -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantV2Repository} - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for tree restructuring operations: split and merge. 
- */ -@Singleton -class TreeRestructuringService @Inject()( - haplogroupRepository: HaplogroupCoreRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - variantV2Repository: VariantV2Repository, - auditService: CuratorAuditService -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Split: Create a new subclade by moving variants and optionally re-parenting children. - * - * @param parentId ID of the parent haplogroup - * @param newHaplogroup The new subclade haplogroup to create - * @param variantIds IDs of variants to MOVE from parent to new child - * @param childIds IDs of existing children to re-parent under new subclade - * @param userId User performing the operation - * @return ID of newly created haplogroup - */ - def splitBranch( - parentId: Int, - newHaplogroup: Haplogroup, - variantIds: Seq[Int], - childIds: Seq[Int], - userId: UUID - ): Future[Int] = { - for { - // Verify parent exists - parentOpt <- haplogroupRepository.findById(parentId) - parent = parentOpt.getOrElse(throw new IllegalArgumentException(s"Parent haplogroup $parentId not found")) - - // Get parent's current children to validate childIds - currentChildren <- haplogroupRepository.getDirectChildren(parentId) - currentChildIds = currentChildren.flatMap(_.id).toSet - _ = if (!childIds.forall(currentChildIds.contains)) { - throw new IllegalArgumentException("Some childIds are not direct children of the parent") - } - - // Create the new subclade with parent as its parent - (newId, _) <- haplogroupRepository.createWithParent(newHaplogroup, Some(parentId), "split-operation") - - // Move variants from parent to new child - movedVariantCount <- moveVariants(parentId, newId, variantIds) - - // Re-parent selected children to the new subclade - _ <- Future.traverse(childIds) { childId => - haplogroupRepository.updateParent(childId, newId, "split-operation") - } - - // Log the operation - _ <- auditService.logBranchSplit(userId, parentId, newId, movedVariantCount, 
childIds, Some(s"Split ${newHaplogroup.name} from ${parent.name}")) - - } yield newId - } - - /** - * Merge: Absorb a child haplogroup into its parent (inverse of split). - * Child's variants move to parent, child's children become parent's children, child is deleted. - * - * @param childId ID of the child haplogroup to absorb - * @param userId User performing the operation - * @return ID of the parent haplogroup - */ - def mergeIntoParent(childId: Int, userId: UUID): Future[Int] = { - for { - // Verify child exists and has a parent - childOpt <- haplogroupRepository.findById(childId) - child = childOpt.getOrElse(throw new IllegalArgumentException(s"Haplogroup $childId not found")) - - parentOpt <- haplogroupRepository.getParent(childId) - parent = parentOpt.getOrElse(throw new IllegalArgumentException(s"Haplogroup $childId has no parent - cannot merge root")) - parentId = parent.id.get - - // Get child's children (grandchildren) to promote - grandchildren <- haplogroupRepository.getDirectChildren(childId) - grandchildIds = grandchildren.flatMap(_.id) - - // Get child's variants to move up - childVariants <- haplogroupVariantRepository.getHaplogroupVariants(childId) - - // Get parent's existing variants to check for duplicates - parentVariants <- haplogroupVariantRepository.getHaplogroupVariants(parentId) - parentVariantIds = parentVariants.flatMap(_.variantId).toSet - - // Move unique variants from child to parent - movedVariantCount <- moveVariantsUp(childId, parentId, parentVariantIds) - - // Promote grandchildren to parent - _ <- Future.traverse(grandchildIds) { grandchildId => - haplogroupRepository.updateParent(grandchildId, parentId, "merge-operation") - } - - // Soft-delete the child (this will also soft-delete its parent relationship) - _ <- haplogroupRepository.softDelete(childId, "merge-operation") - - // Log the operation - _ <- auditService.logMergeIntoParent(userId, parentId, childId, movedVariantCount, grandchildIds.size, Some(s"Merged ${child.name} 
into ${parent.name}")) - - } yield parentId - } - - /** - * Move variants from source haplogroup to target haplogroup. - */ - private def moveVariants(sourceId: Int, targetId: Int, variantIds: Seq[Int]): Future[Int] = { - if (variantIds.isEmpty) { - Future.successful(0) - } else { - Future.traverse(variantIds) { variantId => - for { - // Remove from source - _ <- haplogroupVariantRepository.removeVariantFromHaplogroup(sourceId, variantId) - // Add to target - _ <- haplogroupVariantRepository.addVariantToHaplogroup(targetId, variantId) - } yield 1 - }.map(_.sum) - } - } - - /** - * Move all unique variants from child to parent. - */ - private def moveVariantsUp(childId: Int, parentId: Int, existingParentVariantIds: Set[Int]): Future[Int] = { - for { - childVariants <- haplogroupVariantRepository.getHaplogroupVariants(childId) - childVariantIds = childVariants.flatMap(_.variantId) - - // Only move variants that don't already exist on parent - uniqueVariantIds = childVariantIds.filterNot(existingParentVariantIds.contains) - - // Move unique variants - _ <- Future.traverse(uniqueVariantIds) { variantId => - for { - _ <- haplogroupVariantRepository.removeVariantFromHaplogroup(childId, variantId) - _ <- haplogroupVariantRepository.addVariantToHaplogroup(parentId, variantId) - } yield () - } - } yield uniqueVariantIds.size - } - - /** - * Get preview information for a split operation. - */ - def getSplitPreview(parentId: Int): Future[SplitPreview] = { - for { - parentOpt <- haplogroupRepository.findById(parentId) - parent = parentOpt.getOrElse(throw new IllegalArgumentException(s"Parent haplogroup $parentId not found")) - variants <- haplogroupVariantRepository.getHaplogroupVariants(parentId) - children <- haplogroupRepository.getDirectChildren(parentId) - } yield SplitPreview(parent, variants, children) - } - - /** - * Get preview information for a merge operation. 
- */ - def getMergePreview(childId: Int): Future[MergePreview] = { - for { - childOpt <- haplogroupRepository.findById(childId) - child = childOpt.getOrElse(throw new IllegalArgumentException(s"Haplogroup $childId not found")) - - parentOpt <- haplogroupRepository.getParent(childId) - parent = parentOpt.getOrElse(throw new IllegalArgumentException(s"Haplogroup $childId has no parent")) - - childVariants <- haplogroupVariantRepository.getHaplogroupVariants(childId) - grandchildren <- haplogroupRepository.getDirectChildren(childId) - - parentVariants <- haplogroupVariantRepository.getHaplogroupVariants(parent.id.get) - parentVariantIds = parentVariants.flatMap(_.variantId).toSet - - // Calculate unique variants that will be moved - uniqueVariants = childVariants.filter { v => - v.variantId.exists(!parentVariantIds.contains(_)) - } - - } yield MergePreview(child, parent, childVariants, uniqueVariants, grandchildren) - } -} - -/** - * Preview data for a split operation. - */ -case class SplitPreview( - parent: Haplogroup, - variants: Seq[VariantV2], - children: Seq[Haplogroup] -) - -/** - * Preview data for a merge operation. 
- */ -case class MergePreview( - child: Haplogroup, - parent: Haplogroup, - allVariants: Seq[VariantV2], - uniqueVariants: Seq[VariantV2], - grandchildren: Seq[Haplogroup] -) diff --git a/app/services/TreeVersioningService.scala b/app/services/TreeVersioningService.scala deleted file mode 100644 index 9daca1b1..00000000 --- a/app/services/TreeVersioningService.scala +++ /dev/null @@ -1,1629 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.api.haplogroups.MergeStatistics -import models.domain.haplogroups.* -import play.api.Logging -import play.api.libs.json.Json -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, TreeVersioningRepository, WipTreeRepository} - -import java.time.LocalDateTime -import java.time.format.DateTimeFormatter -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for managing tree versioning (Production/WIP). - * - * Provides functionality to: - * - Create and manage change sets for bulk operations - * - Record individual tree changes during merges - * - Finalize change sets for curator review - * - Apply or discard change sets - * - * This service acts as a facade over the TreeVersioningRepository, - * adding business logic and coordination with other services. - */ -trait TreeVersioningService { - - // ============================================================================ - // Change Set Lifecycle - // ============================================================================ - - /** - * Create a new change set for bulk operations. - * Only one active (DRAFT/READY_FOR_REVIEW/UNDER_REVIEW) change set per type at a time. - */ - def createChangeSet( - haplogroupType: HaplogroupType, - sourceName: String, - description: Option[String] = None, - createdBy: String = "system" - ): Future[ChangeSet] - - /** - * Get the active change set for a haplogroup type (if any). 
- */ - def getActiveChangeSet(haplogroupType: HaplogroupType): Future[Option[ChangeSet]] - - /** - * Get a change set by ID with full details. - */ - def getChangeSetDetails(id: Int): Future[Option[ChangeSetDetails]] - - /** - * List change sets with optional filters. - */ - def listChangeSets( - haplogroupType: Option[HaplogroupType] = None, - status: Option[ChangeSetStatus] = None, - page: Int = 1, - pageSize: Int = 20 - ): Future[(Seq[ChangeSetSummary], Int)] - - /** - * Finalize a change set, moving it from DRAFT to READY_FOR_REVIEW. - * Called after a merge operation completes. - */ - def finalizeChangeSet( - changeSetId: Int, - statistics: MergeStatistics, - ambiguityReportPath: Option[String] = None - ): Future[Boolean] - - /** - * Mark a change set as under review. - */ - def startReview(changeSetId: Int, curatorId: String): Future[Boolean] - - /** - * Apply a change set to Production. - * All pending changes are applied and the set moves to APPLIED status. - */ - def applyChangeSet(changeSetId: Int, curatorId: String): Future[Boolean] - - /** - * Discard a change set. - * All changes are abandoned and the set moves to DISCARDED status. - */ - def discardChangeSet(changeSetId: Int, curatorId: String, reason: String): Future[Boolean] - - // ============================================================================ - // Change Recording - // ============================================================================ - - /** - * Record a CREATE change (new haplogroup). - */ - def recordCreate( - changeSetId: Int, - haplogroupData: String, // JSON representation of haplogroup - parentId: Option[Int], - ambiguityType: Option[String] = None, - ambiguityConfidence: Option[Double] = None - ): Future[Int] - - /** - * Record an UPDATE change (haplogroup metadata update). 
- */ - def recordUpdate( - changeSetId: Int, - haplogroupId: Int, - oldData: String, // JSON of previous state - newData: String, // JSON of new state - ambiguityType: Option[String] = None, - ambiguityConfidence: Option[Double] = None - ): Future[Int] - - /** - * Record a REPARENT change. - */ - def recordReparent( - changeSetId: Int, - haplogroupId: Int, - oldParentId: Option[Int], - newParentId: Int, - ambiguityType: Option[String] = None, - ambiguityConfidence: Option[Double] = None - ): Future[Int] - - /** - * Record an ADD_VARIANT change. - */ - def recordAddVariant( - changeSetId: Int, - haplogroupId: Int, - variantId: Int - ): Future[Int] - - /** - * Record a REMOVE_VARIANT change. - */ - def recordRemoveVariant( - changeSetId: Int, - haplogroupId: Int, - variantId: Int - ): Future[Int] - - // ============================================================================ - // Change Review - // ============================================================================ - - /** - * Get pending changes for review, ordered by ambiguity confidence (lowest first). - */ - def getPendingReviewChanges(changeSetId: Int, limit: Int = 50): Future[Seq[TreeChange]] - - /** - * Get pending changes for review with names resolved for UI display. - */ - def getPendingReviewChangesWithNames(changeSetId: Int, limit: Int = 50): Future[Seq[TreeChangeView]] - - /** - * Review a specific change. - */ - def reviewChange( - changeId: Int, - curatorId: String, - action: ChangeStatus, // APPLIED, SKIPPED, REVERTED - notes: Option[String] = None - ): Future[Boolean] - - /** - * Bulk approve all remaining pending changes. - */ - def approveAllPending(changeSetId: Int, curatorId: String): Future[Int] - - // ============================================================================ - // Comments - // ============================================================================ - - /** - * Add a comment to a change set. 
- */ - def addComment( - changeSetId: Int, - author: String, - content: String, - treeChangeId: Option[Int] = None - ): Future[Int] - - /** - * List comments for a change set. - */ - def listComments(changeSetId: Int): Future[Seq[ChangeSetComment]] - - // ============================================================================ - // Tree Diff (Phase 3) - // ============================================================================ - - /** - * Get the diff between Production and WIP for a specific change set. - * Computes differences by analyzing pending changes. - */ - def getTreeDiff(changeSetId: Int): Future[TreeDiff] - - /** - * Get the diff between Production and the active WIP change set (if any). - */ - def getActiveTreeDiff(haplogroupType: HaplogroupType): Future[Option[TreeDiff]] - - /** - * Get all changes for a change set grouped by type for diff display. - */ - def getChangesForDiff(changeSetId: Int): Future[Seq[TreeChange]] - - /** - * Generate an ASCII tree preview of proposed changes. - * Shows affected subtrees with new nodes marked [+], reparented nodes marked [→], - * and variants listed for each node. 
- */ - def getTreePreview(changeSetId: Int): Future[String] -} - -@Singleton -class TreeVersioningServiceImpl @Inject()( - repository: TreeVersioningRepository, - wipTreeRepository: WipTreeRepository, - haplogroupRepository: HaplogroupCoreRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - auditService: CuratorAuditService -)(implicit ec: ExecutionContext) - extends TreeVersioningService - with Logging { - - private val timestampFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd-HHmmss") - - // ============================================================================ - // Change Set Lifecycle - // ============================================================================ - - override def createChangeSet( - haplogroupType: HaplogroupType, - sourceName: String, - description: Option[String], - createdBy: String - ): Future[ChangeSet] = { - // Check for existing active change set - repository.getActiveChangeSet(haplogroupType).flatMap { - case Some(existing) => - Future.failed(new IllegalStateException( - s"Active change set already exists for $haplogroupType: ${existing.name} (${existing.status})" - )) - case None => - val now = LocalDateTime.now() - val name = s"$sourceName-${now.format(timestampFormatter)}" - val changeSet = ChangeSet( - id = None, - haplogroupType = haplogroupType, - name = name, - description = description, - sourceName = sourceName, - createdAt = now, - createdBy = createdBy - ) - repository.createChangeSet(changeSet).flatMap { id => - repository.getChangeSet(id).map { csOpt => - val cs = csOpt.getOrElse( - throw new IllegalStateException(s"Failed to retrieve created change set with id $id") - ) - // Log audit entry for change set creation - auditService.logChangeSetCreate(createdBy, cs, Some(s"Created from $sourceName")) - cs - } - } - } - } - - override def getActiveChangeSet(haplogroupType: HaplogroupType): Future[Option[ChangeSet]] = { - repository.getActiveChangeSet(haplogroupType) - } - - override def 
getChangeSetDetails(id: Int): Future[Option[ChangeSetDetails]] = { - for { - changeSetOpt <- repository.getChangeSet(id) - result <- changeSetOpt match { - case Some(changeSet) => - for { - totalChanges <- repository.countTreeChanges(id) - byType <- repository.getChangeSummaryByType(id) - byStatus <- repository.getChangeSummaryByStatus(id) - comments <- repository.listComments(id) - } yield Some(ChangeSetDetails( - changeSet = changeSet, - totalChanges = totalChanges, - changesByType = byType.map { case (k, v) => TreeChangeType.toDbString(k) -> v }, - changesByStatus = byStatus.map { case (k, v) => ChangeStatus.toDbString(k) -> v }, - comments = comments.toList - )) - case None => - Future.successful(None) - } - } yield result - } - - override def listChangeSets( - haplogroupType: Option[HaplogroupType], - status: Option[ChangeSetStatus], - page: Int, - pageSize: Int - ): Future[(Seq[ChangeSetSummary], Int)] = { - val offset = (page - 1) * pageSize - for { - changeSets <- repository.listChangeSets(haplogroupType, status, pageSize, offset) - total <- repository.countChangeSets(haplogroupType, status) - summaries <- Future.sequence(changeSets.map { cs => - for { - totalChanges <- repository.countTreeChanges(cs.id.get) - pendingChanges <- repository.countTreeChanges(cs.id.get, status = Some(ChangeStatus.Pending)) - // Count reviewed as anything that's not PENDING - reviewedChanges = totalChanges - pendingChanges - } yield ChangeSetSummary( - id = cs.id.get, - haplogroupType = cs.haplogroupType, - name = cs.name, - sourceName = cs.sourceName, - status = cs.status, - createdAt = cs.createdAt, - createdBy = cs.createdBy, - statistics = cs.statistics, - totalChanges = totalChanges, - pendingChanges = pendingChanges, - reviewedChanges = reviewedChanges - ) - }) - } yield (summaries, total) - } - - override def finalizeChangeSet( - changeSetId: Int, - statistics: MergeStatistics, - ambiguityReportPath: Option[String] - ): Future[Boolean] = { - val csStats = 
ChangeSetStatistics( - nodesProcessed = statistics.nodesProcessed, - nodesCreated = statistics.nodesCreated, - nodesUpdated = statistics.nodesUpdated, - nodesUnchanged = statistics.nodesUnchanged, - variantsAdded = statistics.variantsAdded, - relationshipsCreated = statistics.relationshipsCreated, - relationshipsUpdated = statistics.relationshipsUpdated, - splitOperations = statistics.splitOperations, - ambiguityCount = 0 // Will be updated from the changes - ) - repository.finalizeChangeSet(changeSetId, csStats, ambiguityReportPath).map { result => - if (result) { - logger.info(s"Change set $changeSetId finalized and ready for review") - } - result - } - } - - override def startReview(changeSetId: Int, curatorId: String): Future[Boolean] = { - repository.getChangeSet(changeSetId).flatMap { - case Some(cs) if cs.status == ChangeSetStatus.ReadyForReview => - repository.updateChangeSetStatus(changeSetId, ChangeSetStatus.UnderReview).map { result => - if (result) { - logger.info(s"Change set $changeSetId now under review by $curatorId") - // Log audit entry for status change - auditService.logChangeSetStatusChange( - curatorId, changeSetId, - ChangeSetStatus.ReadyForReview, ChangeSetStatus.UnderReview, - Some(s"Review started by $curatorId") - ) - } - result - } - case Some(cs) => - Future.failed(new IllegalStateException( - s"Cannot start review: change set is ${cs.status}, expected READY_FOR_REVIEW" - )) - case None => - Future.failed(new NoSuchElementException(s"Change set $changeSetId not found")) - } - } - - override def applyChangeSet(changeSetId: Int, curatorId: String): Future[Boolean] = { - repository.getChangeSet(changeSetId).flatMap { - case Some(cs) if cs.status == ChangeSetStatus.UnderReview || cs.status == ChangeSetStatus.ReadyForReview => - for { - // Check for WIP data (staging mode merge) - wipStats <- wipTreeRepository.getWipStatistics(changeSetId) - hasWipData = wipStats.haplogroups > 0 || wipStats.relationships > 0 || - wipStats.variants > 0 || 
wipStats.reparents > 0 - - // Apply WIP data to production if present - appliedWipCount <- if (hasWipData) { - logger.info(s"Applying WIP data for change set $changeSetId: " + - s"${wipStats.haplogroups} haplogroups, ${wipStats.relationships} relationships, " + - s"${wipStats.variants} variants, ${wipStats.reparents} reparents") - applyWipToProduction(changeSetId, cs.haplogroupType, cs.sourceName) - } else { - Future.successful(0) - } - - // Apply all pending tree changes (legacy mode) - appliedCount <- repository.applyAllPendingChanges(changeSetId) - - // Mark the change set as applied - result <- repository.applyChangeSet(changeSetId, curatorId) - - // Clean up WIP data (tables have ON DELETE CASCADE, but explicit cleanup is cleaner) - _ <- if (hasWipData) wipTreeRepository.deleteWipDataForChangeSet(changeSetId) - else Future.successful(0) - - // Get updated change set for audit - updatedCs <- repository.getChangeSet(changeSetId) - } yield { - if (result) { - val totalApplied = appliedCount + appliedWipCount - logger.info(s"Change set $changeSetId applied to Production by $curatorId " + - s"($totalApplied changes: $appliedCount legacy, $appliedWipCount from WIP)") - // Log audit entry for apply action - updatedCs.foreach { ucs => - auditService.logChangeSetApply(curatorId, ucs, totalApplied, Some("Applied to Production")) - } - } - result - } - case Some(cs) => - Future.failed(new IllegalStateException( - s"Cannot apply: change set is ${cs.status}, expected READY_FOR_REVIEW or UNDER_REVIEW" - )) - case None => - Future.failed(new NoSuchElementException(s"Change set $changeSetId not found")) - } - } - - /** - * Apply WIP (staging) data to production tables. - * - * This method is called when applying a change set that was created in staging mode. - * It copies data from WIP shadow tables to production tables, resolving placeholder - * IDs to real production IDs. 
- * - * Resolution handling: - * - REPARENT: Uses the resolution's parent instead of the WIP parent - * - EDIT_VARIANTS: Applies variant add/remove after node creation - * - MERGE_EXISTING: Skips node creation, remaps relationships to merge target - * - DEFER: Skips the item entirely (remains in WIP) - * - * @param changeSetId The change set ID - * @param haplogroupType Y or MT DNA type - * @param sourceName Source name for new records - * @return Number of operations applied - */ - private def applyWipToProduction( - changeSetId: Int, - haplogroupType: HaplogroupType, - sourceName: String - ): Future[Int] = { - import models.dal.domain.haplogroups.WipResolutionRow - - val now = LocalDateTime.now() - - for { - // 0. Get all pending resolutions for this change set - resolutions <- wipTreeRepository.getPendingResolutions(changeSetId) - _ = logger.info(s"Found ${resolutions.size} pending resolutions to apply") - - // Build resolution lookup maps - resolutionsByWipHg = resolutions.filter(_.wipHaplogroupId.isDefined) - .map(r => r.wipHaplogroupId.get -> r).toMap - resolutionsByWipReparent = resolutions.filter(_.wipReparentId.isDefined) - .map(r => r.wipReparentId.get -> r).toMap - - // Identify deferred and merged WIP haplogroups - deferredWipHgIds = resolutions - .filter(r => r.resolutionType == "DEFER" && r.wipHaplogroupId.isDefined) - .flatMap(_.wipHaplogroupId).toSet - mergeResolutions = resolutions - .filter(r => r.resolutionType == "MERGE_EXISTING" && r.wipHaplogroupId.isDefined) - mergedWipHgIds = mergeResolutions.flatMap(_.wipHaplogroupId).toSet - - // Build merge mapping: WIP haplogroup ID -> production merge target ID - mergeMapping = mergeResolutions.flatMap { r => - for { - wipId <- r.wipHaplogroupId - targetId <- r.mergeTargetId - } yield wipId -> targetId - }.toMap - - // 1. 
Get all WIP haplogroups (excluding deferred and merged) - allWipHaplogroups <- wipTreeRepository.getWipHaplogroupsForChangeSet(changeSetId) - wipHaplogroups = allWipHaplogroups.filterNot { wh => - val wipId = wh.id.getOrElse(-1) - deferredWipHgIds.contains(wipId) || mergedWipHgIds.contains(wipId) - } - _ = logger.info(s"Creating ${wipHaplogroups.size} haplogroups from WIP " + - s"(${deferredWipHgIds.size} deferred, ${mergedWipHgIds.size} merged)") - - // 2. Create production haplogroups and build placeholder → real ID mapping - placeholderToRealId <- createProductionHaplogroups(wipHaplogroups, haplogroupType, sourceName, now) - - // Add merge mappings: for merged WIP nodes, map placeholder to merge target - wipHgToPlaceholder = allWipHaplogroups.flatMap(wh => wh.id.map(_ -> wh.placeholderId)).toMap - mergeIdMapping = mergeMapping.flatMap { case (wipHgId, targetId) => - wipHgToPlaceholder.get(wipHgId).map(_ -> targetId) - } - fullPlaceholderMapping = placeholderToRealId ++ mergeIdMapping - _ = logger.info(s"Created ${placeholderToRealId.size} production haplogroups, " + - s"${mergeIdMapping.size} mapped to merge targets") - - // 3. 
Get all WIP relationships and create them in production - // Filter out relationships involving deferred nodes - allWipRelationships <- wipTreeRepository.getWipRelationshipsForChangeSet(changeSetId) - deferredPlaceholders = allWipHaplogroups - .filter(wh => wh.id.exists(deferredWipHgIds.contains)) - .map(_.placeholderId).toSet - wipRelationships = allWipRelationships.filterNot { rel => - rel.childPlaceholderId.exists(deferredPlaceholders.contains) || - rel.parentPlaceholderId.exists(deferredPlaceholders.contains) - } - _ = logger.info(s"Creating ${wipRelationships.size} relationships from WIP") - - // Apply REPARENT resolutions to relationships - reparentResolutions = resolutions.filter(_.resolutionType == "REPARENT") - relationshipsCreated <- createProductionRelationshipsWithResolutions( - wipRelationships, fullPlaceholderMapping, sourceName, reparentResolutions - ) - - // 4. Get all WIP variant associations and create them in production - allWipVariants <- wipTreeRepository.getWipVariantsForChangeSet(changeSetId) - wipVariants = allWipVariants.filterNot { v => - v.haplogroupPlaceholderId.exists(deferredPlaceholders.contains) - } - _ = logger.info(s"Creating ${wipVariants.size} variant associations from WIP") - variantsCreated <- createProductionVariants(wipVariants, fullPlaceholderMapping) - - // 5. Apply EDIT_VARIANTS resolutions - editVariantResolutions = resolutions.filter(_.resolutionType == "EDIT_VARIANTS") - editVariantsApplied <- applyEditVariantResolutions(editVariantResolutions, fullPlaceholderMapping, now) - _ = logger.info(s"Applied ${editVariantsApplied} edit variant resolutions") - - // 6. 
Get all WIP reparents and apply them (excluding deferred) - allWipReparents <- wipTreeRepository.getWipReparentsForChangeSet(changeSetId) - deferredReparentIds = resolutions - .filter(r => r.resolutionType == "DEFER" && r.wipReparentId.isDefined) - .flatMap(_.wipReparentId).toSet - wipReparents = allWipReparents.filterNot(r => r.id.exists(deferredReparentIds.contains)) - _ = logger.info(s"Applying ${wipReparents.size} reparents from WIP") - - // Apply reparents with resolution overrides - reparentsApplied <- applyProductionReparentsWithResolutions( - wipReparents, fullPlaceholderMapping, sourceName, resolutionsByWipReparent - ) - - // 7. Mark all applied resolutions as APPLIED - _ <- markResolutionsApplied(resolutions.filterNot(_.resolutionType == "DEFER"), now) - - } yield wipHaplogroups.size + relationshipsCreated + variantsCreated + reparentsApplied + editVariantsApplied - } - - /** - * Create production haplogroups from WIP data. - * Returns a map of placeholder ID → real production ID. 
- */ - private def createProductionHaplogroups( - wipHaplogroups: Seq[models.dal.domain.haplogroups.WipHaplogroupRow], - haplogroupType: HaplogroupType, - sourceName: String, - now: LocalDateTime - ): Future[Map[Int, Int]] = { - // Process in order (by placeholder ID) to ensure parents are created before children - val sortedWip = wipHaplogroups.sortBy(_.placeholderId)(Ordering[Int].reverse) // Most negative (first created) first - - sortedWip.foldLeft(Future.successful(Map.empty[Int, Int])) { (accFuture, wip) => - accFuture.flatMap { mapping => - val haplogroup = Haplogroup( - id = None, - name = wip.name, - lineage = wip.lineage, - description = wip.description, - haplogroupType = haplogroupType, - revisionId = 1, - source = wip.source, - confidenceLevel = wip.confidenceLevel, - validFrom = now, - validUntil = None, - formedYbp = wip.formedYbp, - formedYbpLower = wip.formedYbpLower, - formedYbpUpper = wip.formedYbpUpper, - tmrcaYbp = wip.tmrcaYbp, - tmrcaYbpLower = wip.tmrcaYbpLower, - tmrcaYbpUpper = wip.tmrcaYbpUpper, - ageEstimateSource = wip.ageEstimateSource, - provenance = wip.provenance - ) - - // Create without parent (relationships handled separately) - haplogroupRepository.createWithParent(haplogroup, None, sourceName).map { case (realId, _) => - logger.debug(s"Created haplogroup ${wip.name}: placeholder ${wip.placeholderId} → real $realId") - mapping + (wip.placeholderId -> realId) - } - } - } - } - - /** - * Create production relationships from WIP data. - * Processes in batches to avoid thread pool exhaustion. 
- */ - private def createProductionRelationships( - wipRelationships: Seq[models.dal.domain.haplogroups.WipRelationshipRow], - placeholderToRealId: Map[Int, Int], - sourceName: String - ): Future[Int] = { - def resolveId(haplogroupId: Option[Int], placeholderId: Option[Int]): Option[Int] = { - haplogroupId.orElse(placeholderId.flatMap(placeholderToRealId.get)) - } - - val batchSize = 100 - val batches = wipRelationships.grouped(batchSize).toSeq - - // Process batches sequentially - batches.foldLeft(Future.successful(0)) { (accFuture, batch) => - accFuture.flatMap { acc => - // Process items within batch in parallel - Future.sequence(batch.map { wip => - val childId = resolveId(wip.childHaplogroupId, wip.childPlaceholderId) - val parentId = resolveId(wip.parentHaplogroupId, wip.parentPlaceholderId) - - (childId, parentId) match { - case (Some(cid), Some(pid)) => - haplogroupRepository.updateParent(cid, pid, sourceName).map(_ => 1) - case _ => - logger.warn(s"Could not resolve relationship: child=${wip.childHaplogroupId}/${wip.childPlaceholderId}, " + - s"parent=${wip.parentHaplogroupId}/${wip.parentPlaceholderId}") - Future.successful(0) - } - }).map(results => acc + results.sum) - } - } - } - - /** - * Create production variant associations from WIP data. - */ - private def createProductionVariants( - wipVariants: Seq[models.dal.domain.haplogroups.WipHaplogroupVariantRow], - placeholderToRealId: Map[Int, Int] - ): Future[Int] = { - val resolvedVariants = wipVariants.flatMap { wip => - val haplogroupId = wip.haplogroupId.orElse(wip.haplogroupPlaceholderId.flatMap(placeholderToRealId.get)) - haplogroupId.map(hid => (hid, wip.variantId)) - } - - if (resolvedVariants.nonEmpty) { - haplogroupVariantRepository.bulkAddVariantsToHaplogroups(resolvedVariants).map(_.size) - } else { - Future.successful(0) - } - } - - /** - * Apply reparent operations from WIP data. - * Processes in batches to avoid thread pool exhaustion. 
- */ - private def applyProductionReparents( - wipReparents: Seq[models.dal.domain.haplogroups.WipReparentRow], - placeholderToRealId: Map[Int, Int], - sourceName: String - ): Future[Int] = { - val batchSize = 100 - val batches = wipReparents.grouped(batchSize).toSeq - - // Process batches sequentially - batches.foldLeft(Future.successful(0)) { (accFuture, batch) => - accFuture.flatMap { acc => - // Process items within batch in parallel - Future.sequence(batch.map { wip => - val newParentId = wip.newParentId.orElse(wip.newParentPlaceholderId.flatMap(placeholderToRealId.get)) - - newParentId match { - case Some(pid) => - haplogroupRepository.updateParent(wip.haplogroupId, pid, sourceName).map(_ => 1) - case None => - logger.warn(s"Could not resolve reparent for haplogroup ${wip.haplogroupId}: " + - s"newParent=${wip.newParentId}/${wip.newParentPlaceholderId}") - Future.successful(0) - } - }).map(results => acc + results.sum) - } - } - } - - /** - * Create production relationships with REPARENT resolution overrides. - * If a resolution specifies a different parent for a WIP haplogroup, use that instead. 
- */ - private def createProductionRelationshipsWithResolutions( - wipRelationships: Seq[models.dal.domain.haplogroups.WipRelationshipRow], - placeholderToRealId: Map[Int, Int], - sourceName: String, - reparentResolutions: Seq[models.dal.domain.haplogroups.WipResolutionRow] - ): Future[Int] = { - // Build lookup: WIP haplogroup ID -> resolution with new parent - val resolutionByWipHgId = reparentResolutions - .filter(_.wipHaplogroupId.isDefined) - .map(r => r.wipHaplogroupId.get -> r) - .toMap - - def resolveId(haplogroupId: Option[Int], placeholderId: Option[Int]): Option[Int] = { - haplogroupId.orElse(placeholderId.flatMap(placeholderToRealId.get)) - } - - def resolveParentWithResolution( - childWipHgId: Option[Int], - originalParentHgId: Option[Int], - originalParentPlaceholderId: Option[Int] - ): Option[Int] = { - // Check if there's a REPARENT resolution for this child - childWipHgId.flatMap(resolutionByWipHgId.get) match { - case Some(resolution) => - // Use resolution's parent instead - resolution.newParentId.orElse( - resolution.newParentPlaceholderId.flatMap(placeholderToRealId.get) - ) - case None => - // Use original parent - resolveId(originalParentHgId, originalParentPlaceholderId) - } - } - - val batchSize = 100 - val batches = wipRelationships.grouped(batchSize).toSeq - - batches.foldLeft(Future.successful(0)) { (accFuture, batch) => - accFuture.flatMap { acc => - Future.sequence(batch.map { wip => - val childId = resolveId(wip.childHaplogroupId, wip.childPlaceholderId) - - // Note: We need the WIP haplogroup ID to check resolutions - // For now, we use the relationship's child placeholder to look up - val parentId = resolveParentWithResolution( - None, // Would need WIP haplogroup ID here - see below - wip.parentHaplogroupId, - wip.parentPlaceholderId - ) - - (childId, parentId) match { - case (Some(cid), Some(pid)) => - haplogroupRepository.updateParent(cid, pid, sourceName).map(_ => 1) - case _ => - logger.warn(s"Could not resolve relationship: 
child=${wip.childHaplogroupId}/${wip.childPlaceholderId}, " + - s"parent=${wip.parentHaplogroupId}/${wip.parentPlaceholderId}") - Future.successful(0) - } - }).map(results => acc + results.sum) - } - } - } - - /** - * Apply EDIT_VARIANTS resolutions. - * Adds/removes variants from haplogroups as specified in resolutions. - */ - private def applyEditVariantResolutions( - resolutions: Seq[models.dal.domain.haplogroups.WipResolutionRow], - placeholderToRealId: Map[Int, Int], - now: LocalDateTime - ): Future[Int] = { - import play.api.libs.json.Json - - if (resolutions.isEmpty) { - Future.successful(0) - } else { - Future.sequence(resolutions.map { resolution => - // Parse variant IDs from JSON arrays - val variantsToAdd = resolution.variantsToAdd - .flatMap(s => scala.util.Try(Json.parse(s).as[Seq[Int]]).toOption) - .getOrElse(Seq.empty) - val variantsToRemove = resolution.variantsToRemove - .flatMap(s => scala.util.Try(Json.parse(s).as[Seq[Int]]).toOption) - .getOrElse(Seq.empty) - - // Resolve haplogroup ID (from WIP haplogroup if specified) - // For now, EDIT_VARIANTS works with production haplogroups - // (variants to add to existing nodes, not new WIP nodes) - val haplogroupIdOpt = resolution.wipHaplogroupId.flatMap { wipHgId => - // This would need enhancement to look up WIP haplogroup → placeholder → real ID - // For now, assume it's used with production haplogroup IDs via newParentId field - // or the resolution is created after the node exists - None - } - - // If we have a real haplogroup ID to work with - haplogroupIdOpt match { - case Some(hgId) => - for { - // Add variants - added <- if (variantsToAdd.nonEmpty) { - haplogroupVariantRepository.bulkAddVariantsToHaplogroups( - variantsToAdd.map(vid => (hgId, vid)) - ).map(_.size) - } else Future.successful(0) - - // Remove variants - removed <- if (variantsToRemove.nonEmpty) { - Future.sequence(variantsToRemove.map { vid => - haplogroupVariantRepository.removeVariantFromHaplogroup(hgId, vid) - 
}).map(_.count(_ > 0)) - } else Future.successful(0) - } yield added + removed - - case None => - logger.debug(s"Skipping EDIT_VARIANTS resolution ${resolution.id} - no resolvable haplogroup ID") - Future.successful(0) - } - }).map(_.sum) - } - } - - /** - * Apply reparent operations with resolution overrides. - * If a resolution specifies a different parent for a reparent operation, use that instead. - */ - private def applyProductionReparentsWithResolutions( - wipReparents: Seq[models.dal.domain.haplogroups.WipReparentRow], - placeholderToRealId: Map[Int, Int], - sourceName: String, - resolutionsByWipReparent: Map[Int, models.dal.domain.haplogroups.WipResolutionRow] - ): Future[Int] = { - val batchSize = 100 - val batches = wipReparents.grouped(batchSize).toSeq - - batches.foldLeft(Future.successful(0)) { (accFuture, batch) => - accFuture.flatMap { acc => - Future.sequence(batch.map { wip => - // Check for REPARENT resolution override - val resolution = wip.id.flatMap(resolutionsByWipReparent.get) - - val newParentId = resolution match { - case Some(r) if r.resolutionType == "REPARENT" => - // Use resolution's parent - r.newParentId.orElse(r.newParentPlaceholderId.flatMap(placeholderToRealId.get)) - case _ => - // Use original WIP reparent's parent - wip.newParentId.orElse(wip.newParentPlaceholderId.flatMap(placeholderToRealId.get)) - } - - newParentId match { - case Some(pid) => - haplogroupRepository.updateParent(wip.haplogroupId, pid, sourceName).map(_ => 1) - case None => - logger.warn(s"Could not resolve reparent for haplogroup ${wip.haplogroupId}: " + - s"newParent=${wip.newParentId}/${wip.newParentPlaceholderId}") - Future.successful(0) - } - }).map(results => acc + results.sum) - } - } - } - - /** - * Mark resolutions as APPLIED after successful processing. 
- */ - private def markResolutionsApplied( - resolutions: Seq[models.dal.domain.haplogroups.WipResolutionRow], - appliedAt: LocalDateTime - ): Future[Int] = { - if (resolutions.isEmpty) { - Future.successful(0) - } else { - Future.sequence(resolutions.flatMap(_.id).map { resolutionId => - wipTreeRepository.updateResolutionStatus(resolutionId, "APPLIED", Some(appliedAt)) - }).map(_.sum) - } - } - - override def discardChangeSet(changeSetId: Int, curatorId: String, reason: String): Future[Boolean] = { - repository.getChangeSet(changeSetId).flatMap { - case Some(cs) if cs.status != ChangeSetStatus.Applied => - repository.discardChangeSet(changeSetId, curatorId, reason).map { result => - if (result) { - logger.info(s"Change set $changeSetId discarded by $curatorId: $reason") - // Log audit entry for discard action - auditService.logChangeSetDiscard(curatorId, cs, reason) - } - result - } - case Some(cs) => - Future.failed(new IllegalStateException( - s"Cannot discard: change set is already APPLIED" - )) - case None => - Future.failed(new NoSuchElementException(s"Change set $changeSetId not found")) - } - } - - // ============================================================================ - // Change Recording - // ============================================================================ - - private def createChange( - changeSetId: Int, - changeType: TreeChangeType, - haplogroupId: Option[Int] = None, - variantId: Option[Int] = None, - oldParentId: Option[Int] = None, - newParentId: Option[Int] = None, - haplogroupData: Option[String] = None, - oldData: Option[String] = None, - ambiguityType: Option[String] = None, - ambiguityConfidence: Option[Double] = None - ): Future[Int] = { - for { - seqNum <- repository.getNextSequenceNum(changeSetId) - change = TreeChange( - id = None, - changeSetId = changeSetId, - changeType = changeType, - haplogroupId = haplogroupId, - variantId = variantId, - oldParentId = oldParentId, - newParentId = newParentId, - haplogroupData = 
haplogroupData, - oldData = oldData, - sequenceNum = seqNum, - ambiguityType = ambiguityType, - ambiguityConfidence = ambiguityConfidence - ) - id <- repository.createTreeChange(change) - } yield id - } - - override def recordCreate( - changeSetId: Int, - haplogroupData: String, - parentId: Option[Int], - ambiguityType: Option[String], - ambiguityConfidence: Option[Double] - ): Future[Int] = { - createChange( - changeSetId = changeSetId, - changeType = TreeChangeType.Create, - newParentId = parentId, - haplogroupData = Some(haplogroupData), - ambiguityType = ambiguityType, - ambiguityConfidence = ambiguityConfidence - ) - } - - override def recordUpdate( - changeSetId: Int, - haplogroupId: Int, - oldData: String, - newData: String, - ambiguityType: Option[String], - ambiguityConfidence: Option[Double] - ): Future[Int] = { - createChange( - changeSetId = changeSetId, - changeType = TreeChangeType.Update, - haplogroupId = Some(haplogroupId), - haplogroupData = Some(newData), - oldData = Some(oldData), - ambiguityType = ambiguityType, - ambiguityConfidence = ambiguityConfidence - ) - } - - override def recordReparent( - changeSetId: Int, - haplogroupId: Int, - oldParentId: Option[Int], - newParentId: Int, - ambiguityType: Option[String], - ambiguityConfidence: Option[Double] - ): Future[Int] = { - createChange( - changeSetId = changeSetId, - changeType = TreeChangeType.Reparent, - haplogroupId = Some(haplogroupId), - oldParentId = oldParentId, - newParentId = Some(newParentId), - ambiguityType = ambiguityType, - ambiguityConfidence = ambiguityConfidence - ) - } - - override def recordAddVariant( - changeSetId: Int, - haplogroupId: Int, - variantId: Int - ): Future[Int] = { - createChange( - changeSetId = changeSetId, - changeType = TreeChangeType.AddVariant, - haplogroupId = Some(haplogroupId), - variantId = Some(variantId) - ) - } - - override def recordRemoveVariant( - changeSetId: Int, - haplogroupId: Int, - variantId: Int - ): Future[Int] = { - createChange( - 
changeSetId = changeSetId, - changeType = TreeChangeType.RemoveVariant, - haplogroupId = Some(haplogroupId), - variantId = Some(variantId) - ) - } - - // ============================================================================ - // Change Review - // ============================================================================ - - override def getPendingReviewChanges(changeSetId: Int, limit: Int): Future[Seq[TreeChange]] = { - repository.getPendingReviewChanges(changeSetId, limit) - } - - override def getPendingReviewChangesWithNames(changeSetId: Int, limit: Int): Future[Seq[TreeChangeView]] = { - for { - changeSetOpt <- repository.getChangeSet(changeSetId) - changes <- repository.getPendingReviewChanges(changeSetId, limit) - // Collect all haplogroup IDs we need to look up - haplogroupIds = changes.flatMap(c => c.haplogroupId.toSeq ++ c.oldParentId.toSeq ++ c.newParentId.toSeq).toSet - names <- repository.getHaplogroupNamesById(haplogroupIds) - } yield { - val changeSetName = changeSetOpt.map(_.name).getOrElse(s"ChangeSet #$changeSetId") - val sourceName = changeSetOpt.map(_.sourceName).getOrElse("Unknown") - - changes.map { change => - // For CREATE, try to extract name from haplogroupData JSON - val haplogroupName = change.haplogroupId.flatMap(names.get).orElse { - change.haplogroupData.flatMap { data => - (Json.parse(data) \ "name").asOpt[String] - } - } - - TreeChangeView( - change = change, - changeSetName = changeSetName, - sourceName = sourceName, - haplogroupName = haplogroupName, - parentName = change.newParentId.flatMap(names.get).orElse(change.oldParentId.flatMap(names.get)), - variantName = None // Could be enhanced later if needed - ) - } - } - } - - override def reviewChange( - changeId: Int, - curatorId: String, - action: ChangeStatus, - notes: Option[String] - ): Future[Boolean] = { - if (action == ChangeStatus.Pending) { - Future.failed(new IllegalArgumentException("Cannot set status back to PENDING")) - } else { - for { - changeOpt <- 
repository.getTreeChange(changeId) - result <- repository.reviewTreeChange(changeId, curatorId, notes, action) - } yield { - if (result) { - logger.debug(s"Change $changeId reviewed by $curatorId: $action") - // Log audit entry for change review - changeOpt.foreach { change => - auditService.logChangeReview(curatorId, change, ChangeStatus.toDbString(action), notes) - } - } - result - } - } - } - - override def approveAllPending(changeSetId: Int, curatorId: String): Future[Int] = { - repository.applyAllPendingChanges(changeSetId).map { count => - logger.info(s"Bulk approved $count pending changes in set $changeSetId by $curatorId") - count - } - } - - // ============================================================================ - // Comments - // ============================================================================ - - override def addComment( - changeSetId: Int, - author: String, - content: String, - treeChangeId: Option[Int] - ): Future[Int] = { - val comment = ChangeSetComment( - id = None, - changeSetId = changeSetId, - treeChangeId = treeChangeId, - author = author, - content = content, - createdAt = LocalDateTime.now() - ) - repository.addComment(comment) - } - - override def listComments(changeSetId: Int): Future[Seq[ChangeSetComment]] = { - repository.listComments(changeSetId) - } - - // ============================================================================ - // Tree Diff (Phase 3) - // ============================================================================ - - override def getTreeDiff(changeSetId: Int): Future[TreeDiff] = { - for { - changeSetOpt <- repository.getChangeSet(changeSetId) - changes <- repository.getChangesForChangeSet(changeSetId) - // If no TreeChange records, try WIP tables (staging mode) - result <- changeSetOpt match { - case None => - Future.successful(TreeDiff.empty.copy(changeSetId = changeSetId)) - case Some(changeSet) if changes.isEmpty => - // No TreeChange records - compute from WIP tables (staging mode) - 
computeTreeDiffFromWip(changeSet) - case Some(changeSet) => - // Has TreeChange records - use those - val haplogroupIds = changes.flatMap(c => c.haplogroupId.toSeq ++ c.oldParentId.toSeq ++ c.newParentId.toSeq ++ c.createdHaplogroupId.toSeq).toSet - repository.getHaplogroupNamesById(haplogroupIds).map { names => - computeTreeDiff(changeSet, changes, names) - } - } - } yield result - } - - override def getActiveTreeDiff(haplogroupType: HaplogroupType): Future[Option[TreeDiff]] = { - repository.getActiveChangeSet(haplogroupType).flatMap { - case None => Future.successful(None) - case Some(cs) => getTreeDiff(cs.id.get).map(Some(_)) - } - } - - override def getChangesForDiff(changeSetId: Int): Future[Seq[TreeChange]] = { - repository.getChangesForChangeSet(changeSetId) - } - - /** - * Compute tree diff from change set and its changes. - * @param names Map of haplogroup ID -> name for display - */ - private def computeTreeDiff(changeSet: ChangeSet, changes: Seq[TreeChange], names: Map[Int, String]): TreeDiff = { - // Helper to get name or fallback to ID - def getName(idOpt: Option[Int]): Option[String] = idOpt.map(id => names.getOrElse(id, s"#$id")) - def getNameOrId(idOpt: Option[Int]): String = idOpt.map(id => names.getOrElse(id, s"#$id")).getOrElse("?") - - // Group changes by type - val createChanges = changes.filter(_.changeType == TreeChangeType.Create) - val updateChanges = changes.filter(_.changeType == TreeChangeType.Update) - val deleteChanges = changes.filter(_.changeType == TreeChangeType.Delete) - val reparentChanges = changes.filter(_.changeType == TreeChangeType.Reparent) - val addVariantChanges = changes.filter(_.changeType == TreeChangeType.AddVariant) - val removeVariantChanges = changes.filter(_.changeType == TreeChangeType.RemoveVariant) - - // Build diff entries - val entries = List.newBuilder[TreeDiffEntry] - - // CREATE entries (Added nodes) - createChanges.foreach { change => - val haplogroupName = change.haplogroupData - .flatMap(data => 
(Json.parse(data) \ "name").asOpt[String]) - .orElse(change.createdHaplogroupId.flatMap(names.get)) - .getOrElse(s"Node ${change.createdHaplogroupId.getOrElse("?")}") - - val parentName = getName(change.newParentId) - - entries += TreeDiffEntry( - diffType = DiffType.Added, - haplogroupId = change.createdHaplogroupId, - haplogroupName = haplogroupName, - oldParentName = None, - newParentName = parentName, - changeDescription = s"New node created under parent ${parentName.getOrElse("root")}", - changeIds = List(change.id.get) - ) - } - - // DELETE entries (Removed nodes) - deleteChanges.foreach { change => - entries += TreeDiffEntry( - diffType = DiffType.Removed, - haplogroupId = change.haplogroupId, - haplogroupName = getNameOrId(change.haplogroupId), - oldParentName = None, - newParentName = None, - changeDescription = "Node marked for deletion", - changeIds = List(change.id.get) - ) - } - - // REPARENT entries - reparentChanges.foreach { change => - val oldParent = getName(change.oldParentId) - val newParent = getName(change.newParentId) - - entries += TreeDiffEntry( - diffType = DiffType.Reparented, - haplogroupId = change.haplogroupId, - haplogroupName = getNameOrId(change.haplogroupId), - oldParentName = oldParent, - newParentName = newParent, - changeDescription = s"Parent changed from ${oldParent.getOrElse("none")} to ${newParent.getOrElse("none")}", - changeIds = List(change.id.get) - ) - } - - // Group UPDATE and variant changes by haplogroup for Modified entries - val updatesByHg = updateChanges.groupBy(_.haplogroupId) - val variantAddsByHg = addVariantChanges.groupBy(_.haplogroupId) - val variantRemovesByHg = removeVariantChanges.groupBy(_.haplogroupId) - - val allModifiedHgs = (updatesByHg.keySet ++ variantAddsByHg.keySet ++ variantRemovesByHg.keySet).flatten - - allModifiedHgs.foreach { hgId => - val updates = updatesByHg.getOrElse(Some(hgId), Seq.empty) - val variantAdds = variantAddsByHg.getOrElse(Some(hgId), Seq.empty) - val variantRemoves = 
variantRemovesByHg.getOrElse(Some(hgId), Seq.empty) - - val changeIds = (updates.flatMap(_.id) ++ variantAdds.flatMap(_.id) ++ variantRemoves.flatMap(_.id)).toList - val description = List( - if (updates.nonEmpty) s"${updates.size} update(s)" else "", - if (variantAdds.nonEmpty) s"${variantAdds.size} variant(s) added" else "", - if (variantRemoves.nonEmpty) s"${variantRemoves.size} variant(s) removed" else "" - ).filter(_.nonEmpty).mkString(", ") - - entries += TreeDiffEntry( - diffType = DiffType.Modified, - haplogroupId = Some(hgId), - haplogroupName = names.getOrElse(hgId, s"#$hgId"), - oldParentName = None, - newParentName = None, - changeDescription = description, - changeIds = changeIds, - variantsAdded = variantAdds.flatMap(_.variantId).map(v => s"#$v").toList, - variantsRemoved = variantRemoves.flatMap(_.variantId).map(v => s"#$v").toList - ) - } - - // Build summary - val summary = TreeDiffSummary( - totalChanges = changes.size, - nodesAdded = createChanges.size, - nodesRemoved = deleteChanges.size, - nodesModified = allModifiedHgs.size, - nodesReparented = reparentChanges.size, - variantsAdded = addVariantChanges.size, - variantsRemoved = removeVariantChanges.size - ) - - TreeDiff( - changeSetId = changeSet.id.get, - changeSetName = changeSet.name, - haplogroupType = changeSet.haplogroupType, - entries = entries.result(), - summary = summary - ) - } - - /** - * Compute tree diff from WIP tables (for staging mode). - * This is used when no TreeChange records exist because changes are staged in WIP tables. 
- */ - private def computeTreeDiffFromWip(changeSet: ChangeSet): Future[TreeDiff] = { - val changeSetId = changeSet.id.get - - for { - // Get all WIP data - wipHaplogroups <- wipTreeRepository.getWipHaplogroupsForChangeSet(changeSetId) - wipReparents <- wipTreeRepository.getWipReparentsForChangeSet(changeSetId) - wipVariants <- wipTreeRepository.getWipVariantsForChangeSet(changeSetId) - wipRelationships <- wipTreeRepository.getWipRelationshipsForChangeSet(changeSetId) - - // Collect all production haplogroup IDs we need to look up names for - productionHgIds = ( - wipReparents.map(_.haplogroupId) ++ - wipReparents.flatMap(_.oldParentId) ++ - wipReparents.flatMap(_.newParentId) ++ - wipVariants.flatMap(_.haplogroupId) ++ - wipRelationships.flatMap(_.childHaplogroupId) ++ - wipRelationships.flatMap(_.parentHaplogroupId) - ).toSet - - names <- repository.getHaplogroupNamesById(productionHgIds) - } yield { - val entries = List.newBuilder[TreeDiffEntry] - - // Helper to get name - def getName(idOpt: Option[Int]): Option[String] = idOpt.map(id => names.getOrElse(id, s"#$id")) - - // Build map of placeholder ID -> WIP haplogroup name - val wipNames = wipHaplogroups.map(h => h.placeholderId -> h.name).toMap - - // Build map of placeholder ID -> parent info from relationships - val parentByPlaceholder = wipRelationships - .filter(_.childPlaceholderId.isDefined) - .map(r => r.childPlaceholderId.get -> (r.parentHaplogroupId, r.parentPlaceholderId)) - .toMap - - // CREATE entries from WIP haplogroups - wipHaplogroups.foreach { wh => - val parentInfo = parentByPlaceholder.get(wh.placeholderId) - val parentName = parentInfo.flatMap { - case (Some(prodId), _) => names.get(prodId) - case (_, Some(placeholderId)) => wipNames.get(placeholderId) - case _ => None - } - - entries += TreeDiffEntry( - diffType = DiffType.Added, - haplogroupId = None, // Placeholder, not production ID - haplogroupName = wh.name, - oldParentName = None, - newParentName = parentName, - changeDescription = 
s"New node to be created under parent ${parentName.getOrElse("root")}", - changeIds = List.empty - ) - } - - // REPARENT entries from WIP reparents - wipReparents.foreach { wr => - val haplogroupName = names.getOrElse(wr.haplogroupId, s"#${wr.haplogroupId}") - val oldParent = getName(wr.oldParentId) - val newParent = wr.newParentId.flatMap(names.get).orElse( - wr.newParentPlaceholderId.flatMap(wipNames.get) - ) - - entries += TreeDiffEntry( - diffType = DiffType.Reparented, - haplogroupId = Some(wr.haplogroupId), - haplogroupName = haplogroupName, - oldParentName = oldParent, - newParentName = newParent, - changeDescription = s"Parent to be changed from ${oldParent.getOrElse("none")} to ${newParent.getOrElse("new node")}", - changeIds = List.empty - ) - } - - // MODIFIED entries from WIP variants (for existing production haplogroups) - val variantsByProductionHg = wipVariants - .filter(_.haplogroupId.isDefined) - .groupBy(_.haplogroupId.get) - - variantsByProductionHg.foreach { case (hgId, variants) => - val haplogroupName = names.getOrElse(hgId, s"#$hgId") - - entries += TreeDiffEntry( - diffType = DiffType.Modified, - haplogroupId = Some(hgId), - haplogroupName = haplogroupName, - oldParentName = None, - newParentName = None, - changeDescription = s"${variants.size} variant(s) to be added", - changeIds = List.empty, - variantsAdded = variants.map(v => s"#${v.variantId}").toList - ) - } - - // Build summary - val summary = TreeDiffSummary( - totalChanges = wipHaplogroups.size + wipReparents.size + variantsByProductionHg.size, - nodesAdded = wipHaplogroups.size, - nodesRemoved = 0, - nodesModified = variantsByProductionHg.size, - nodesReparented = wipReparents.size, - variantsAdded = wipVariants.size, - variantsRemoved = 0 - ) - - TreeDiff( - changeSetId = changeSetId, - changeSetName = changeSet.name, - haplogroupType = changeSet.haplogroupType, - entries = entries.result(), - summary = summary - ) - } - } - - // 
============================================================================ - // Tree Preview (ASCII visualization) - // ============================================================================ - - override def getTreePreview(changeSetId: Int): Future[String] = { - for { - changeSetOpt <- repository.getChangeSet(changeSetId) - wipHaplogroups <- wipTreeRepository.getWipHaplogroupsForChangeSet(changeSetId) - wipRelationships <- wipTreeRepository.getWipRelationshipsForChangeSet(changeSetId) - wipReparents <- wipTreeRepository.getWipReparentsForChangeSet(changeSetId) - wipVariants <- wipTreeRepository.getWipVariantsForChangeSet(changeSetId) - - // Get all affected production haplogroup IDs - affectedProdIds = ( - wipReparents.map(_.haplogroupId) ++ - wipReparents.flatMap(_.oldParentId) ++ - wipReparents.flatMap(_.newParentId) ++ - wipVariants.flatMap(_.haplogroupId) ++ - wipRelationships.flatMap(_.childHaplogroupId) ++ - wipRelationships.flatMap(_.parentHaplogroupId) - ).toSet - - // Get names for production haplogroups - prodNames <- repository.getHaplogroupNamesById(affectedProdIds) - - // Get children of affected parents for context - parentIds = wipRelationships.flatMap(_.parentHaplogroupId).toSet ++ - wipReparents.flatMap(_.newParentId).toSet - existingChildren <- Future.sequence(parentIds.toSeq.map { pid => - haplogroupRepository.getDirectChildren(pid).map(children => pid -> children) - }).map(_.toMap) - - // Get variant names for display - allVariantIds = wipVariants.map(_.variantId).toSet - variantNames <- if (allVariantIds.nonEmpty) { - haplogroupVariantRepository.getVariantNamesByIds(allVariantIds) - } else { - Future.successful(Map.empty[Int, String]) - } - - } yield { - changeSetOpt match { - case None => s"Change set $changeSetId not found" - case Some(changeSet) => - buildTreePreview( - changeSet, - wipHaplogroups, - wipRelationships, - wipReparents, - wipVariants, - prodNames, - existingChildren, - variantNames - ) - } - } - } - - /** - * Build 
ASCII tree preview from WIP data. - */ - private def buildTreePreview( - changeSet: ChangeSet, - wipHaplogroups: Seq[models.dal.domain.haplogroups.WipHaplogroupRow], - wipRelationships: Seq[models.dal.domain.haplogroups.WipRelationshipRow], - wipReparents: Seq[models.dal.domain.haplogroups.WipReparentRow], - wipVariants: Seq[models.dal.domain.haplogroups.WipHaplogroupVariantRow], - prodNames: Map[Int, String], - existingChildren: Map[Int, Seq[models.domain.haplogroups.Haplogroup]], - variantNames: Map[Int, String] - ): String = { - val sb = new StringBuilder - - // Header - sb.append(s"=== Tree Preview: ${changeSet.name} ===\n") - sb.append(s"Type: ${changeSet.haplogroupType} | Status: ${changeSet.status}\n") - sb.append(s"New nodes: ${wipHaplogroups.size} | Reparents: ${wipReparents.size} | Variant additions: ${wipVariants.size}\n") - sb.append("\nLegend: [+] = new node, [→] = reparented, [~] = modified\n") - sb.append("=" * 50 + "\n\n") - - // Build WIP name lookup (placeholder ID -> name) - val wipNames = wipHaplogroups.map(h => h.placeholderId -> h.name).toMap - - // Build parent relationships for WIP nodes - val wipParentMap = wipRelationships.flatMap { rel => - rel.childPlaceholderId.map { childPh => - childPh -> (rel.parentHaplogroupId, rel.parentPlaceholderId) - } - }.toMap - - // Group WIP variants by haplogroup (placeholder or production) - val variantsByPlaceholder = wipVariants.filter(_.haplogroupPlaceholderId.isDefined) - .groupBy(_.haplogroupPlaceholderId.get) - val variantsByProdHg = wipVariants.filter(_.haplogroupId.isDefined) - .groupBy(_.haplogroupId.get) - - // Build reparent lookup - val reparentedNodes = wipReparents.map(r => r.haplogroupId -> r).toMap - - // Find root-level changes (nodes whose parent is a production node) - val rootParents = wipRelationships - .filter(r => r.parentHaplogroupId.isDefined && r.childPlaceholderId.isDefined) - .groupBy(_.parentHaplogroupId.get) - - // Also include production nodes being reparented - val 
prodNodesBeingReparented = wipReparents.groupBy { r => - r.newParentId.orElse(r.newParentPlaceholderId.flatMap { ph => - wipParentMap.get(ph).flatMap(_._1) - }) - } - - // Render each affected subtree - val renderedParents = scala.collection.mutable.Set[Int]() - - // Helper to format variants - def formatVariants(variantIds: Seq[Int]): String = { - if (variantIds.isEmpty) "" - else { - val names = variantIds.take(5).map(vid => variantNames.getOrElse(vid, s"#$vid")) - val suffix = if (variantIds.size > 5) s" +${variantIds.size - 5} more" else "" - s" (${names.mkString(", ")}$suffix)" - } - } - - // Recursive function to render WIP subtree - def renderWipNode(placeholderId: Int, prefix: String, isLast: Boolean): Unit = { - val name = wipNames.getOrElse(placeholderId, s"?$placeholderId") - val variants = variantsByPlaceholder.getOrElse(placeholderId, Seq.empty).map(_.variantId) - val variantStr = formatVariants(variants) - - val connector = if (isLast) "└── " else "├── " - val childPrefix = prefix + (if (isLast) " " else "│ ") - - sb.append(s"$prefix$connector[+] $name$variantStr\n") - - // Find children of this placeholder - val children = wipRelationships - .filter(r => r.parentPlaceholderId.contains(placeholderId)) - .flatMap(_.childPlaceholderId) - .distinct - - children.zipWithIndex.foreach { case (childPh, idx) => - renderWipNode(childPh, childPrefix, idx == children.size - 1) - } - } - - // Render subtrees rooted at production nodes - rootParents.toSeq.sortBy { case (pid, _) => prodNames.getOrElse(pid, "") }.foreach { case (parentId, rels) => - if (!renderedParents.contains(parentId)) { - renderedParents += parentId - val parentName = prodNames.getOrElse(parentId, s"#$parentId") - - sb.append(s"$parentName\n") - - // Get existing children for context - val existingKids = existingChildren.getOrElse(parentId, Seq.empty) - .filterNot(h => reparentedNodes.contains(h.id.get)) // Exclude reparented ones - .map(h => (h.name, false, h.id.get)) // (name, isNew, id) - - 
// Get new WIP children - val newKids = rels.flatMap(_.childPlaceholderId).map { ph => - (wipNames.getOrElse(ph, s"?$ph"), true, ph) - } - - // Get reparented production children - val reparentedKids = wipReparents - .filter(r => r.newParentId.contains(parentId) || r.newParentPlaceholderId.isEmpty && r.newParentId.contains(parentId)) - .map(r => (prodNames.getOrElse(r.haplogroupId, s"#${r.haplogroupId}"), false, r.haplogroupId, true)) - - // Combine and sort - val allChildren = existingKids.map(k => (k._1, k._2, k._3, false)) ++ - newKids.map(k => (k._1, k._2, k._3, false)) ++ - reparentedKids - - allChildren.sortBy(_._1).zipWithIndex.foreach { case ((name, isNew, id, isReparented), idx) => - val isLast = idx == allChildren.size - 1 - val connector = if (isLast) "└── " else "├── " - val childPrefix = if (isLast) " " else "│ " - - if (isNew) { - // New WIP node - render its subtree - val variants = variantsByPlaceholder.getOrElse(id, Seq.empty).map(_.variantId) - val variantStr = formatVariants(variants) - sb.append(s"$connector[+] $name$variantStr\n") - - // Render children of this WIP node - val wipChildren = wipRelationships - .filter(r => r.parentPlaceholderId.contains(id)) - .flatMap(_.childPlaceholderId) - .distinct - - wipChildren.zipWithIndex.foreach { case (childPh, childIdx) => - renderWipNode(childPh, childPrefix, childIdx == wipChildren.size - 1) - } - } else if (isReparented) { - // Reparented production node - val variants = variantsByProdHg.getOrElse(id, Seq.empty).map(_.variantId) - val variantStr = formatVariants(variants) - sb.append(s"$connector[→] $name$variantStr\n") - } else { - // Existing node (for context) - val variants = variantsByProdHg.getOrElse(id, Seq.empty).map(_.variantId) - val variantStr = if (variants.nonEmpty) { - s" [~]${formatVariants(variants)}" - } else "" - sb.append(s"$connector$name$variantStr\n") - } - } - - sb.append("\n") - } - } - - // Render reparents that move to WIP nodes (new parent is placeholder) - val 
reparentsToWip = wipReparents.filter(_.newParentPlaceholderId.isDefined) - if (reparentsToWip.nonEmpty) { - sb.append("--- Nodes reparented to new WIP nodes ---\n") - reparentsToWip.foreach { r => - val nodeName = prodNames.getOrElse(r.haplogroupId, s"#${r.haplogroupId}") - val oldParent = r.oldParentId.flatMap(prodNames.get).getOrElse("?") - val newParent = r.newParentPlaceholderId.flatMap(wipNames.get).getOrElse("?") - sb.append(s" $nodeName: $oldParent → $newParent [+]\n") - } - sb.append("\n") - } - - // Summary of variant additions to existing nodes - val variantAdditions = variantsByProdHg.filter(_._2.nonEmpty) - if (variantAdditions.nonEmpty) { - sb.append("--- Variant additions to existing nodes ---\n") - variantAdditions.toSeq.sortBy { case (hgId, _) => prodNames.getOrElse(hgId, "") }.foreach { case (hgId, vars) => - val nodeName = prodNames.getOrElse(hgId, s"#$hgId") - val variantStr = formatVariants(vars.map(_.variantId)) - sb.append(s" $nodeName:$variantStr\n") - } - } - - sb.toString() - } -} diff --git a/app/services/UserPermissionHelper.scala b/app/services/UserPermissionHelper.scala deleted file mode 100644 index afed6847..00000000 --- a/app/services/UserPermissionHelper.scala +++ /dev/null @@ -1,35 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.user.User -import play.api.mvc.RequestHeader -import services.AuthService - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class UserPermissionHelper @Inject()( - authService: AuthService - )(implicit ec: ExecutionContext) { - - /** - * Checks if the current user in the request has the specified permission. - * - * @param permissionName The name of the permission to check. - * @param request The current request header (containing the session). - * @return Future[Boolean] True if the user has the permission, false otherwise. 
- */ - def hasPermission(permissionName: String)(implicit request: RequestHeader): Future[Boolean] = { - request.session.get("userId") match { - case Some(userIdStr) => - try { - val userId = UUID.fromString(userIdStr) - authService.hasPermission(userId, permissionName) - } catch { - case _: IllegalArgumentException => Future.successful(false) - } - case None => Future.successful(false) - } - } -} diff --git a/app/services/VariantBrowserService.scala b/app/services/VariantBrowserService.scala deleted file mode 100644 index 95ead2e9..00000000 --- a/app/services/VariantBrowserService.scala +++ /dev/null @@ -1,49 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.VariantV2 -import models.domain.haplogroups.Haplogroup -import repositories.{HaplogroupVariantRepository, VariantV2Repository} - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for variant browser functionality. - * Abstracts repository access and provides business logic for the variant browser UI. - */ -@Singleton -class VariantBrowserService @Inject()( - variantV2Repository: VariantV2Repository, - haplogroupVariantRepository: HaplogroupVariantRepository -)(implicit ec: ExecutionContext) { - - /** - * Search variants with pagination. - * - * @param query Search query string - * @param offset Pagination offset - * @param limit Page size - * @return Tuple of (variants, total count) - */ - def searchPaginated(query: String, offset: Int, limit: Int): Future[(Seq[VariantV2], Int)] = { - variantV2Repository.searchPaginated(query, offset, limit) - } - - /** - * Get variant detail with associated haplogroups. - * Fetches variant and haplogroups in a single method call. 
- * - * @param variantId The variant ID - * @return Option of (variant, haplogroups) tuple, None if variant not found - */ - def getVariantWithHaplogroups(variantId: Int): Future[Option[(VariantV2, Seq[Haplogroup])]] = { - variantV2Repository.findById(variantId).flatMap { - case Some(variant) => - haplogroupVariantRepository.getHaplogroupsByVariant(variantId).map { haplogroups => - Some((variant, haplogroups)) - } - case None => - Future.successful(None) - } - } -} diff --git a/app/services/VariantExportService.scala b/app/services/VariantExportService.scala deleted file mode 100644 index 1f43f4b2..00000000 --- a/app/services/VariantExportService.scala +++ /dev/null @@ -1,228 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.api.* -import models.domain.genomics.VariantV2 -import play.api.{Configuration, Logging} -import play.api.libs.json.{JsObject, Json, OFormat} -import repositories.{HaplogroupVariantRepository, VariantV2Repository} - -import java.io.{BufferedOutputStream, FileOutputStream, OutputStreamWriter} -import java.nio.file.{Files, Path, Paths, StandardCopyOption} -import java.time.{Instant, LocalDateTime, ZoneOffset} -import java.util.zip.GZIPOutputStream -import scala.concurrent.{ExecutionContext, Future} - -/** - * Export metadata for tracking export file status. - */ -case class ExportMetadata( - generatedAt: Instant, - variantCount: Int, - fileSizeBytes: Long -) - -object ExportMetadata { - implicit val format: OFormat[ExportMetadata] = Json.format[ExportMetadata] -} - -/** - * Result of an export operation. - */ -case class ExportResult( - success: Boolean, - variantCount: Int = 0, - fileSizeBytes: Long = 0, - error: Option[String] = None, - generationTimeMs: Long = 0 -) - -object ExportResult { - implicit val format: OFormat[ExportResult] = Json.format[ExportResult] -} - -/** - * Record structure for exported variants. 
- */ -case class VariantExportRecord( - variantId: Int, - canonicalName: Option[String], - variantType: String, - namingStatus: String, - coordinates: Map[String, VariantCoordinateDTO], - rsIds: Seq[String], - commonNames: Seq[String] -) - -object VariantExportRecord { - implicit val format: OFormat[VariantExportRecord] = Json.format[VariantExportRecord] -} - -/** - * Coordinate information for export. - */ -case class VariantCoordinateDTO( - contig: String, - position: Int, - ref: String, - alt: String -) - -object VariantCoordinateDTO { - implicit val format: OFormat[VariantCoordinateDTO] = Json.format[VariantCoordinateDTO] -} - -/** - * Service for generating bulk variant export files. - * Creates a gzipped JSONL file containing all variants for Edge App consumption. - */ -@Singleton -class VariantExportService @Inject()( - variantV2Repository: VariantV2Repository, - haplogroupVariantRepository: HaplogroupVariantRepository, - configuration: Configuration -)(implicit ec: ExecutionContext) extends Logging { - - private val exportDir = Paths.get(configuration.getOptional[String]("variant.export.dir").getOrElse("/tmp/variant-exports")) - private val exportFileName = "variants-full.jsonl.gz" - private val metadataFileName = "variants-export-metadata.json" - - // Ensure export directory exists - if (!Files.exists(exportDir)) { - Files.createDirectories(exportDir) - } - - /** - * Get the path to the current export file. - */ - def getExportFilePath: Path = exportDir.resolve(exportFileName) - - /** - * Get the path to the metadata file. - */ - def getMetadataFilePath: Path = exportDir.resolve(metadataFileName) - - /** - * Check if an export file exists and return its metadata. 
- */ - def getExportMetadata: Option[ExportMetadata] = { - val metaPath = getMetadataFilePath - if (Files.exists(metaPath)) { - try { - val content = Files.readString(metaPath) - Some(Json.parse(content).as[ExportMetadata]) - } catch { - case e: java.io.IOException => - logger.warn(s"Failed to read export metadata file: ${e.getMessage}") - None - case e: play.api.libs.json.JsResultException => - logger.warn(s"Failed to parse export metadata JSON: ${e.getMessage}") - None - } - } else { - None - } - } - - private val ExportBatchSize = 10000 - - /** - * Generate a new export file by fetching variants in batches to avoid loading - * the entire table into memory. - */ - def generateExport(): Future[ExportResult] = { - val startTime = System.currentTimeMillis() - logger.info("Starting variant export generation") - - val tempFile = exportDir.resolve(s"$exportFileName.tmp") - val finalFile = getExportFilePath - - variantV2Repository.countAll().flatMap { totalCount => - val gzOut = new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(tempFile.toFile))) - val writer = new OutputStreamWriter(gzOut, "UTF-8") - - val batches = (0 until totalCount by ExportBatchSize).toSeq - - batches.foldLeft(Future.successful(0)) { (accFuture, offset) => - accFuture.flatMap { written => - variantV2Repository.fetchBatch(offset, ExportBatchSize).map { variants => - for (variant <- variants) { - val exportRecord = variantToExportRecord(variant) - writer.write(Json.stringify(Json.toJson(exportRecord))) - writer.write("\n") - } - written + variants.size - } - } - }.map { variantCount => - writer.close() - - Files.move(tempFile, finalFile, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE) - val fileSizeBytes = Files.size(finalFile) - - val metadata = ExportMetadata( - generatedAt = Instant.now(), - variantCount = variantCount, - fileSizeBytes = fileSizeBytes - ) - Files.writeString(getMetadataFilePath, Json.stringify(Json.toJson(metadata))) - - val generationTimeMs 
= System.currentTimeMillis() - startTime - logger.info(s"Export generation complete: $variantCount variants in ${generationTimeMs}ms") - - ExportResult( - success = true, - variantCount = variantCount, - fileSizeBytes = fileSizeBytes, - error = None, - generationTimeMs = generationTimeMs - ) - }.recover { - case e: java.io.IOException => - writer.close() - logger.error(s"Export generation failed due to I/O error: ${e.getMessage}", e) - ExportResult( - success = false, - error = Some(s"I/O error: ${e.getMessage}"), - generationTimeMs = System.currentTimeMillis() - startTime - ) - } - } - } - - /** - * Convert a VariantV2 to an export record. - */ - private def variantToExportRecord(variant: VariantV2): VariantExportRecord = { - // Extract coordinates from JSONB - val coordinates = variant.coordinates.asOpt[Map[String, JsObject]].getOrElse(Map.empty) - - val coordDtos = coordinates.flatMap { case (refGenome, coords) => - for { - contig <- (coords \ "contig").asOpt[String] - position <- (coords \ "position").asOpt[Int] - ref <- (coords \ "ref").asOpt[String] - alt <- (coords \ "alt").asOpt[String] - } yield refGenome -> VariantCoordinateDTO( - contig = contig, - position = position, - ref = ref, - alt = alt - ) - } - - // Extract aliases from JSONB - val rsIds = variant.rsIds - val commonNames = variant.commonNames - - VariantExportRecord( - variantId = variant.variantId.getOrElse(0), - canonicalName = variant.canonicalName, - variantType = variant.mutationType.dbValue, - namingStatus = variant.namingStatus.dbValue, - coordinates = coordDtos, - rsIds = rsIds, - commonNames = commonNames - ) - } -} diff --git a/app/services/VariantPublicApiService.scala b/app/services/VariantPublicApiService.scala deleted file mode 100644 index fd8055a7..00000000 --- a/app/services/VariantPublicApiService.scala +++ /dev/null @@ -1,182 +0,0 @@ -package services - -import jakarta.inject.{Inject, Singleton} -import models.api.* -import models.domain.genomics.VariantV2 -import 
play.api.cache.AsyncCacheApi -import play.api.libs.json.JsObject -import repositories.{HaplogroupVariantRepository, VariantV2Repository} - -import scala.concurrent.duration.* -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service for the public Variant API. - * Transforms internal VariantV2 models to forward-compatible API DTOs. - * Results are cached for performance. - * - * With the consolidated VariantV2 schema, this service is much simpler: - * - No grouping logic needed (variants are already consolidated) - * - Aliases are embedded in JSONB (no separate repository) - * - Coordinates for all assemblies are in one row - */ -@Singleton -class VariantPublicApiService @Inject()( - variantV2Repository: VariantV2Repository, - haplogroupVariantRepository: HaplogroupVariantRepository, - cache: AsyncCacheApi -)(implicit ec: ExecutionContext) { - - private val SearchCacheDuration = 10.minutes - private val DetailCacheDuration = 30.minutes - - /** - * Search variants with pagination, returning API DTOs. - */ - def searchVariants(query: Option[String], page: Int, pageSize: Int): Future[VariantSearchResponse] = { - val cacheKey = s"api-variant-search:${query.getOrElse("").toLowerCase.trim}:$page:$pageSize" - - cache.getOrElseUpdate(cacheKey, SearchCacheDuration) { - val offset = (page - 1) * pageSize - - for { - (variants, totalCount) <- variantV2Repository.searchPaginated(query.getOrElse(""), offset, pageSize) - dtos <- Future.traverse(variants)(variantToDto) - } yield { - val totalPages = Math.max(1, ((totalCount + pageSize - 1) / pageSize)) - VariantSearchResponse( - items = dtos, - currentPage = page, - pageSize = pageSize, - totalItems = totalCount, - totalPages = totalPages - ) - } - } - } - - /** - * Get a single variant by ID. 
- */ - def getVariantById(variantId: Int): Future[Option[PublicVariantDTO]] = { - val cacheKey = s"api-variant-detail:$variantId" - - cache.getOrElseUpdate(cacheKey, DetailCacheDuration) { - for { - variantOpt <- variantV2Repository.findById(variantId) - result <- variantOpt match { - case Some(variant) => - for { - haplogroups <- haplogroupVariantRepository.getHaplogroupsByVariant(variantId) - } yield Some(buildDto(variant, haplogroups.headOption)) - case None => - Future.successful(None) - } - } yield result - } - } - - /** - * Get variants defining a specific haplogroup. - */ - def getVariantsByHaplogroup(haplogroupName: String): Future[Seq[PublicVariantDTO]] = { - val cacheKey = s"api-variants-by-haplogroup:$haplogroupName" - - cache.getOrElseUpdate(cacheKey, DetailCacheDuration) { - haplogroupVariantRepository.getVariantsByHaplogroupName(haplogroupName).map { variants => - variants.map(v => buildDto(v, None)) // Haplogroup already known from context - } - } - } - - /** - * Transform a VariantV2 to a PublicVariantDTO. - */ - private def variantToDto(variant: VariantV2): Future[PublicVariantDTO] = { - val variantId = variant.variantId.getOrElse(0) - - for { - haplogroups <- if (variantId > 0) haplogroupVariantRepository.getHaplogroupsByVariant(variantId) else Future.successful(Seq.empty) - } yield buildDto(variant, haplogroups.headOption) - } - - /** - * Build the DTO from a VariantV2. 
- * - * With VariantV2, the transformation is straightforward: - * - Coordinates come directly from JSONB - * - Aliases come directly from JSONB - * - No grouping or joining needed - */ - private def buildDto( - variant: VariantV2, - definingHaplogroup: Option[models.domain.haplogroups.Haplogroup] - ): PublicVariantDTO = { - - // Extract coordinates from JSONB - one entry per reference genome - val coordinates: Map[String, VariantCoordinateDTO] = variant.coordinates.asOpt[Map[String, JsObject]].map { coordsMap => - coordsMap.flatMap { case (refGenome, coords) => - for { - contig <- (coords \ "contig").asOpt[String] - position <- (coords \ "position").asOpt[Int] - ref <- (coords \ "ref").asOpt[String] - alt <- (coords \ "alt").asOpt[String] - } yield refGenome -> VariantCoordinateDTO( - contig = contig, - position = position, - ref = ref, - alt = alt - ) - } - }.getOrElse(Map.empty) - - // Extract aliases from JSONB - val aliasesDto = buildAliasesDto(variant) - - // Build defining haplogroup DTO - val definingHaplogroupDto = definingHaplogroup.map { hg => - DefiningHaplogroupDTO( - haplogroupId = hg.id.get, - haplogroupName = hg.name - ) - } - - PublicVariantDTO( - variantId = variant.variantId.getOrElse(0), - canonicalName = variant.canonicalName, - variantType = variant.mutationType.dbValue, - namingStatus = variant.namingStatus.dbValue, - coordinates = coordinates, - aliases = aliasesDto, - definingHaplogroup = definingHaplogroupDto - ) - } - - /** - * Build aliases DTO from VariantV2 JSONB aliases field. 
- */ - private def buildAliasesDto(variant: VariantV2): VariantAliasesDTO = { - val aliases = variant.aliases - - // Extract common_names array - val commonNames = (aliases \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) - - // Extract rs_ids array - val rsIds = (aliases \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) - - // Extract sources map - val sources = (aliases \ "sources").asOpt[Map[String, Seq[String]]].getOrElse(Map.empty) - - // Include canonical name if not already in common_names - val allCommonNames = variant.canonicalName match { - case Some(name) if !commonNames.contains(name) => name +: commonNames - case _ => commonNames - } - - VariantAliasesDTO( - commonNames = allCommonNames, - rsIds = rsIds, - sources = sources - ) - } -} diff --git a/app/services/ena/EnaApiClient.scala b/app/services/ena/EnaApiClient.scala deleted file mode 100644 index 484eb833..00000000 --- a/app/services/ena/EnaApiClient.scala +++ /dev/null @@ -1,145 +0,0 @@ -package services.ena - -import org.apache.pekko.stream.Materializer -import play.api.Logging -import play.api.libs.json.{JsArray, JsValue} -import play.api.libs.ws.WSClient - -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -case class EnaStudyData( - accession: String, - title: String, - centerName: String, - studyName: String, - details: String - ) - -case class EnaBiosampleData( - sampleAccession: String, - description: String, - alias: Option[String], - centerName: String, - sex: Option[String], - latitude: Option[Double], - longitude: Option[Double], - collectionDate: Option[String] - ) - -/** - * A client for interacting with the ENA (European Nucleotide Archive) portal API. - * - * This class provides methods to fetch study details and biosample data from the ENA portal API. - * - * The API client utilizes a WSClient implementation for making HTTP requests - * and is designed for asynchronous operations. 
- * - * @param ws the WSClient instance for making HTTP requests - * @param ec the execution context used for asynchronous operations - * @param mat the materializer required for certain asynchronous processing - */ -@Singleton -class EnaApiClient @Inject()(ws: WSClient)(implicit ec: ExecutionContext, mat: Materializer) extends Logging { - private val enaPortalApiBaseUrl = "https://www.ebi.ac.uk/ena/portal/api/search" - private val ValidSexValues = Set("male", "female", "intersex") - - /** - * Retrieves detailed information about a study from ENA (European Nucleotide Archive) - * based on a given accession identifier. - * - * @param accession The unique accession identifier for the study in ENA. - * @return A Future containing an Option of EnaStudyData. The Option is None if the - * study details are not found or there is an error in the API call. EnaStudyData - * provides metadata about the study, including accession, title, center name, - * study name, and description. - */ - def getStudyDetails(accession: String): Future[Option[EnaStudyData]] = { - val query = s"study_accession=$accession" - val fields = "study_accession,study_title,center_name,study_name,study_description" - - ws.url(enaPortalApiBaseUrl) - .withQueryStringParameters( - "result" -> "study", - "query" -> query, - "fields" -> fields, - "format" -> "json" - ) - .get() - .map { response => - response.status match { - case 200 => - val jsonArray = response.json.as[JsArray] - jsonArray.value.headOption.map { studyJson => - EnaStudyData( - accession = (studyJson \ "study_accession").as[String], - title = (studyJson \ "study_title").as[String], - centerName = (studyJson \ "center_name").asOpt[String].getOrElse("N/A"), - studyName = (studyJson \ "study_name").asOpt[String].getOrElse("N/A"), - details = (studyJson \ "study_description").asOpt[String].getOrElse("") - ) - } - case _ => - logger.error(s"Error fetching ENA study $accession: ${response.status} - ${response.body}") - None - } - } - .recover { - 
case e: Exception => - logger.error(s"Exception during ENA API call for $accession: $e") - None - } - } - - /** - * Fetches biosample metadata associated with a specific study accession from the ENA (European Nucleotide Archive). - * - * @param studyAccession The unique accession identifier for the study in ENA. - * @return A Future containing a sequence of EnaBiosampleData objects. If no biosamples are found or an error occurs, - * the sequence will be empty. - */ - def getBiosamples(studyAccession: String): Future[Seq[EnaBiosampleData]] = { - val fields = "sample_accession,description,sample_alias,center_name,sex,lat,lon,collection_date" - - ws.url(enaPortalApiBaseUrl) - .withQueryStringParameters( - "result" -> "sample", - "query" -> s"study_accession=$studyAccession", - "fields" -> fields, - "format" -> "json", - "limit" -> "0" - ) - .get() - .map { response => - response.status match { - case 200 => - val jsonArray = response.json.as[JsArray] - jsonArray.value.map { sampleJson => - EnaBiosampleData( - sampleAccession = (sampleJson \ "sample_accession").as[String], - description = (sampleJson \ "description").asOpt[String].getOrElse(""), - alias = (sampleJson \ "sample_alias").asOpt[String], - centerName = (sampleJson \ "center_name").asOpt[String].getOrElse("N/A"), - sex = (sampleJson \ "sex").asOpt[String].flatMap(validateSex), - latitude = (sampleJson \ "lat").asOpt[String].flatMap(_.toDoubleOption), - longitude = (sampleJson \ "lon").asOpt[String].flatMap(_.toDoubleOption), - collectionDate = (sampleJson \ "collection_date").asOpt[String] - ) - }.toSeq - case _ => - logger.error(s"Error fetching ENA samples for study $studyAccession: ${response.status} - ${response.body}") - Seq.empty - } - } - .recover { - case e: Exception => - logger.error(s"Exception during ENA samples API call for $studyAccession: $e") - Seq.empty - } - } - - private def validateSex(sex: String): Option[String] = { - val normalized = sex.toLowerCase.trim - 
Some(normalized).filter(ValidSexValues.contains) - } -} \ No newline at end of file diff --git a/app/services/firehose/AtmosphereEventHandler.scala b/app/services/firehose/AtmosphereEventHandler.scala deleted file mode 100644 index 5633edfa..00000000 --- a/app/services/firehose/AtmosphereEventHandler.scala +++ /dev/null @@ -1,1174 +0,0 @@ -package services.firehose - -import jakarta.inject.{Inject, Singleton} -import models.atmosphere.* -import models.domain.{GroupProject, GroupProjectMember, Project} -import models.domain.genomics.* -import models.domain.ibd.{MatchConsentTracking, MatchRequestTracking} -import play.api.Logging -import repositories.* -import services.TestTypeService -import services.ibd.PopulationAnalysisService - -import java.time.{LocalDateTime, ZoneId, ZonedDateTime} -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -/** - * Handles Atmosphere Lexicon events (Phase 3). - * Processes granular records: Biosample, SequenceRun, Alignment, etc. - */ -@Singleton -class AtmosphereEventHandler @Inject()( - citizenBiosampleRepository: CitizenBiosampleRepository, - sequenceLibraryRepository: SequenceLibraryRepository, - sequenceFileRepository: SequenceFileRepository, - alignmentRepository: AlignmentRepository, - specimenDonorRepository: SpecimenDonorRepository, - projectRepository: ProjectRepository, - testTypeService: TestTypeService, - genotypeDataRepository: GenotypeDataRepository, - populationBreakdownRepository: PopulationBreakdownRepository, - haplogroupReconciliationRepository: HaplogroupReconciliationRepository, - instrumentObservationRepository: InstrumentObservationRepository, - groupProjectRepository: GroupProjectRepository, - groupProjectMemberRepository: GroupProjectMemberRepository, - matchConsentTrackingRepository: MatchConsentTrackingRepository, - matchRequestTrackingRepository: MatchRequestTrackingRepository, - populationAnalysisService: PopulationAnalysisService - )(implicit ec: ExecutionContext) extends 
Logging { - - def handle(event: FirehoseEvent): Future[FirehoseResult] = { - event match { - case e: BiosampleEvent => handleBiosample(e) - case e: SequenceRunEvent => handleSequenceRun(e) - case e: AlignmentEvent => handleAlignment(e) - case e: AtmosphereProjectEvent => handleProject(e) - case e: GenotypeEvent => handleGenotype(e) - case e: PopulationBreakdownEvent => handlePopulationBreakdown(e) - case e: HaplogroupReconciliationEvent => handleHaplogroupReconciliation(e) - case e: InstrumentObservationEvent => handleInstrumentObservation(e) - case e: GroupProjectEvent => handleGroupProject(e) - case e: ProjectMembershipEvent => handleProjectMembership(e) - case e: MatchConsentEvent => handleMatchConsent(e) - case e: MatchRequestEvent => handleMatchRequest(e) - case _ => - logger.warn(s"Unhandled event type: ${event.getClass.getSimpleName} for ${event.atUri}") - Future.successful(FirehoseResult.Success(event.atUri, "", None, "Ignored (Not Implemented)")) - } - } - - // --- Biosample Handling --- - - private def handleBiosample(event: BiosampleEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createBiosample(event) - case FirehoseAction.Update => updateBiosample(event) - case FirehoseAction.Delete => deleteBiosample(event) - } - } - - private def createBiosample(event: BiosampleEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - for { - donorId <- resolveOrCreateDonor(record) - sampleGuid = UUID.randomUUID() - newAtCid = UUID.randomUUID().toString // In real app, use record's CID if available or generated - - citizenBiosample = CitizenBiosample( - id = None, - atUri = Some(record.atUri), - accession = record.sampleAccession, - alias = None, - sourcePlatform = Some(record.centerName), - collectionDate = None, - sex = record.sex.map(s => models.domain.genomics.BiologicalSex.fromString(s)), - geocoord = None, - description = record.description, - yHaplogroup = 
record.haplogroups.flatMap(_.yDna).map(h => models.domain.genomics.HaplogroupResult( - h.haplogroupName, - h.score, - h.matchingSnps.getOrElse(0), - h.mismatchingSnps.getOrElse(0), - h.ancestralMatches.getOrElse(0), - h.treeDepth.getOrElse(0), - h.lineagePath.getOrElse(Seq.empty) - )), - mtHaplogroup = record.haplogroups.flatMap(_.mtDna).map(h => models.domain.genomics.HaplogroupResult( - h.haplogroupName, - h.score, - h.matchingSnps.getOrElse(0), - h.mismatchingSnps.getOrElse(0), - h.ancestralMatches.getOrElse(0), - h.treeDepth.getOrElse(0), - h.lineagePath.getOrElse(Seq.empty) - )), - sampleGuid = sampleGuid, - deleted = false, - atCid = Some(newAtCid), - createdAt = LocalDateTime.ofInstant(record.meta.createdAt, ZoneId.systemDefault()), - updatedAt = LocalDateTime.ofInstant(record.meta.updatedAt.getOrElse(record.meta.createdAt), ZoneId.systemDefault()), - specimenDonorId = donorId - ) - - created <- citizenBiosampleRepository.create(citizenBiosample) - } yield FirehoseResult.Success(event.atUri, newAtCid, Some(created.sampleGuid), "Created Biosample") - - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required for create")) - } - } - - private def updateBiosample(event: BiosampleEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - citizenBiosampleRepository.findByAtUri(record.atUri).flatMap { - case Some(existing) => - resolveOrCreateDonor(record).flatMap { donorId => - val updated = existing.copy( - description = record.description.orElse(existing.description), - sourcePlatform = Some(record.centerName), - sex = record.sex.map(s => models.domain.genomics.BiologicalSex.fromString(s)).orElse(existing.sex), - yHaplogroup = record.haplogroups.flatMap(_.yDna).map(h => models.domain.genomics.HaplogroupResult( - h.haplogroupName, h.score, h.matchingSnps.getOrElse(0), h.mismatchingSnps.getOrElse(0), - h.ancestralMatches.getOrElse(0), h.treeDepth.getOrElse(0), h.lineagePath.getOrElse(Seq.empty) - 
)).orElse(existing.yHaplogroup), - mtHaplogroup = record.haplogroups.flatMap(_.mtDna).map(h => models.domain.genomics.HaplogroupResult( - h.haplogroupName, h.score, h.matchingSnps.getOrElse(0), h.mismatchingSnps.getOrElse(0), - h.ancestralMatches.getOrElse(0), h.treeDepth.getOrElse(0), h.lineagePath.getOrElse(Seq.empty) - )).orElse(existing.mtHaplogroup), - atCid = Some(UUID.randomUUID().toString), - updatedAt = LocalDateTime.now(), - specimenDonorId = donorId - ) - citizenBiosampleRepository.update(updated, existing.atCid).map { success => - if (success) FirehoseResult.Success(event.atUri, updated.atCid.get, Some(updated.sampleGuid), "Updated Biosample") - else FirehoseResult.Conflict(event.atUri, "Update failed (optimistic locking)") - } - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required for update")) - } - } - - private def deleteBiosample(event: BiosampleEvent): Future[FirehoseResult] = { - citizenBiosampleRepository.softDeleteByAtUri(event.atUri).map { - case true => FirehoseResult.Success(event.atUri, "", None, "Deleted") - case false => FirehoseResult.NotFound(event.atUri) - } - } - - // --- Sequence Run Handling --- - - private def handleSequenceRun(event: SequenceRunEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createSequenceRun(event) - case FirehoseAction.Update => updateSequenceRun(event) - case FirehoseAction.Delete => deleteSequenceRun(event) - } - } - - private def createSequenceRun(event: SequenceRunEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - citizenBiosampleRepository.findByAtUri(record.biosampleRef).flatMap { - case Some(biosample) => - testTypeService.getByCode(record.testType).flatMap { - case Some(testTypeRow) => - val testTypeId = testTypeRow.id.getOrElse(throw new IllegalStateException("TestTypeRow ID not found")) - val lib = 
SequenceLibrary( - id = None, - sampleGuid = biosample.sampleGuid, - lab = record.platformName, - testTypeId = testTypeId, // <--- Changed to testTypeId - runDate = record.runDate.map(d => LocalDateTime.ofInstant(d, ZoneId.systemDefault())).getOrElse(LocalDateTime.now()), - instrument = record.instrumentModel.getOrElse("Unknown"), - reads = record.totalReads.getOrElse(0), - readLength = record.readLength.getOrElse(0), - pairedEnd = record.libraryLayout.exists(_.equalsIgnoreCase("PAIRED")), - insertSize = record.meanInsertSize.map(_.toInt), - atUri = Some(record.atUri), - atCid = Some(UUID.randomUUID().toString), - created_at = LocalDateTime.now(), - updated_at = Some(LocalDateTime.now()) - ) - - sequenceLibraryRepository.create(lib).map { _ => - FirehoseResult.Success(event.atUri, lib.atCid.get, None, "Sequence Run Created") - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Invalid test type code: ${record.testType}")) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Parent biosample not found: ${record.biosampleRef}")) - } - case None => Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def updateSequenceRun(event: SequenceRunEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - sequenceLibraryRepository.findByAtUri(record.atUri).flatMap { - case Some(existing) => - testTypeService.getByCode(record.testType).flatMap { - case Some(testTypeRow) => - val testTypeId = testTypeRow.id.getOrElse(throw new IllegalStateException("TestTypeRow ID not found")) - val updated = existing.copy( - lab = record.platformName, - testTypeId = testTypeId, // <--- Changed to testTypeId - runDate = record.runDate.map(d => LocalDateTime.ofInstant(d, ZoneId.systemDefault())).getOrElse(existing.runDate), - instrument = record.instrumentModel.getOrElse(existing.instrument), - reads = record.totalReads.getOrElse(existing.reads), - readLength 
= record.readLength.getOrElse(existing.readLength), - pairedEnd = record.libraryLayout.exists(_.equalsIgnoreCase("PAIRED")), - insertSize = record.meanInsertSize.map(_.toInt).orElse(existing.insertSize), - atCid = Some(UUID.randomUUID().toString), - updated_at = Some(LocalDateTime.now()) - ) - sequenceLibraryRepository.update(updated).map { _ => - FirehoseResult.Success(event.atUri, updated.atCid.get, None, "Sequence Run Updated") - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Invalid test type code: ${record.testType}")) - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def deleteSequenceRun(event: SequenceRunEvent): Future[FirehoseResult] = { - sequenceLibraryRepository.deleteByAtUri(event.atUri).map { - case true => FirehoseResult.Success(event.atUri, "", None, "Sequence Run Deleted") - case false => FirehoseResult.NotFound(event.atUri) - } - } - - // --- Alignment Handling --- - - private def handleAlignment(event: AlignmentEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createAlignment(event) - case FirehoseAction.Update => updateAlignment(event) - case FirehoseAction.Delete => deleteAlignment(event) - } - } - - private def createAlignment(event: AlignmentEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - sequenceLibraryRepository.findByAtUri(record.sequenceRunRef).flatMap { - case Some(library) => - val libraryId = library.id.getOrElse(throw new IllegalStateException("Library ID missing")) - - val fileName = record.files.flatMap(_.headOption).map(_.fileName).getOrElse(s"alignment-${UUID.randomUUID()}") - - val checksumsJsonb = record.files.flatMap(_.headOption).flatMap(_.checksum.map { - cs => - models.domain.genomics.SequenceFileChecksumJsonb( - checksum = cs, - algorithm = 
record.files.flatMap(_.headOption.flatMap(_.checksumAlgorithm)).getOrElse("UNKNOWN"), - verifiedAt = Some(LocalDateTime.now()), - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - }).toList - - val httpLocationsJsonb = record.files.flatMap(_.headOption).flatMap(_.location.map { - loc => - models.domain.genomics.SequenceFileHttpLocationJsonb( - url = loc, - urlHash = UUID.nameUUIDFromBytes(loc.getBytes).toString, // Generate hash from URL - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - }).toList - - val seqFile = models.domain.genomics.SequenceFile( - id = None, - libraryId = libraryId, - fileName = fileName, - fileSizeBytes = record.files.flatMap(_.headOption.flatMap(_.fileSizeBytes)).getOrElse(0L), // Use actual file size if available - fileFormat = "BAM/CRAM", // Or derive from record.files if available - checksums = checksumsJsonb, - httpLocations = httpLocationsJsonb, - atpLocation = None, // No direct mapping from FileInfo - aligner = record.aligner, - targetReference = record.referenceBuild, - createdAt = LocalDateTime.now(), - updatedAt = Some(LocalDateTime.now()) - ) - - sequenceFileRepository.create(seqFile).flatMap { createdFile => - val metadata = AlignmentMetadata( - id = None, - sequenceFileId = createdFile.id.get, - genbankContigId = 0, // Needs resolution or specific contig logic, assuming global stats for now? 
- metricLevel = MetricLevel.GLOBAL, // Assuming global stats - referenceBuild = Some(record.referenceBuild), - variantCaller = record.variantCaller, - genomeTerritory = record.metrics.flatMap(_.genomeTerritory), - meanCoverage = record.metrics.flatMap(_.meanCoverage), - medianCoverage = record.metrics.flatMap(_.medianCoverage), - sdCoverage = record.metrics.flatMap(_.sdCoverage), - pctExcDupe = record.metrics.flatMap(_.pctExcDupe), - pctExcMapq = record.metrics.flatMap(_.pctExcMapq), - pct10x = record.metrics.flatMap(_.pct10x), - pct20x = record.metrics.flatMap(_.pct20x), - pct30x = record.metrics.flatMap(_.pct30x), - hetSnpSensitivity = record.metrics.flatMap(_.hetSnpSensitivity), - analysisTool = record.aligner, - metricsDate = LocalDateTime.now() - ) - - alignmentRepository.createMetadata(metadata).map { _ => - FirehoseResult.Success(event.atUri, "cid", None, "Alignment Created") - } - } - - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Sequence Run not found: ${record.sequenceRunRef}")) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def updateAlignment(event: AlignmentEvent): Future[FirehoseResult] = { - // Implementing update logic for alignment is complex due to file dependencies. - // For now, returning success as placeholder. - Future.successful(FirehoseResult.Success(event.atUri, "", None, "Alignment Update Not Fully Implemented")) - } - - private def deleteAlignment(event: AlignmentEvent): Future[FirehoseResult] = { - // Requires finding by AT URI, but AlignmentMetadata doesn't have AT URI yet. - // Assuming for now we can't delete by AT URI directly without schema change. 
- Future.successful(FirehoseResult.Success(event.atUri, "", None, "Alignment Delete Not Fully Implemented")) - } - - // --- Project Handling --- - - private def handleProject(event: AtmosphereProjectEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createProject(event) - case FirehoseAction.Update => updateProject(event) - case FirehoseAction.Delete => deleteProject(event) - } - } - - private def createProject(event: AtmosphereProjectEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - val newAtCid = UUID.randomUUID().toString - val project = Project( - id = None, - projectGuid = UUID.randomUUID(), - name = record.projectName, - description = record.description, - ownerDid = record.administrator, - atUri = Some(record.atUri), - atCid = Some(newAtCid), - createdAt = LocalDateTime.ofInstant(record.meta.createdAt, ZoneId.systemDefault()), - updatedAt = LocalDateTime.now(), - deleted = false - ) - projectRepository.create(project).map { _ => - FirehoseResult.Success(event.atUri, newAtCid, None, "Project Created") - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def updateProject(event: AtmosphereProjectEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - projectRepository.findByAtUri(record.atUri).flatMap { - case Some(existing) => - val updated = existing.copy( - name = record.projectName, - description = record.description, - ownerDid = record.administrator, - atCid = Some(UUID.randomUUID().toString), - updatedAt = LocalDateTime.now() - ) - projectRepository.update(updated, existing.atCid).map { success => - if (success) FirehoseResult.Success(event.atUri, updated.atCid.get, None, "Project Updated") - else FirehoseResult.Conflict(event.atUri, "Update failed") - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case None => - 
Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def deleteProject(event: AtmosphereProjectEvent): Future[FirehoseResult] = { - projectRepository.softDeleteByAtUri(event.atUri).map { - case true => FirehoseResult.Success(event.atUri, "", None, "Project Deleted") - case false => FirehoseResult.NotFound(event.atUri) - } - } - - // --- Genotype Handling --- - - private def handleGenotype(event: GenotypeEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createGenotype(event) - case FirehoseAction.Update => updateGenotype(event) - case FirehoseAction.Delete => deleteGenotype(event) - } - } - - private def createGenotype(event: GenotypeEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - citizenBiosampleRepository.findByAtUri(record.biosampleRef).flatMap { - case Some(biosample) => - testTypeService.getByCode(record.testTypeCode).flatMap { - case Some(testTypeRow) => - val metrics = GenotypeMetrics( - totalMarkersCalled = record.totalMarkersCalled, - totalMarkersPossible = record.totalMarkersPossible, - callRate = record.callRate, - noCallRate = record.noCallRate, - yMarkersCalled = record.yMarkersCalled, - yMarkersTotal = record.yMarkersTotal, - mtMarkersCalled = record.mtMarkersCalled, - mtMarkersTotal = record.mtMarkersTotal, - autosomalMarkersCalled = record.autosomalMarkersCalled, - hetRate = record.hetRate, - testDate = record.testDate.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())), - processedAt = record.processedAt.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())), - derivedYHaplogroup = record.derivedHaplogroups.flatMap(_.yDna).map(h => models.domain.genomics.HaplogroupResult( - h.haplogroupName, h.score, h.matchingSnps.getOrElse(0), h.mismatchingSnps.getOrElse(0), - h.ancestralMatches.getOrElse(0), h.treeDepth.getOrElse(0), h.lineagePath.getOrElse(Seq.empty) - )), - derivedMtHaplogroup = 
record.derivedHaplogroups.flatMap(_.mtDna).map(h => models.domain.genomics.HaplogroupResult( - h.haplogroupName, h.score, h.matchingSnps.getOrElse(0), h.mismatchingSnps.getOrElse(0), - h.ancestralMatches.getOrElse(0), h.treeDepth.getOrElse(0), h.lineagePath.getOrElse(Seq.empty) - )), - files = record.files - ) - - val genotypeData = GenotypeData( - id = None, - atUri = Some(record.atUri), - atCid = Some(UUID.randomUUID().toString), - sampleGuid = biosample.sampleGuid, - testTypeId = testTypeRow.id, - provider = Some(record.provider), - chipVersion = record.chipVersion, - buildVersion = record.buildVersion, - sourceFileHash = record.sourceFileHash, - metrics = metrics, - populationBreakdownId = None, // Will be linked when PopulationBreakdown is created - deleted = false, - createdAt = LocalDateTime.ofInstant(record.meta.createdAt, ZoneId.systemDefault()), - updatedAt = LocalDateTime.now() - ) - - genotypeDataRepository.create(genotypeData).map { created => - FirehoseResult.Success(event.atUri, created.atCid.getOrElse(""), Some(created.sampleGuid), "Genotype Created") - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Invalid test type code: ${record.testTypeCode}")) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Biosample not found: ${record.biosampleRef}")) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def updateGenotype(event: GenotypeEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - genotypeDataRepository.findByAtUri(record.atUri).flatMap { - case Some(existing) => - val updatedMetrics = existing.metrics.copy( - totalMarkersCalled = record.totalMarkersCalled.orElse(existing.metrics.totalMarkersCalled), - totalMarkersPossible = record.totalMarkersPossible.orElse(existing.metrics.totalMarkersPossible), - callRate = record.callRate.orElse(existing.metrics.callRate), - noCallRate = 
record.noCallRate.orElse(existing.metrics.noCallRate), - testDate = record.testDate.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())).orElse(existing.metrics.testDate), - processedAt = record.processedAt.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())).orElse(existing.metrics.processedAt), - files = record.files.orElse(existing.metrics.files) - ) - - val updated = existing.copy( - provider = Some(record.provider), - chipVersion = record.chipVersion.orElse(existing.chipVersion), - buildVersion = record.buildVersion.orElse(existing.buildVersion), - sourceFileHash = record.sourceFileHash.orElse(existing.sourceFileHash), - metrics = updatedMetrics, - atCid = Some(UUID.randomUUID().toString), - updatedAt = LocalDateTime.now() - ) - - genotypeDataRepository.update(updated).map { success => - if (success) FirehoseResult.Success(event.atUri, updated.atCid.getOrElse(""), Some(updated.sampleGuid), "Genotype Updated") - else FirehoseResult.Conflict(event.atUri, "Update failed") - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def deleteGenotype(event: GenotypeEvent): Future[FirehoseResult] = { - genotypeDataRepository.findByAtUri(event.atUri).flatMap { - case Some(existing) => - genotypeDataRepository.softDelete(existing.id.get).map { - case true => FirehoseResult.Success(event.atUri, "", None, "Genotype Deleted") - case false => FirehoseResult.NotFound(event.atUri) - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - } - - // --- Population Breakdown Handling --- - - private def handlePopulationBreakdown(event: PopulationBreakdownEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createPopulationBreakdown(event) - case FirehoseAction.Update => updatePopulationBreakdown(event) - case FirehoseAction.Delete => 
deletePopulationBreakdown(event) - } - } - - private def createPopulationBreakdown(event: PopulationBreakdownEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - citizenBiosampleRepository.findByAtUri(record.biosampleRef).flatMap { - case Some(biosample) => - val pcaCoords = record.pcaCoordinates.map { coords => - PcaCoordinatesJsonb( - coords.headOption.getOrElse(0.0), - coords.lift(1).getOrElse(0.0), - coords.lift(2).getOrElse(0.0) - ) - } - - val breakdown = PopulationBreakdown( - id = None, - atUri = Some(record.atUri), - atCid = Some(UUID.randomUUID().toString), - sampleGuid = biosample.sampleGuid, - analysisMethod = record.analysisMethod, - panelType = record.panelType, - referencePopulations = record.referencePopulations, - referenceVersion = record.referenceVersion, - snpsAnalyzed = record.snpsAnalyzed, - snpsWithGenotype = record.snpsWithGenotype, - snpsMissing = record.snpsMissing, - confidenceLevel = record.confidenceLevel, - pcaCoordinates = pcaCoords, - analysisDate = record.analysisDate.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())), - pipelineVersion = record.pipelineVersion, - deleted = false, - createdAt = LocalDateTime.ofInstant(record.meta.createdAt, ZoneId.systemDefault()), - updatedAt = LocalDateTime.now() - ) - - populationBreakdownRepository.create(breakdown).flatMap { created => - // Create population components - val componentsFuture = populationBreakdownRepository.upsertComponentsByBreakdownId( - created.id.get, - record.components.map { c => - models.domain.genomics.PopulationComponent( - id = None, - populationBreakdownId = created.id.get, - populationCode = c.populationCode, - populationName = c.populationName, - superPopulation = c.superPopulation, - percentage = c.percentage, - confidenceLower = c.confidenceInterval.flatMap(_.get("lower")), - confidenceUpper = c.confidenceInterval.flatMap(_.get("upper")), - rank = c.rank - ) - } - ) - - // Create super population summaries if present - val 
summariesFuture = record.superPopulationSummary match { - case Some(summaries) => - populationBreakdownRepository.upsertSummariesByBreakdownId( - created.id.get, - summaries.map { s => - models.domain.genomics.SuperPopulationSummary( - id = None, - populationBreakdownId = created.id.get, - superPopulation = s.superPopulation, - percentage = s.percentage, - populations = Some(SuperPopulationListJsonb(s.populations)) - ) - } - ) - case None => Future.successful(Seq.empty) - } - - for { - _ <- componentsFuture - _ <- summariesFuture - } yield FirehoseResult.Success(event.atUri, created.atCid.getOrElse(""), Some(created.sampleGuid), "Population Breakdown Created") - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Biosample not found: ${record.biosampleRef}")) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def updatePopulationBreakdown(event: PopulationBreakdownEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - populationBreakdownRepository.findByAtUri(record.atUri).flatMap { - case Some(existing) => - val updated = existing.copy( - analysisMethod = record.analysisMethod, - panelType = record.panelType.orElse(existing.panelType), - referencePopulations = record.referencePopulations.orElse(existing.referencePopulations), - confidenceLevel = record.confidenceLevel.orElse(existing.confidenceLevel), - atCid = Some(UUID.randomUUID().toString), - updatedAt = LocalDateTime.now() - ) - - for { - success <- populationBreakdownRepository.update(updated) - _ <- populationBreakdownRepository.upsertComponentsByBreakdownId( - existing.id.get, - record.components.map { c => - models.domain.genomics.PopulationComponent( - id = None, - populationBreakdownId = existing.id.get, - populationCode = c.populationCode, - populationName = c.populationName, - superPopulation = c.superPopulation, - percentage = c.percentage, - confidenceLower = 
c.confidenceInterval.flatMap(_.get("lower")), - confidenceUpper = c.confidenceInterval.flatMap(_.get("upper")), - rank = c.rank - ) - } - ) - } yield { - if (success) FirehoseResult.Success(event.atUri, updated.atCid.getOrElse(""), Some(updated.sampleGuid), "Population Breakdown Updated") - else FirehoseResult.Conflict(event.atUri, "Update failed") - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def deletePopulationBreakdown(event: PopulationBreakdownEvent): Future[FirehoseResult] = { - populationBreakdownRepository.findByAtUri(event.atUri).flatMap { - case Some(existing) => - populationBreakdownRepository.softDelete(existing.id.get).map { - case true => FirehoseResult.Success(event.atUri, "", None, "Population Breakdown Deleted") - case false => FirehoseResult.NotFound(event.atUri) - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - } - - // --- Haplogroup Reconciliation Handling --- - - private def handleHaplogroupReconciliation(event: HaplogroupReconciliationEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createHaplogroupReconciliation(event) - case FirehoseAction.Update => updateHaplogroupReconciliation(event) - case FirehoseAction.Delete => deleteHaplogroupReconciliation(event) - } - } - - private def createHaplogroupReconciliation(event: HaplogroupReconciliationEvent): Future[FirehoseResult] = { - import play.api.libs.json.Json - - event.payload match { - case Some(record) => - // Resolve specimen donor - could be by AT URI or identifier - specimenDonorRepository.findByAtUri(record.specimenDonorRef).flatMap { - case Some(donor) => - val dnaType = DnaType.fromString(record.dnaType).getOrElse( - throw new IllegalArgumentException(s"Invalid DNA type: ${record.dnaType}") - ) - - val status = models.domain.genomics.ReconciliationStatus( - 
compatibilityLevel = Some(record.status.compatibilityLevel), - consensusHaplogroup = Some(record.status.consensusHaplogroup), - statusConfidence = record.status.confidence, - branchCompatibilityScore = record.status.branchCompatibilityScore, - snpConcordance = record.status.snpConcordance, - runCount = record.status.runCount, - warnings = record.status.warnings - ) - - val reconciliation = HaplogroupReconciliation( - id = None, - atUri = Some(record.atUri), - atCid = Some(UUID.randomUUID().toString), - specimenDonorId = donor.id.get, - dnaType = dnaType, - status = status, - runCalls = Json.toJson(record.runCalls), - snpConflicts = record.snpConflicts.map(Json.toJson(_)), - heteroplasmyObservations = record.heteroplasmyObservations.map(Json.toJson(_)), - identityVerification = record.identityVerification.map(Json.toJson(_)), - manualOverride = record.manualOverride.map(Json.toJson(_)), - auditLog = record.auditLog.map(Json.toJson(_)), - lastReconciliationAt = record.lastReconciliationAt.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())), - deleted = false, - createdAt = LocalDateTime.ofInstant(record.meta.createdAt, ZoneId.systemDefault()), - updatedAt = LocalDateTime.now() - ) - - haplogroupReconciliationRepository.upsertBySpecimenDonorAndDnaType(reconciliation).map { created => - FirehoseResult.Success(event.atUri, created.atCid.getOrElse(""), None, "Haplogroup Reconciliation Created") - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Specimen donor not found: ${record.specimenDonorRef}")) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def updateHaplogroupReconciliation(event: HaplogroupReconciliationEvent): Future[FirehoseResult] = { - import play.api.libs.json.Json - - event.payload match { - case Some(record) => - haplogroupReconciliationRepository.findByAtUri(record.atUri).flatMap { - case Some(existing) => - val updatedStatus = 
existing.status.copy( - compatibilityLevel = Some(record.status.compatibilityLevel), - consensusHaplogroup = Some(record.status.consensusHaplogroup), - statusConfidence = record.status.confidence.orElse(existing.status.statusConfidence), - branchCompatibilityScore = record.status.branchCompatibilityScore.orElse(existing.status.branchCompatibilityScore), - snpConcordance = record.status.snpConcordance.orElse(existing.status.snpConcordance), - runCount = record.status.runCount.orElse(existing.status.runCount), - warnings = record.status.warnings.orElse(existing.status.warnings) - ) - - val updated = existing.copy( - status = updatedStatus, - runCalls = Json.toJson(record.runCalls), - snpConflicts = record.snpConflicts.map(Json.toJson(_)).orElse(existing.snpConflicts), - heteroplasmyObservations = record.heteroplasmyObservations.map(Json.toJson(_)).orElse(existing.heteroplasmyObservations), - identityVerification = record.identityVerification.map(Json.toJson(_)).orElse(existing.identityVerification), - manualOverride = record.manualOverride.map(Json.toJson(_)).orElse(existing.manualOverride), - auditLog = record.auditLog.map(Json.toJson(_)).orElse(existing.auditLog), - lastReconciliationAt = record.lastReconciliationAt.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())).orElse(existing.lastReconciliationAt), - atCid = Some(UUID.randomUUID().toString), - updatedAt = LocalDateTime.now() - ) - - haplogroupReconciliationRepository.update(updated).map { success => - if (success) FirehoseResult.Success(event.atUri, updated.atCid.getOrElse(""), None, "Haplogroup Reconciliation Updated") - else FirehoseResult.Conflict(event.atUri, "Update failed") - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def deleteHaplogroupReconciliation(event: HaplogroupReconciliationEvent): Future[FirehoseResult] = { - 
haplogroupReconciliationRepository.findByAtUri(event.atUri).flatMap { - case Some(existing) => - haplogroupReconciliationRepository.softDelete(existing.id.get).map { - case true => FirehoseResult.Success(event.atUri, "", None, "Haplogroup Reconciliation Deleted") - case false => FirehoseResult.NotFound(event.atUri) - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - } - - // --- Helpers --- - - private def resolveOrCreateDonor(record: BiosampleRecord): Future[Option[Int]] = { - val citizenDid = record.citizenDid - val identifier = record.donorIdentifier - - specimenDonorRepository.findByDidAndIdentifier(citizenDid, identifier).flatMap { - case Some(donor) => Future.successful(donor.id) - case None => - val newDonor = SpecimenDonor( - donorIdentifier = identifier, - originBiobank = record.centerName, - donorType = BiosampleType.Citizen, - sex = record.sex.map(s => models.domain.genomics.BiologicalSex.fromString(s)), - geocoord = None, - pgpParticipantId = None, - atUri = Some(citizenDid), - dateRangeStart = None, - dateRangeEnd = None - ) - specimenDonorRepository.create(newDonor).map(_.id) - } - } - - // --- Instrument Observation Handling --- - - private def handleInstrumentObservation(event: InstrumentObservationEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createInstrumentObservation(event) - case FirehoseAction.Update => updateInstrumentObservation(event) - case FirehoseAction.Delete => deleteInstrumentObservation(event) - } - } - - private def createInstrumentObservation(event: InstrumentObservationEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - instrumentObservationRepository.findByAtUri(record.atUri).flatMap { - case Some(_) => - Future.successful(FirehoseResult.Conflict(event.atUri, "Instrument observation already exists")) - case None => - val observation = InstrumentObservation( - atUri = record.atUri, - atCid = event.atCid, - instrumentId = 
record.instrumentId, - labName = record.labName, - biosampleRef = record.biosampleRef, - sequenceRunRef = record.sequenceRunRef, - platform = record.platform, - instrumentModel = record.instrumentModel, - flowcellId = record.flowcellId, - runDate = record.runDate.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())), - confidence = record.confidence - .map(ObservationConfidence.fromString) - .getOrElse(ObservationConfidence.Inferred) - ) - instrumentObservationRepository.create(observation).map { created => - logger.info(s"Created instrument observation for instrument ${record.instrumentId} at lab ${record.labName}") - FirehoseResult.Success(event.atUri, UUID.randomUUID().toString, None, "Instrument observation created") - } - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Missing payload for InstrumentObservationEvent")) - } - } - - private def updateInstrumentObservation(event: InstrumentObservationEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - instrumentObservationRepository.findByAtUri(event.atUri).flatMap { - case Some(existing) => - val updated = existing.copy( - atCid = event.atCid, - instrumentId = record.instrumentId, - labName = record.labName, - biosampleRef = record.biosampleRef, - sequenceRunRef = record.sequenceRunRef, - platform = record.platform, - instrumentModel = record.instrumentModel, - flowcellId = record.flowcellId, - runDate = record.runDate.map(i => LocalDateTime.ofInstant(i, ZoneId.systemDefault())), - confidence = record.confidence - .map(ObservationConfidence.fromString) - .getOrElse(existing.confidence) - ) - instrumentObservationRepository.update(updated).map { success => - if (success) FirehoseResult.Success(event.atUri, UUID.randomUUID().toString, None, "Instrument observation updated") - else FirehoseResult.Error(event.atUri, "Failed to update instrument observation") - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case 
None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Missing payload for InstrumentObservationEvent")) - } - } - - private def deleteInstrumentObservation(event: InstrumentObservationEvent): Future[FirehoseResult] = { - instrumentObservationRepository.deleteByAtUri(event.atUri).map { deleted => - if (deleted) FirehoseResult.Success(event.atUri, "", None, "Instrument observation deleted") - else FirehoseResult.NotFound(event.atUri) - } - } - - // --- Group Project Handling --- - - private def handleGroupProject(event: GroupProjectEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createGroupProject(event) - case FirehoseAction.Update => updateGroupProject(event) - case FirehoseAction.Delete => deleteGroupProject(event) - } - } - - private def createGroupProject(event: GroupProjectEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - val ownerDid = record.governance.administrators.headOption.map(_.citizenDid).getOrElse("") - if (ownerDid.isEmpty) { - Future.successful(FirehoseResult.ValidationError(event.atUri, "At least one administrator is required")) - } else { - val newAtCid = UUID.randomUUID().toString - val project = GroupProject( - projectName = record.projectName, - projectType = record.projectType, - targetHaplogroup = record.targetHaplogroup, - targetLineage = record.targetLineage, - description = record.description, - backgroundInfo = record.backgroundInfo, - joinPolicy = record.joinPolicy.getOrElse("APPROVAL_REQUIRED"), - haplogroupRequirement = record.haplogroupRequirement, - memberListVisibility = record.visibilityPolicy.flatMap(_.memberListVisibility).getOrElse("MEMBERS_ONLY"), - strPolicy = record.visibilityPolicy.flatMap(_.strPolicy).getOrElse("DISTANCE_ONLY"), - snpPolicy = record.visibilityPolicy.flatMap(_.snpPolicy).getOrElse("TERMINAL_ONLY"), - publicTreeView = record.visibilityPolicy.flatMap(_.publicTreeView).getOrElse(false), - successionPolicy = 
record.governance.successionPolicy, - ownerDid = ownerDid, - atUri = Some(record.atUri), - atCid = Some(newAtCid), - createdAt = LocalDateTime.ofInstant(record.meta.createdAt, ZoneId.systemDefault()), - updatedAt = LocalDateTime.now() - ) - groupProjectRepository.create(project).flatMap { created => - val adminMember = GroupProjectMember( - groupProjectId = created.id.get, - citizenDid = ownerDid, - role = "ADMIN", - status = "ACTIVE", - joinedAt = Some(LocalDateTime.now()) - ) - groupProjectMemberRepository.create(adminMember).map { _ => - FirehoseResult.Success(event.atUri, newAtCid, None, "Group Project Created") - } - } - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def updateGroupProject(event: GroupProjectEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - groupProjectRepository.findByAtUri(event.atUri).flatMap { - case Some(existing) => - val updated = existing.copy( - projectName = record.projectName, - description = record.description, - backgroundInfo = record.backgroundInfo, - joinPolicy = record.joinPolicy.getOrElse(existing.joinPolicy), - haplogroupRequirement = record.haplogroupRequirement.orElse(existing.haplogroupRequirement), - memberListVisibility = record.visibilityPolicy.flatMap(_.memberListVisibility).getOrElse(existing.memberListVisibility), - strPolicy = record.visibilityPolicy.flatMap(_.strPolicy).getOrElse(existing.strPolicy), - snpPolicy = record.visibilityPolicy.flatMap(_.snpPolicy).getOrElse(existing.snpPolicy), - publicTreeView = record.visibilityPolicy.flatMap(_.publicTreeView).getOrElse(existing.publicTreeView), - successionPolicy = record.governance.successionPolicy.orElse(existing.successionPolicy), - atCid = Some(UUID.randomUUID().toString) - ) - groupProjectRepository.update(updated).map { - case true => FirehoseResult.Success(event.atUri, updated.atCid.getOrElse(""), None, "Group Project Updated") - case false => 
FirehoseResult.Conflict(event.atUri, "Update failed") - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def deleteGroupProject(event: GroupProjectEvent): Future[FirehoseResult] = { - groupProjectRepository.softDeleteByAtUri(event.atUri).map { - case true => FirehoseResult.Success(event.atUri, "", None, "Group Project Deleted") - case false => FirehoseResult.NotFound(event.atUri) - } - } - - // --- Project Membership Handling --- - - private def handleProjectMembership(event: ProjectMembershipEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create => createProjectMembership(event) - case FirehoseAction.Update => updateProjectMembership(event) - case FirehoseAction.Delete => deleteProjectMembership(event) - } - } - - private def createProjectMembership(event: ProjectMembershipEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - groupProjectRepository.findByAtUri(record.projectRef).flatMap { - case Some(project) => - val status = if (project.joinPolicy == "OPEN") "ACTIVE" else record.status - val member = GroupProjectMember( - groupProjectId = project.id.get, - citizenDid = extractDidFromAtUri(record.atUri), - biosampleAtUri = Some(record.biosampleRef), - status = status, - displayName = record.displayName, - kitId = record.kitId, - subgroupIds = record.subgroupAssignments.getOrElse(Seq.empty).toList, - contributionLevel = record.contributionLevel, - joinedAt = if (status == "ACTIVE") Some(LocalDateTime.now()) else None, - atUri = Some(record.atUri), - atCid = Some(UUID.randomUUID().toString) - ) - groupProjectMemberRepository.create(member).map { created => - FirehoseResult.Success(event.atUri, created.atCid.getOrElse(""), None, "Project Membership Created") - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, s"Group project 
not found: ${record.projectRef}")) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def updateProjectMembership(event: ProjectMembershipEvent): Future[FirehoseResult] = { - event.payload match { - case Some(record) => - groupProjectMemberRepository.findByAtUri(event.atUri).flatMap { - case Some(existing) => - val updated = existing.copy( - status = record.status, - displayName = record.displayName.orElse(existing.displayName), - kitId = record.kitId.orElse(existing.kitId), - subgroupIds = record.subgroupAssignments.map(_.toList).getOrElse(existing.subgroupIds), - contributionLevel = record.contributionLevel.orElse(existing.contributionLevel), - atCid = Some(UUID.randomUUID().toString) - ) - groupProjectMemberRepository.update(updated).map { - case true => FirehoseResult.Success(event.atUri, updated.atCid.getOrElse(""), None, "Project Membership Updated") - case false => FirehoseResult.Conflict(event.atUri, "Update failed") - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required")) - } - } - - private def deleteProjectMembership(event: ProjectMembershipEvent): Future[FirehoseResult] = { - groupProjectMemberRepository.findByAtUri(event.atUri).flatMap { - case Some(existing) => - groupProjectMemberRepository.updateStatus(existing.id.get, "LEFT").map { - case true => FirehoseResult.Success(event.atUri, "", None, "Project Membership Removed") - case false => FirehoseResult.NotFound(event.atUri) - } - case None => - Future.successful(FirehoseResult.NotFound(event.atUri)) - } - } - - private def extractDidFromAtUri(atUri: String): String = { - // AT URI format: at://{DID}/{collection}/{rkey} - atUri.stripPrefix("at://").split("/").headOption.getOrElse("") - } - - // --- Match Consent Handling --- - - private def handleMatchConsent(event: MatchConsentEvent): 
Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create | FirehoseAction.Update => - event.payload match { - case Some(record) => - val did = extractDidFromAtUri(record.atUri) - citizenBiosampleRepository.findByAtUri(record.biosampleRef).flatMap { - case Some(biosample) => - val consent = MatchConsentTracking( - id = None, - atUri = record.atUri, - consentingDid = did, - sampleGuid = biosample.sampleGuid, - consentLevel = record.consentLevel, - allowedMatchTypes = record.allowedMatchTypes.map(t => play.api.libs.json.Json.toJson(t)), - shareContactInfo = record.shareContactInfo.getOrElse(false), - consentedAt = record.consentedAt.map(i => ZonedDateTime.ofInstant(i, ZoneId.of("UTC"))).getOrElse(ZonedDateTime.now()), - expiresAt = record.expiresAt.map(i => ZonedDateTime.ofInstant(i, ZoneId.of("UTC"))), - revokedAt = None - ) - matchConsentTrackingRepository.upsertFromFirehose(consent).map { saved => - FirehoseResult.Success(event.atUri, "", Some(biosample.sampleGuid), s"Match consent ${event.action}") - } - case None => - Future.successful(FirehoseResult.NotFound(record.biosampleRef)) - } - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required for match consent")) - } - case FirehoseAction.Delete => - matchConsentTrackingRepository.deleteByAtUri(event.atUri).map { - case true => FirehoseResult.Success(event.atUri, "", None, "Match consent deleted") - case false => FirehoseResult.NotFound(event.atUri) - } - } - } - - // --- Match Request Handling --- - - private def handleMatchRequest(event: MatchRequestEvent): Future[FirehoseResult] = { - event.action match { - case FirehoseAction.Create | FirehoseAction.Update => - event.payload match { - case Some(record) => - val did = extractDidFromAtUri(record.atUri) - for { - fromBiosample <- citizenBiosampleRepository.findByAtUri(record.fromBiosampleRef) - toBiosample <- citizenBiosampleRepository.findByAtUri(record.toBiosampleRef) - result <- (fromBiosample, 
toBiosample) match { - case (Some(from), Some(to)) => - val targetDid = extractDidFromAtUri(record.toBiosampleRef) - val tracking = MatchRequestTracking( - id = None, - atUri = record.atUri, - requesterDid = did, - targetDid = if (targetDid.nonEmpty) Some(targetDid) else None, - fromSampleGuid = from.sampleGuid, - toSampleGuid = to.sampleGuid, - requestType = "FULL", - status = record.status, - discoveryReason = None, - message = record.message, - createdAt = ZonedDateTime.now(), - updatedAt = ZonedDateTime.now(), - expiresAt = record.expiresAt.map(i => ZonedDateTime.ofInstant(i, ZoneId.of("UTC"))), - completedAt = None - ) - matchRequestTrackingRepository.upsertFromFirehose(tracking).map { saved => - FirehoseResult.Success(event.atUri, "", Some(from.sampleGuid), s"Match request ${event.action}") - } - case _ => - Future.successful(FirehoseResult.NotFound(s"Biosample ref not found for ${record.fromBiosampleRef} or ${record.toBiosampleRef}")) - } - } yield result - case None => - Future.successful(FirehoseResult.ValidationError(event.atUri, "Payload required for match request")) - } - case FirehoseAction.Delete => - matchRequestTrackingRepository.updateStatus(event.atUri, "WITHDRAWN").map { - case true => FirehoseResult.Success(event.atUri, "", None, "Match request withdrawn") - case false => FirehoseResult.NotFound(event.atUri) - } - } - } - -} \ No newline at end of file diff --git a/app/services/firehose/FirehoseEvent.scala b/app/services/firehose/FirehoseEvent.scala deleted file mode 100644 index e308a1c3..00000000 --- a/app/services/firehose/FirehoseEvent.scala +++ /dev/null @@ -1,381 +0,0 @@ -package services.firehose - -import models.api.{ExternalBiosampleRequest, ProjectRequest} -import models.atmosphere.* -import play.api.libs.json.* -import play.api.libs.functional.syntax._ - -/** - * Represents events from the AT Protocol Firehose (or simulated via REST API). 
- * - * This abstraction allows the same event processing logic to be used whether - * events arrive via: - * - Phase 1: Direct REST API calls (wrapped as events) - * - Phase 2: AT Protocol Firehose subscription - * - * Each event includes: - * - `atUri`: The canonical AT Protocol identifier for the record - * - `atCid`: Content identifier for optimistic locking / version tracking - * - `action`: The operation type (Create, Update, Delete) - */ -sealed trait FirehoseEvent { - def atUri: String - - def atCid: Option[String] - - def action: FirehoseAction -} - -/** - * Actions that can be performed on a record. - * Maps to AT Protocol commit operations. - */ -enum FirehoseAction: - case Create, Update, Delete - -object FirehoseAction { - implicit val reads: Reads[FirehoseAction] = Reads.of[String].map(FirehoseAction.valueOf) - implicit val writes: Writes[FirehoseAction] = Writes.of[String].contramap(_.toString) - implicit val format: Format[FirehoseAction] = Format(reads, writes) -} - -// --- Atmosphere Lexicon Events (Phase 3) --- - -case class BiosampleEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[BiosampleRecord] - ) extends FirehoseEvent - -object BiosampleEvent { - implicit val format: OFormat[BiosampleEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[BiosampleEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[BiosampleRecord] - )(BiosampleEvent.apply, (e: BiosampleEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class SequenceRunEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[SequenceRunRecord] - ) extends FirehoseEvent - -object SequenceRunEvent { - implicit val format: OFormat[SequenceRunEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[SequenceRunEvent] = ( - (JsPath \ 
"atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[SequenceRunRecord] - )(SequenceRunEvent.apply, (e: SequenceRunEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class AlignmentEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[AlignmentRecord] - ) extends FirehoseEvent - -object AlignmentEvent { - implicit val format: OFormat[AlignmentEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[AlignmentEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[AlignmentRecord] - )(AlignmentEvent.apply, (e: AlignmentEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class GenotypeEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[GenotypeRecord] - ) extends FirehoseEvent - -object GenotypeEvent { - implicit val format: OFormat[GenotypeEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[GenotypeEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[GenotypeRecord] - )(GenotypeEvent.apply, (e: GenotypeEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class ImputationEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[ImputationRecord] - ) extends FirehoseEvent - -object ImputationEvent { - implicit val format: OFormat[ImputationEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[ImputationEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[ImputationRecord] - 
)(ImputationEvent.apply, (e: ImputationEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class AtmosphereProjectEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[ProjectRecord] - ) extends FirehoseEvent - -object AtmosphereProjectEvent { - implicit val format: OFormat[AtmosphereProjectEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[AtmosphereProjectEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[ProjectRecord] - )(AtmosphereProjectEvent.apply, (e: AtmosphereProjectEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class PopulationBreakdownEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[PopulationBreakdownRecord] - ) extends FirehoseEvent - -object PopulationBreakdownEvent { - implicit val format: OFormat[PopulationBreakdownEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[PopulationBreakdownEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[PopulationBreakdownRecord] - )(PopulationBreakdownEvent.apply, (e: PopulationBreakdownEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class InstrumentObservationEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[InstrumentObservationRecord] - ) extends FirehoseEvent - -object InstrumentObservationEvent { - implicit val format: OFormat[InstrumentObservationEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[InstrumentObservationEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[InstrumentObservationRecord] - 
)(InstrumentObservationEvent.apply, (e: InstrumentObservationEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class MatchConsentEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[MatchConsentRecord] - ) extends FirehoseEvent - -object MatchConsentEvent { - implicit val format: OFormat[MatchConsentEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[MatchConsentEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[MatchConsentRecord] - )(MatchConsentEvent.apply, (e: MatchConsentEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class MatchListEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[MatchListRecord] - ) extends FirehoseEvent - -object MatchListEvent { - implicit val format: OFormat[MatchListEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[MatchListEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[MatchListRecord] - )(MatchListEvent.apply, (e: MatchListEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class MatchRequestEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[MatchRequestRecord] - ) extends FirehoseEvent - -object MatchRequestEvent { - implicit val format: OFormat[MatchRequestEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[MatchRequestEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[MatchRequestRecord] - )(MatchRequestEvent.apply, (e: MatchRequestEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class StrProfileEvent( - 
atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[StrProfileRecord] - ) extends FirehoseEvent - -object StrProfileEvent { - implicit val format: OFormat[StrProfileEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[StrProfileEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[StrProfileRecord] - )(StrProfileEvent.apply, (e: StrProfileEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class HaplogroupAncestralStrEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[HaplogroupAncestralStrRecord] - ) extends FirehoseEvent - -object HaplogroupAncestralStrEvent { - implicit val format: OFormat[HaplogroupAncestralStrEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[HaplogroupAncestralStrEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[HaplogroupAncestralStrRecord] - )(HaplogroupAncestralStrEvent.apply, (e: HaplogroupAncestralStrEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class WorkspaceEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[WorkspaceRecord] - ) extends FirehoseEvent - -object WorkspaceEvent { - implicit val format: OFormat[WorkspaceEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[WorkspaceEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[WorkspaceRecord] - )(WorkspaceEvent.apply, (e: WorkspaceEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class GroupProjectEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: 
Option[GroupProjectRecord] - ) extends FirehoseEvent - -object GroupProjectEvent { - implicit val format: OFormat[GroupProjectEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[GroupProjectEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[GroupProjectRecord] - )(GroupProjectEvent.apply, (e: GroupProjectEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class ProjectMembershipEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[ProjectMembershipRecord] - ) extends FirehoseEvent - -object ProjectMembershipEvent { - implicit val format: OFormat[ProjectMembershipEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[ProjectMembershipEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[ProjectMembershipRecord] - )(ProjectMembershipEvent.apply, (e: ProjectMembershipEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -case class HaplogroupReconciliationEvent( - atUri: String, - atCid: Option[String], - action: FirehoseAction, - payload: Option[HaplogroupReconciliationRecord] - ) extends FirehoseEvent - -object HaplogroupReconciliationEvent { - implicit val format: OFormat[HaplogroupReconciliationEvent] = Json.format - implicit val formatWithDiscriminator: OFormat[HaplogroupReconciliationEvent] = ( - (JsPath \ "atUri").format[String] and - (JsPath \ "atCid").formatNullable[String] and - (JsPath \ "action").format[FirehoseAction] and - (JsPath \ "payload").formatNullable[HaplogroupReconciliationRecord] - )(HaplogroupReconciliationEvent.apply, (e: HaplogroupReconciliationEvent) => (e.atUri, e.atCid, e.action, e.payload)) -} - -object FirehoseEvent { - implicit val firehoseEventReads: Reads[FirehoseEvent] = new 
Reads[FirehoseEvent] { - override def reads(json: JsValue): JsResult[FirehoseEvent] = { - (json \ "_type").asOpt[String] match { - case Some("BiosampleEvent") => json.validate[BiosampleEvent](BiosampleEvent.formatWithDiscriminator) - case Some("SequenceRunEvent") => json.validate[SequenceRunEvent](SequenceRunEvent.formatWithDiscriminator) - case Some("AlignmentEvent") => json.validate[AlignmentEvent](AlignmentEvent.formatWithDiscriminator) - case Some("GenotypeEvent") => json.validate[GenotypeEvent](GenotypeEvent.formatWithDiscriminator) - case Some("ImputationEvent") => json.validate[ImputationEvent](ImputationEvent.formatWithDiscriminator) - case Some("AtmosphereProjectEvent") => json.validate[AtmosphereProjectEvent](AtmosphereProjectEvent.formatWithDiscriminator) - case Some("PopulationBreakdownEvent") => json.validate[PopulationBreakdownEvent](PopulationBreakdownEvent.formatWithDiscriminator) - case Some("InstrumentObservationEvent") => json.validate[InstrumentObservationEvent](InstrumentObservationEvent.formatWithDiscriminator) - case Some("MatchConsentEvent") => json.validate[MatchConsentEvent](MatchConsentEvent.formatWithDiscriminator) - case Some("MatchListEvent") => json.validate[MatchListEvent](MatchListEvent.formatWithDiscriminator) - case Some("MatchRequestEvent") => json.validate[MatchRequestEvent](MatchRequestEvent.formatWithDiscriminator) - case Some("StrProfileEvent") => json.validate[StrProfileEvent](StrProfileEvent.formatWithDiscriminator) - case Some("HaplogroupAncestralStrEvent") => json.validate[HaplogroupAncestralStrEvent](HaplogroupAncestralStrEvent.formatWithDiscriminator) - case Some("WorkspaceEvent") => json.validate[WorkspaceEvent](WorkspaceEvent.formatWithDiscriminator) - case Some("GroupProjectEvent") => json.validate[GroupProjectEvent](GroupProjectEvent.formatWithDiscriminator) - case Some("ProjectMembershipEvent") => json.validate[ProjectMembershipEvent](ProjectMembershipEvent.formatWithDiscriminator) - case 
Some("HaplogroupReconciliationEvent") => json.validate[HaplogroupReconciliationEvent](HaplogroupReconciliationEvent.formatWithDiscriminator) - case Some(unknown) => JsError(s"Unknown FirehoseEvent type: $unknown") - case None => JsError("Missing '_type' discriminator field for FirehoseEvent") - } - } - } - - implicit val firehoseEventWrites: Writes[FirehoseEvent] = Writes { - case e: BiosampleEvent => Json.toJsObject(e)(BiosampleEvent.formatWithDiscriminator) + ("_type" -> JsString("BiosampleEvent")) - case e: SequenceRunEvent => Json.toJsObject(e)(SequenceRunEvent.formatWithDiscriminator) + ("_type" -> JsString("SequenceRunEvent")) - case e: AlignmentEvent => Json.toJsObject(e)(AlignmentEvent.formatWithDiscriminator) + ("_type" -> JsString("AlignmentEvent")) - case e: GenotypeEvent => Json.toJsObject(e)(GenotypeEvent.formatWithDiscriminator) + ("_type" -> JsString("GenotypeEvent")) - case e: ImputationEvent => Json.toJsObject(e)(ImputationEvent.formatWithDiscriminator) + ("_type" -> JsString("ImputationEvent")) - case e: AtmosphereProjectEvent => Json.toJsObject(e)(AtmosphereProjectEvent.formatWithDiscriminator) + ("_type" -> JsString("AtmosphereProjectEvent")) - case e: PopulationBreakdownEvent => Json.toJsObject(e)(PopulationBreakdownEvent.formatWithDiscriminator) + ("_type" -> JsString("PopulationBreakdownEvent")) - case e: InstrumentObservationEvent => Json.toJsObject(e)(InstrumentObservationEvent.formatWithDiscriminator) + ("_type" -> JsString("InstrumentObservationEvent")) - case e: MatchConsentEvent => Json.toJsObject(e)(MatchConsentEvent.formatWithDiscriminator) + ("_type" -> JsString("MatchConsentEvent")) - case e: MatchListEvent => Json.toJsObject(e)(MatchListEvent.formatWithDiscriminator) + ("_type" -> JsString("MatchListEvent")) - case e: MatchRequestEvent => Json.toJsObject(e)(MatchRequestEvent.formatWithDiscriminator) + ("_type" -> JsString("MatchRequestEvent")) - case e: StrProfileEvent => Json.toJsObject(e)(StrProfileEvent.formatWithDiscriminator) 
+ ("_type" -> JsString("StrProfileEvent")) - case e: HaplogroupAncestralStrEvent => Json.toJsObject(e)(HaplogroupAncestralStrEvent.formatWithDiscriminator) + ("_type" -> JsString("HaplogroupAncestralStrEvent")) - case e: WorkspaceEvent => Json.toJsObject(e)(WorkspaceEvent.formatWithDiscriminator) + ("_type" -> JsString("WorkspaceEvent")) - case e: GroupProjectEvent => Json.toJsObject(e)(GroupProjectEvent.formatWithDiscriminator) + ("_type" -> JsString("GroupProjectEvent")) - case e: ProjectMembershipEvent => Json.toJsObject(e)(ProjectMembershipEvent.formatWithDiscriminator) + ("_type" -> JsString("ProjectMembershipEvent")) - case e: HaplogroupReconciliationEvent => Json.toJsObject(e)(HaplogroupReconciliationEvent.formatWithDiscriminator) + ("_type" -> JsString("HaplogroupReconciliationEvent")) - } - - implicit val firehoseEventFormat: Format[FirehoseEvent] = Format(firehoseEventReads, firehoseEventWrites) -} diff --git a/app/services/firehose/FirehoseResult.scala b/app/services/firehose/FirehoseResult.scala deleted file mode 100644 index b9a2c33b..00000000 --- a/app/services/firehose/FirehoseResult.scala +++ /dev/null @@ -1,28 +0,0 @@ -package services.firehose - -import java.util.UUID - -/** - * Result of processing a FirehoseEvent. - * Provides a consistent result type regardless of the event source. 
- */ -sealed trait FirehoseResult { - def atUri: String -} - -object FirehoseResult { - case class Success( - atUri: String, - newAtCid: String, - sampleGuid: Option[UUID] = None, - message: String = "OK" - ) extends FirehoseResult - - case class NotFound(atUri: String) extends FirehoseResult - - case class Conflict(atUri: String, message: String) extends FirehoseResult - - case class ValidationError(atUri: String, message: String) extends FirehoseResult - - case class Error(atUri: String, message: String, cause: Option[Throwable] = None) extends FirehoseResult -} diff --git a/app/services/genomics/GenomeRegionIngestionService.scala b/app/services/genomics/GenomeRegionIngestionService.scala deleted file mode 100644 index e4621bd0..00000000 --- a/app/services/genomics/GenomeRegionIngestionService.scala +++ /dev/null @@ -1,152 +0,0 @@ -package services.genomics - -import config.GenomicsConfig -import htsjdk.samtools.liftover.LiftOver -import htsjdk.samtools.util.Interval -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.{GenomeRegion, RegionCoordinate} -import play.api.Logging -import play.api.libs.json.{JsObject, Json} -import repositories.GenomeRegionsRepository - -import java.io.BufferedInputStream -import java.net.URL -import java.util.zip.GZIPInputStream -import scala.concurrent.{ExecutionContext, Future} -import scala.io.Source -import scala.util.{Failure, Success, Using} - -@Singleton -class GenomeRegionIngestionService @Inject()( - repository: GenomeRegionsRepository, - genomicsConfig: GenomicsConfig -)(implicit ec: ExecutionContext) extends Logging { - - // Source URLs (hs1 / CHM13v2.0) - private val sources = Map( - "Cytoband" -> "https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0_cytobands_allchrs.bed", - "CenSat" -> "https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0_censat_v2.1.bed", - "Telomere" -> 
"https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0_telomere.bed", - "SequenceClass" -> "https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0_chrXY_sequence_class_v1.bed", - "InvertedRepeat" -> "https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0Y_inverted_repeats_v1.bed", - "Amplicon" -> "https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0Y_amplicons_v1.bed", - "Y_Region" -> "https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/annotation/chm13v2.0Y_AZF_DYZ_v1.bed" - ) - - /** - * Bootstraps the genome_region_v2 table from the configured URLs. - */ - def bootstrap(): Future[Unit] = { - logger.info("Starting Genome Region bootstrapping...") - - // Load liftovers for hs1 -> GRCh38 and hs1 -> GRCh37 - val liftovers = loadLiftovers("hs1", Seq("GRCh38", "GRCh37")) - - // Process each source sequentially - val tasks = sources.map { case (regionType, url) => - () => ingestUrl(url, regionType, liftovers) - } - - tasks.foldLeft(Future.successful(())) { (f, task) => - f.flatMap(_ => task()) - }.map { _ => - logger.info("Genome Region bootstrapping completed successfully.") - } - } - - private def ingestUrl(url: String, regionType: String, liftovers: Map[String, LiftOver]): Future[Unit] = { - logger.info(s"Ingesting $regionType from $url") - - val regions = Future { - Using.resource(Source.fromURL(url)) { source => - source.getLines() - .filterNot(_.startsWith("#")) - .filterNot(_.trim.isEmpty) - .flatMap(line => parseBedLine(line, regionType, liftovers)) - .toSeq - } - } - - regions.flatMap { parsedRegions => - if (parsedRegions.nonEmpty) { - val batches = parsedRegions.grouped(1000).toSeq - batches.foldLeft(Future.successful(())) { (acc, batch) => - acc.flatMap(_ => repository.bulkCreateRegions(batch).map(_ => ())) - }.map { _ => - logger.info(s"Ingested ${parsedRegions.size} regions 
for $regionType") - } - } else { - logger.warn(s"No regions found for $regionType") - Future.successful(()) - } - } - } - - private def parseBedLine(line: String, regionType: String, liftovers: Map[String, LiftOver]): Option[GenomeRegion] = { - val cols = line.split("\t") - if (cols.length < 3) return None - - val contig = cols(0) - // BED is 0-based start, 1-based exclusive end. - // Our DB/HTSJDK Interval is 1-based inclusive. - val start = cols(1).toLong + 1 - val end = cols(2).toLong - - val rawName = if (cols.length > 3) Some(cols(3)) else None - - // Uniqueness Fix: - // Cytoband names (p11.1) are repeated per chromosome -> chr1_p11.1 - // Repeats/Amplicons (IR3) can be repeated on same chromosome, and PAR1 starts at 0 on both X and Y. - // Use ${contig}_${n}_${start} to ensure global uniqueness. - val name = regionType match { - case "Cytoband" => rawName.map(n => s"${contig}_$n") - case "InvertedRepeat" | "Amplicon" | "Y_Region" | "CenSat" | "SequenceClass" => rawName.map(n => s"${contig}_${n}_$start") - case _ => rawName - } - - // Properties - val props = regionType match { - case "Cytoband" if cols.length > 4 => Json.obj("stain" -> cols(4)) - case "InvertedRepeat" if cols.length > 4 => Json.obj("score" -> cols(4)) // Just guessing useful fields - case _ => Json.obj() - } - - // HS1 Coordinate - val hs1Coord = RegionCoordinate(contig, start, end) - - // Lift to targets - val liftedCoords = liftovers.flatMap { - case (targetGenome, liftOver) => - val interval = new Interval(contig, start.toInt, end.toInt) - val lifted = liftOver.liftOver(interval) - if (lifted != null) { - Some(targetGenome -> RegionCoordinate(lifted.getContig, lifted.getStart, lifted.getEnd)) - } else None - } - - val allCoords = liftedCoords + ("hs1" -> hs1Coord) - - Some(GenomeRegion( - regionType = regionType, - name = name, - coordinates = allCoords, - properties = props - )) - } - - private def loadLiftovers(source: String, targets: Seq[String]): Map[String, LiftOver] = { - 
targets.flatMap { - case target => - genomicsConfig.getLiftoverChainFile(source, target).flatMap { - case file => - if (file.exists()) { - logger.info(s"Loaded liftover chain for $source->$target: ${file.getPath}") - Some(target -> new LiftOver(file)) - } else { - logger.warn(s"Liftover chain $source->$target configured but not found at ${file.getPath}") - None - } - } - }.toMap - } -} diff --git a/app/services/genomics/HipStrReferenceIngestionService.scala b/app/services/genomics/HipStrReferenceIngestionService.scala deleted file mode 100644 index 759d4fd1..00000000 --- a/app/services/genomics/HipStrReferenceIngestionService.scala +++ /dev/null @@ -1,180 +0,0 @@ -package services.genomics - -import config.GenomicsConfig -import htsjdk.samtools.liftover.LiftOver -import htsjdk.samtools.util.Interval -import jakarta.inject.{Inject, Singleton} -import models.domain.genomics.{MutationType, NamingStatus, StrCoordinates, VariantAliases, VariantV2} -import play.api.Logging -import play.api.libs.json.Json -import repositories.VariantV2Repository - -import java.io.{BufferedInputStream, BufferedReader, File, FileOutputStream, InputStreamReader} -import java.net.{HttpURLConnection, URI} -import java.util.zip.GZIPInputStream -import scala.concurrent.{ExecutionContext, Future} -import scala.util.{Failure, Success, Try} - -@Singleton -class HipStrReferenceIngestionService @Inject()( - variantRepository: VariantV2Repository, - genomicsConfig: GenomicsConfig -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Bootstraps STRs from the HipSTR reference catalog. 
- */ - def bootstrap(): Future[Int] = { - val targetFile = genomicsConfig.hipstrStoragePath - - downloadFile(genomicsConfig.hipstrUrl, targetFile).flatMap { _ => - ingestHipStrBed(targetFile) - } - } - - private def downloadFile(urlStr: String, targetFile: File): Future[Unit] = Future { - // Cache check (24h) - val cacheDuration = 24 * 60 * 60 * 1000L - if (targetFile.exists() && (System.currentTimeMillis() - targetFile.lastModified() < cacheDuration)) { - logger.info(s"Local HipSTR file is fresh (< 24 hours old), skipping download: ${targetFile.getAbsolutePath}") - } else { - val url = URI.create(urlStr).toURL - logger.info(s"Downloading HipSTR reference from $url to ${targetFile.getAbsolutePath}") - - val parentDir = targetFile.getParentFile - if (parentDir != null && !parentDir.exists()) parentDir.mkdirs() - - val tempFile = new File(targetFile.getAbsolutePath + ".tmp") - val conn = url.openConnection().asInstanceOf[HttpURLConnection] - conn.setConnectTimeout(30000) - conn.setReadTimeout(300000) - - try { - val in = new BufferedInputStream(conn.getInputStream) - val out = new FileOutputStream(tempFile) - val buffer = new Array[Byte](8192) - Iterator.continually(in.read(buffer)).takeWhile(_ != -1).foreach(out.write(buffer, 0, _)) - in.close() - out.close() - - if (targetFile.exists()) targetFile.delete() - if (!tempFile.renameTo(targetFile)) throw new RuntimeException(s"Failed to rename $tempFile to $targetFile") - - logger.info(s"Downloaded HipSTR reference to ${targetFile.getAbsolutePath}") - } finally { - conn.disconnect() - } - } - } - - private def ingestHipStrBed(file: File): Future[Int] = { - logger.info(s"Starting HipSTR ingestion from ${file.getPath}") - - // Load liftovers: GRCh38 -> hs1, GRCh38 -> GRCh37 - val liftovers = loadLiftovers("GRCh38", Seq("hs1", "GRCh37")) - - // Using simple recursive batching to avoid blocking - // GZIP reader - val reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(new java.io.FileInputStream(file)))) - 
- // Iterator for lines - val iterator = Iterator.continually(reader.readLine()).takeWhile(_ != null).filterNot(_.startsWith("#")) - val batchSize = 100 - - def processNextBatch(count: Int): Future[Int] = { - val batchLines = iterator.take(batchSize).toSeq - - if (batchLines.isEmpty) { - reader.close() - logger.info(s"HipSTR ingestion complete. Processed $count variants.") - Future.successful(count) - } else { - val variants = batchLines.flatMap(line => parseHipStrLine(line, liftovers)) - - variantRepository.upsertBatch(variants).flatMap { _ => - val newCount = count + batchLines.size // Count lines processed - if (newCount % 1000 == 0) logger.info(s"Processed $newCount HipSTR records...") - processNextBatch(newCount) - } - } - } - - processNextBatch(0).transform { - result => - Try(reader.close()) - result - } - } - - private def parseHipStrLine(line: String, liftovers: Map[String, LiftOver]): Option[VariantV2] = { - // BED columns: chrom, start, end, period, ref_repeats, id, motif, [structure] - val cols = line.split("\t") - if (cols.length < 6) return None - - val rawContig = cols(0) - val contig = if (rawContig.matches("^[0-9XY]+$")) s"chr$rawContig" else rawContig - val start = cols(1).toLong + 1 // BED 0-based -> 1-based inclusive - val end = cols(2).toLong - val period = cols(3).toInt - val refRepeats = cols(4).toDouble.toInt // Sometimes float? 
- val name = cols(5) - val motif = if (cols.length > 6) Some(cols(6)) else None - - // Source coordinates (GRCh38) - val grch38Coords = Json.toJson(StrCoordinates( - contig = contig, - start = start, - end = end, - period = period, - repeatMotif = motif, - referenceRepeats = Some(refRepeats) - )) - - // Lift over - val liftedCoords = liftovers.flatMap { - case (targetGenome, liftOver) => - val interval = new Interval(contig, start.toInt, end.toInt) - val lifted = liftOver.liftOver(interval) - if (lifted != null) { - Some(targetGenome -> Json.toJson(StrCoordinates( - contig = lifted.getContig, - start = lifted.getStart, - end = lifted.getEnd, - period = period, - repeatMotif = motif, - referenceRepeats = Some(refRepeats) // Approximation, technically assumes reference length similarity - ))) - } else None - } - - val allCoords = liftedCoords + ("GRCh38" -> grch38Coords) - val combinedCoordsJson = allCoords.foldLeft(Json.obj()) { case (acc, (k, v)) => acc + (k -> v) } - - val aliases = Json.toJson(VariantAliases( - commonNames = Seq(name), - sources = Map("HipSTR" -> Seq(name)) - )) - - Some(VariantV2( - canonicalName = Some(name), - mutationType = MutationType.STR, - namingStatus = NamingStatus.Named, - aliases = aliases, - coordinates = combinedCoordsJson, - notes = Some("Imported from HipSTR catalog") - )) - } - - private def loadLiftovers(source: String, targets: Seq[String]): Map[String, LiftOver] = { - targets.flatMap { - target => - genomicsConfig.getLiftoverChainFile(source, target).flatMap { - file => - if (file.exists()) { - logger.info(s"Loaded liftover chain for $source->$target") - Some(target -> new LiftOver(file)) - } else None - } - }.toMap - } -} diff --git a/app/services/genomics/SequencerInstrumentService.scala b/app/services/genomics/SequencerInstrumentService.scala deleted file mode 100644 index 29998e05..00000000 --- a/app/services/genomics/SequencerInstrumentService.scala +++ /dev/null @@ -1,89 +0,0 @@ -package services.genomics - -import 
jakarta.inject.{Inject, Singleton} -import models.api.{PendingProposalSummary, SequencerLabLookupResponse} -import models.api.genomics.AssociateLabWithInstrumentResponse -import repositories.{InstrumentObservationRepository, InstrumentProposalRepository, SequencerInstrumentRepository} - -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class SequencerInstrumentService @Inject()( - instrumentRepository: SequencerInstrumentRepository, - proposalRepository: InstrumentProposalRepository, - observationRepository: InstrumentObservationRepository - )(implicit ec: ExecutionContext) { - - def getAllLabInstrumentAssociations: Future[Seq[models.api.SequencerLabInfo]] = { - instrumentRepository.findAllLabInstrumentAssociations() - } - - def lookupLab(instrumentId: String): Future[Option[SequencerLabLookupResponse]] = { - for { - confirmedOpt <- instrumentRepository.findLabByInstrumentId(instrumentId) - proposalOpt <- proposalRepository.findActiveByInstrumentId(instrumentId) - obsCount <- observationRepository.findByInstrumentId(instrumentId).map(_.size) - } yield { - confirmedOpt match { - case Some(labInfo) => - val pendingSummary = proposalOpt.flatMap { p => - if (p.proposedLabName != labInfo.labName) { - Some(PendingProposalSummary( - proposalId = p.id.getOrElse(0), - proposedLabName = p.proposedLabName, - observationCount = p.observationCount, - confidenceScore = p.confidenceScore, - status = p.status.dbValue - )) - } else None - } - Some(SequencerLabLookupResponse( - instrumentId = labInfo.instrumentId, - labName = Some(labInfo.labName), - isD2c = Some(labInfo.isD2c), - manufacturer = labInfo.manufacturer, - model = labInfo.model, - websiteUrl = labInfo.websiteUrl, - source = "CURATOR", - confidenceScore = 1.0, - observationCount = obsCount, - pendingProposal = pendingSummary - )) - case None => - proposalOpt.map { proposal => - SequencerLabLookupResponse( - instrumentId = instrumentId, - labName = Some(proposal.proposedLabName), - manufacturer = 
proposal.proposedManufacturer, - model = proposal.proposedModel, - source = "CONSENSUS", - confidenceScore = proposal.confidenceScore, - observationCount = proposal.observationCount, - pendingProposal = Some(PendingProposalSummary( - proposalId = proposal.id.getOrElse(0), - proposedLabName = proposal.proposedLabName, - observationCount = proposal.observationCount, - confidenceScore = proposal.confidenceScore, - status = proposal.status.dbValue - )) - ) - } - } - } - } - - def associateLabWithInstrument( - instrumentId: String, - labName: String, - manufacturer: Option[String] = None, - model: Option[String] = None - ): Future[AssociateLabWithInstrumentResponse] = { - if (instrumentId.isBlank) { - Future.failed(new IllegalArgumentException("Instrument ID cannot be empty")) - } else if (labName.isBlank) { - Future.failed(new IllegalArgumentException("Lab name cannot be empty")) - } else { - instrumentRepository.associateLabWithInstrument(instrumentId, labName, manufacturer, model) - } - } -} diff --git a/app/services/genomics/SpecimanDonorService.scala b/app/services/genomics/SpecimanDonorService.scala deleted file mode 100644 index f570cb6e..00000000 --- a/app/services/genomics/SpecimanDonorService.scala +++ /dev/null @@ -1,197 +0,0 @@ -package services.genomics - -import jakarta.inject.{Inject, Singleton} -import models.api.genomics.{MergeConflict, MergeStrategy, SpecimenDonorMergeRequest, SpecimenDonorMergeResult} -import models.domain.genomics.SpecimenDonor -import play.api.Logging -import repositories.SpecimenDonorRepository - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service interface for managing and merging specimen donor data. - * - * This trait defines the contract for merging multiple donor records into a - * single unified donor record as per the provided merge request details. - */ -trait SpecimenDonorService { - /** - * Merges multiple donor records into a single unified donor record based on the specified merge strategy. 
- * - * @param request the merge request containing the target donor ID, a list of source donor IDs to be merged, - * and the merge strategy to resolve conflicts or handle the merging process. - * @return a future containing the result of the merge operation, which includes the ID of the merged donor, - * the count of updated biosamples, a list of removed donor IDs, and any merge conflicts encountered. - */ - def mergeDonors(request: SpecimenDonorMergeRequest): Future[SpecimenDonorMergeResult] -} - -@Singleton -class SpecimenDonorServiceImpl @Inject()(donorRepo: SpecimenDonorRepository) - (implicit ec: ExecutionContext) extends SpecimenDonorService with Logging { - - /** - * Merges a target donor with multiple source donors based on the details specified in the request. - * The operation involves transferring biosamples from source donors to the target donor, deleting the source donors, - * and resolving potential conflicts according to a specified merge strategy. - * - * @param request the merge request containing the target donor ID, a list of source donor IDs to merge, - * and the merge strategy to be applied - * @return a future containing the result of the merge operation, which includes the target donor ID, - * count of updated biosamples, list of removed donor IDs, and any conflicts encountered during the merge - */ - def mergeDonors(request: SpecimenDonorMergeRequest): Future[SpecimenDonorMergeResult] = { - for { - // 1. Get all donors involved - targetDonorOpt <- donorRepo.findById(request.targetId) - sourceDonors <- Future.sequence(request.sourceIds.map(donorRepo.findById)) - - // 2. 
Validate all donors exist - result <- (targetDonorOpt, sourceDonors.flatten) match { - case (Some(targetDonor), sources) if sources.length == request.sourceIds.length => - processMerge(targetDonor, sources, request) - case _ => - Future.failed(new IllegalArgumentException("One or more donors not found")) - } - } yield result - } - - /** - * Processes the merging of a target donor with multiple source donors based on the provided merge request. - * This involves updating the target donor with merged data, transferring biosamples from the source donors - * to the target donor, deleting the source donors, and identifying any merge conflicts. - * - * @param targetDonor the primary donor to which data and biosamples from the source donors will be merged - * @param sourceDonors a sequence of donors whose data and biosamples are to be merged into the target donor - * @param request the merge request containing the target donor ID, a list of source donor IDs, and the strategy used for merging - * @return a future containing the result of the merge operation, which includes details about the merged donor ID, - * number of updated biosamples, removed donor IDs, and any conflicts encountered during the merge - */ - private def processMerge( - targetDonor: SpecimenDonor, - sourceDonors: Seq[SpecimenDonor], - request: SpecimenDonorMergeRequest - ): Future[SpecimenDonorMergeResult] = { - - // Merge donor data according to strategy - val mergedDonor = mergeDonorData(targetDonor, sourceDonors, request.mergeStrategy) - - for { - // Update target donor with merged data - _ <- donorRepo.update(mergedDonor) - - // Transfer all biosamples to target donor - updatedCount <- donorRepo.transferBiosamples(request.sourceIds, request.targetId) - - // Delete source donors - _ <- donorRepo.deleteMany(request.sourceIds) - - } yield SpecimenDonorMergeResult( - mergedDonorId = request.targetId, - updatedBiosamples = updatedCount, - removedDonors = request.sourceIds, - conflicts = 
collectMergeConflicts(targetDonor, sourceDonors.toList, mergedDonor) - ) - } - - /** - * Merges donor data from a sequence of source donors into the target donor according to the specified merge strategy. - * - * @param target the primary donor object to which data will be merged - * @param sources a sequence of source donors whose data will be used in the merge process - * @param strategy the merge strategy determining how conflicts between target and source donors are resolved - * (PreferTarget, PreferSource, or MostComplete) - * @return a new SpecimenDonor instance representing the result of merging the target and source donors - */ - private def mergeDonorData(target: SpecimenDonor, sources: Seq[SpecimenDonor], strategy: MergeStrategy): SpecimenDonor = { - strategy match { - case MergeStrategy.PreferTarget => target - case MergeStrategy.PreferSource => sources.head.copy(id = target.id) - case MergeStrategy.MostComplete => - sources.foldLeft(target) { (acc, source) => - SpecimenDonor( - id = acc.id, - donorIdentifier = acc.donorIdentifier, - originBiobank = acc.originBiobank, - donorType = (source.donorType, acc.donorType) match { - case (null, accType) => accType - case (sourceType, _) => sourceType - }, - sex = source.sex.orElse(acc.sex), - geocoord = source.geocoord.orElse(acc.geocoord), - pgpParticipantId = source.pgpParticipantId.orElse(acc.pgpParticipantId), - atUri = source.atUri.orElse(acc.atUri), - dateRangeStart = source.dateRangeStart.orElse(acc.dateRangeStart), - dateRangeEnd = source.dateRangeEnd.orElse(acc.dateRangeEnd) - ) - } - } - } - - /** - * Identifies and collects merge conflicts between a target donor and multiple source donors - * in the context of a merge operation. A conflict arises when the values of corresponding - * fields in the target and source donors differ. 
- * - * @param target the primary donor whose values are compared against the source donors - * @param sources a list of source donors whose values are compared against the target donor - * @param result the merged donor, used to determine the resolved value for conflicting fields - * @return a list of `MergeConflict` instances representing the fields where conflicts were detected, - * including details of the conflicting values and the chosen resolution - */ - private def collectMergeConflicts(target: SpecimenDonor, sources: List[SpecimenDonor], result: SpecimenDonor): List[MergeConflict] = { - logger.info(s"Starting merge conflict detection for target donor ${target.id} with ${sources.length} source donors") - - // Helper function to compare values and create conflict if they differ - def checkField[T](fieldName: String, targetValue: Option[T], sourceValue: Option[T], resultValue: Option[T]): Option[MergeConflict] = { - (targetValue, sourceValue) match { - case (Some(tv), Some(sv)) if tv != sv => - val conflict = MergeConflict( - field = fieldName, - targetValue = tv.toString, - sourceValue = sv.toString, - resolution = resultValue.map(_.toString).getOrElse("No value") - ) - logger.debug(s"Found conflict in field '$fieldName': target='$tv', source='$sv', resolved to='${resultValue.getOrElse("No value")}'") - Some(conflict) - case _ => None - } - } - - // For multiple sources, we'll compare against the first different value we find - val conflictingSources = sources.filter(_ != target) - if (conflictingSources.isEmpty) { - logger.info("No conflicting sources found - all sources match target") - return Nil - } - - logger.debug(s"Found ${conflictingSources.length} conflicting source donors") - - val conflicts = conflictingSources.flatMap { source => - logger.debug(s"Checking conflicts between target donor ${target.id} and source donor ${source.id}") - - val fieldConflicts = List( - checkField("donorType", Option(target.donorType), Option(source.donorType), 
Option(result.donorType)), - checkField("sex", target.sex, source.sex, result.sex), - checkField("geocoord", target.geocoord, source.geocoord, result.geocoord), - checkField("pgpParticipantId", target.pgpParticipantId, source.pgpParticipantId, result.pgpParticipantId), - checkField("atUri", target.atUri, source.atUri, result.atUri), - checkField("dateRangeStart", target.dateRangeStart, source.dateRangeStart, result.dateRangeStart), - checkField("dateRangeEnd", target.dateRangeEnd, source.dateRangeEnd, result.dateRangeEnd) - ).flatten - - if (fieldConflicts.isEmpty) { - logger.debug(s"No field conflicts found between target donor ${target.id} and source donor ${source.id}") - } - - fieldConflicts - }.distinct - - logger.info(s"Completed merge conflict detection: found ${conflicts.length} unique conflicts") - if (conflicts.nonEmpty) { - logger.debug(s"Conflict summary: ${conflicts.map(c => s"${c.field}: ${c.targetValue} vs ${c.sourceValue}").mkString(", ")}") - } - - conflicts - } -} \ No newline at end of file diff --git a/app/services/genomics/YBrowseVariantIngestionService.scala b/app/services/genomics/YBrowseVariantIngestionService.scala deleted file mode 100644 index f7938700..00000000 --- a/app/services/genomics/YBrowseVariantIngestionService.scala +++ /dev/null @@ -1,465 +0,0 @@ -package services.genomics - -import config.GenomicsConfig -import htsjdk.samtools.liftover.LiftOver -import htsjdk.samtools.reference.{ReferenceSequenceFile, ReferenceSequenceFileFactory} -import htsjdk.samtools.util.Interval -import htsjdk.variant.variantcontext.VariantContext -import htsjdk.variant.vcf.VCFFileReader -import jakarta.inject.{Inject, Singleton} -import models.dal.domain.genomics.* -import models.domain.genomics.{MutationType, NamingStatus, VariantAliases, VariantV2} -import play.api.Logging -import play.api.libs.json.{JsObject, Json} -import repositories.VariantV2Repository - -import java.io.File -import scala.concurrent.{ExecutionContext, Future} -import 
scala.jdk.CollectionConverters.* -import scala.io.Source -import scala.util.{Failure, Success, Try, Using} -import scala.collection.AbstractIterator - -/** - * Service for ingesting Y-DNA variants from YBrowse VCF and GFF files. - * - * Creates consolidated VariantV2 records with JSONB coordinates for multiple - * reference genomes. Performs liftover to add coordinates for additional - * assemblies (hs1, GRCh37, etc.). - */ -@Singleton -class YBrowseVariantIngestionService @Inject()( - variantV2Repository: VariantV2Repository, - genomicsConfig: GenomicsConfig -)(implicit ec: ExecutionContext) extends Logging { - - // Lazy-load ReferenceSequenceFile for each configured reference genome - private val referenceFastaFiles: Map[String, ReferenceSequenceFile] = genomicsConfig.fastaPaths.flatMap { - case (genome, fastaFile) => - if (fastaFile.exists()) { - logger.info(s"Loading reference FASTA for $genome from ${fastaFile.getPath}") - Some(genome -> ReferenceSequenceFileFactory.getReferenceSequenceFile(fastaFile)) - } else { - logger.warn(s"Reference FASTA file for $genome not found at ${fastaFile.getPath}. Normalization might be incomplete.") - None - } - } - - /** - * Ingests variants from a YBrowse GFF3 file. - * Groups adjacent records with same coordinates to handle aliases. - * - * @param gffFile The GFF3 file to ingest. - * @param sourceGenome The reference genome of the input GFF (default: "GRCh38"). - * @return A Future containing the number of variants ingested. 
- */ - def ingestGff(gffFile: File, sourceGenome: String = "GRCh38"): Future[Int] = { - logger.info(s"Starting GFF ingestion from ${gffFile.getPath} ($sourceGenome)") - - val canonicalSource = genomicsConfig.resolveReferenceName(sourceGenome) - val targetGenomes = genomicsConfig.supportedReferences.filter(_ != canonicalSource) - - // Load liftovers - val liftovers: Map[String, LiftOver] = targetGenomes.flatMap { target => - genomicsConfig.getLiftoverChainFile(canonicalSource, target).flatMap { file => - if (file.exists()) Some(target -> new LiftOver(file)) else None - } - }.toMap - - val batchSize = 100 - val source = Source.fromFile(gffFile) - - try { - val lines = source.getLines().filterNot(_.startsWith("#")) - - // Custom grouping iterator that groups adjacent lines with same Chr/Pos/Ref/Alt - val groupedIterator = new AbstractIterator[Seq[Map[String, String]]] { - private var buffer: Option[Map[String, String]] = None - - override def hasNext: Boolean = buffer.isDefined || lines.hasNext - - override def next(): Seq[Map[String, String]] = { - if (!hasNext) throw new NoSuchElementException("next on empty iterator") - - val currentGroup = scala.collection.mutable.ArrayBuffer[Map[String, String]]() - - // Initialize with buffer or next line - val first = buffer.getOrElse(parseGffLine(lines.next())) - buffer = None // Clear buffer - - if (first.isEmpty) return next() // Skip malformed/empty lines - - currentGroup += first - - // Key to identify the group (Chr, Start, End) - val groupKey = (first("seqid"), first("start"), first("end")) - - // Peek ahead - var keepingGoing = true - while (keepingGoing && lines.hasNext) { - val nextLine = parseGffLine(lines.next()) - if (nextLine.nonEmpty) { - val nextKey = (nextLine("seqid"), nextLine("start"), nextLine("end")) - if (nextKey == groupKey) { - currentGroup += nextLine - } else { - buffer = Some(nextLine) - keepingGoing = false - } - } - } - - currentGroup.toSeq - } - } - - def processNextBatch(accumulatedCount: Int): 
Future[Int] = { - // Synchronously take a batch from the iterator to avoid blocking the thread later - // (Iterator access is fast, processing is slow) - val batchGroups = scala.collection.mutable.ArrayBuffer[Seq[Map[String, String]]]() - var taken = 0 - while (taken < batchSize && groupedIterator.hasNext) { - batchGroups += groupedIterator.next() - taken += 1 - } - - if (batchGroups.isEmpty) { - logger.info(s"GFF ingestion complete. Total variants: $accumulatedCount") - Future.successful(accumulatedCount) - } else { - // Process batch as a whole using optimized batch upsert - val variantsToProcess = batchGroups.flatMap(group => createVariantV2FromGffGroup(group, sourceGenome, liftovers)).toSeq - - variantV2Repository.upsertBatch(variantsToProcess).flatMap { resultIds => - // Log the number of records *processed* in this batch, not just newly created/updated. - // resultIds.size is the number of variants that were actually inserted or updated. - // variantsToProcess.size is the total number of items from the GFF batch. 
- val batchCount = variantsToProcess.size // Number of GFF records processed in this iteration - val newTotal = accumulatedCount + batchCount - if (newTotal % 100000 == 0) { // Log every 1000 records processed - logger.info(s"Processed $newTotal GFF records...") - } - processNextBatch(newTotal) - } - } - } - - processNextBatch(0).andThen { case _ => - source.close() - } - } catch { - case e: Exception => - source.close() - Future.failed(e) - } - } - - private def mergeAliases(existing: play.api.libs.json.JsValue, incoming: play.api.libs.json.JsValue): play.api.libs.json.JsValue = { - import play.api.libs.json.* - - val eCommon = (existing \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) - val iCommon = (incoming \ "common_names").asOpt[Seq[String]].getOrElse(Seq.empty) - val mergedCommon = (eCommon ++ iCommon).distinct - - val eRs = (existing \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) - val iRs = (incoming \ "rs_ids").asOpt[Seq[String]].getOrElse(Seq.empty) - val mergedRs = (eRs ++ iRs).distinct - - // Deep merge sources is harder, simple merge for now - val eSources = (existing \ "sources").asOpt[JsObject].getOrElse(Json.obj()) - val iSources = (incoming \ "sources").asOpt[JsObject].getOrElse(Json.obj()) - // For source arrays, we really should merge the arrays, but standard ++ overwrites keys. - // A robust merge would iterate keys. 
- // Let's do a slightly better merge for sources - val mergedSources = iSources.fields.foldLeft(eSources) { case (acc, (key, newVal)) => - val oldVal = (acc \ key).asOpt[Seq[String]].getOrElse(Seq.empty) - val nextVal = newVal.asOpt[Seq[String]].getOrElse(Seq.empty) - acc + (key -> Json.toJson((oldVal ++ nextVal).distinct)) - } - - Json.obj( - "common_names" -> mergedCommon, - "rs_ids" -> mergedRs, - "sources" -> mergedSources - ) - } - - private def parseGffLine(line: String): Map[String, String] = { - val cols = line.split("\t") - if (cols.length < 9) return Map.empty - - val attributes = cols(8).split(";").map { kv => - val parts = kv.split("=", 2) - if (parts.length == 2) parts(0) -> parts(1) else "" -> "" - }.toMap.filter(_._1.nonEmpty) - - Map( - "seqid" -> cols(0), - "source" -> cols(1), - "type" -> cols(2), - "start" -> cols(3), - "end" -> cols(4), - "score" -> cols(5), - "strand" -> cols(6), - "phase" -> cols(7) - ) ++ attributes - } - - private def createVariantV2FromGffGroup( - group: Seq[Map[String, String]], - sourceGenome: String, - liftovers: Map[String, LiftOver] - ): Option[VariantV2] = { - // First record determines canonical info - val primary = group.head - val name = primary.getOrElse("Name", primary.getOrElse("ID", "Unknown")) - - // Parse coordinates - val contig = primary("seqid") - val start = primary("start").toInt - // GFF attributes for alleles - val ref = primary.getOrElse("allele_anc", primary.getOrElse("ref_allele", primary.getOrElse("reference_allele", ""))) - val alt = primary.getOrElse("allele_der", primary.getOrElse("alt_allele", primary.getOrElse("derived_allele", ""))) - - if (ref.isEmpty || alt.isEmpty) { - if (Math.random() < 0.001) logger.warn(s"Missing alleles for GFF record (sampling): $primary") - return None // Skip if alleles missing - } - - // Normalize - val refSeq = referenceFastaFiles.get(sourceGenome) - val (normPos, normRef, normAlt) = normalizeVariant( - contig, start, ref, alt, refSeq - ) - - // Build 
coordinates JSONB - val sourceCoords = Json.obj( - "contig" -> contig, - "position" -> normPos, - "ref" -> normRef, - "alt" -> normAlt - ) - - // Lift over - val liftedCoords = liftovers.flatMap { case (targetGenome, liftOver) => - val interval = new Interval(contig, start, primary("end").toInt) - val lifted = liftOver.liftOver(interval) - - if (lifted != null) { - val targetRefSeq = referenceFastaFiles.get(targetGenome) - // Note: We use original ref/alt for normalization on target, - // assuming alleles translate directly (which is true for homology map). - // A more robust way would be to fetch ref from target fasta. - val (lPos, lRef, lAlt) = normalizeVariant( - lifted.getContig, lifted.getStart, ref, alt, targetRefSeq - ) - - Some(targetGenome -> Json.obj( - "contig" -> lifted.getContig, - "position" -> lPos, - "ref" -> lRef, - "alt" -> lAlt - )) - } else None - } - - val allCoordinates = (liftedCoords + (sourceGenome -> sourceCoords)).foldLeft(Json.obj()) { - case (acc, (genome, coords)) => acc + (genome -> coords) - } - - // Collect Metadata - val commonNames = group.flatMap(_.get("Name")).distinct - val rsIds = group.flatMap(_.get("Name")).filter(_.startsWith("rs")).distinct // Naive check - val ybrowseIds = group.flatMap(_.get("ID")).distinct - - // Sources map: source -> [names] - // Use 'ref' attribute from GFF as source attribution - val sourceMap = group.groupBy(_.getOrElse("ref", "ybrowse")).map { case (src, records) => - src -> records.flatMap(_.get("Name")).distinct - } - - val aliases = Json.obj( - "common_names" -> commonNames, - "rs_ids" -> rsIds, - "sources" -> (Json.toJsObject(sourceMap) + ("ybrowse_id" -> Json.toJson(ybrowseIds))) - ) - - // Evidence - val tested = primary.get("count_tested").map(_.toInt).getOrElse(0) - val derived = primary.get("count_derived").map(_.toInt).getOrElse(0) - - // External Placements (Haplogroups) - val rawPlacements = Json.obj( - "ycc" -> primary.get("ycc_haplogroup"), - "isogg" -> 
primary.get("isogg_haplogroup"), - "yfull" -> primary.get("yfull_node") // User clarified this is a haplogroup placement - ) - - val placements = JsObject(rawPlacements.fields.filterNot { case (_, v) => - v match { - case play.api.libs.json.JsString(s) => s == "." || s == "not listed" || s == "unknown" - case _ => false - } - }) - - val evidence = Json.obj( - "yseq_tested" -> tested, - "yseq_derived" -> derived, - "external_placements" -> placements - ) - - // Primers - val primers = if (primary.contains("primer_f")) { - Json.obj( - "yseq_f" -> primary.getOrElse("primer_f", ""), - "yseq_r" -> primary.getOrElse("primer_r", "") - ) - } else Json.obj() - - // Notes - val notes = primary.get("comment").filter(_ != ".") - - Some(VariantV2( - canonicalName = Some(name), - mutationType = MutationType.SNP, // GFF type 'point'/'snp' usually implies SNP - namingStatus = NamingStatus.Named, - aliases = aliases, - coordinates = allCoordinates, - evidence = evidence, - primers = primers, - notes = notes - )) - } - - /** - * Normalizes a variant by performing VCF-style left-alignment. - * - * The algorithm: - * 1. Right-trim: Remove common suffix bases from ref and alt alleles - * 2. Pad: If either allele becomes empty, prepend the preceding reference base - * 3. 
Left-trim: Remove common prefix bases (keeping at least 1 base on each) - */ - private def normalizeVariant( - contig: String, - pos: Int, - ref: String, - alt: String, - refSeq: Option[ReferenceSequenceFile] - ): (Int, String, String) = { - // Expand compressed repeat notation (e.g., "3T" -> "TTT") - val expandedRef = expandRepeatNotation(ref) - val expandedAlt = expandRepeatNotation(alt) - - // Skip normalization for SNPs (single base, same length) - if (expandedRef.length == 1 && expandedAlt.length == 1) { - return (pos, expandedRef, expandedAlt) - } - - var currRef = expandedRef - var currAlt = expandedAlt - var currPos = pos - - // Step 1: Right-trim common suffix bases - while (currRef.nonEmpty && currAlt.nonEmpty && currRef.last == currAlt.last) { - currRef = currRef.dropRight(1) - currAlt = currAlt.dropRight(1) - } - - // Step 2: Pad with preceding base if either allele is empty - if (currRef.isEmpty || currAlt.isEmpty) { - currPos -= 1 - val paddingBase = refSeq match { - case Some(rs) => - try { - new String(rs.getSubsequenceAt(contig, currPos, currPos).getBases, "UTF-8") - } catch { - case _: Exception => "N" - } - case None => "N" - } - currRef = paddingBase + currRef - currAlt = paddingBase + currAlt - } - - // Step 3: Left-trim common prefix bases (keeping at least 1 base) - while (currRef.length > 1 && currAlt.length > 1 && currRef.head == currAlt.head) { - currRef = currRef.tail - currAlt = currAlt.tail - currPos += 1 - } - - (currPos, currRef, currAlt) - } - - /** - * Expands compressed repeat notation (e.g., "3T" -> "TTT", "2AG" -> "AGAG"). - * Returns the input unchanged if it's already a valid nucleotide sequence. 
- */ - private def expandRepeatNotation(allele: String): String = { - if (allele.forall(c => "ACGTN".contains(c.toUpper))) { - allele - } else { - val (digits, bases) = allele.partition(_.isDigit) - if (digits.nonEmpty && bases.nonEmpty) { - bases * digits.toInt - } else { - bases - } - } - } - /** - * Lifts a variant to all other supported reference genomes and adds coordinates. - * - * @param variantId The variant to update with additional coordinates - * @param sourceGenome The source reference genome - * @return Future containing the number of coordinates added - */ - def addLiftedCoordinates(variantId: Int, sourceGenome: String): Future[Int] = { - variantV2Repository.findById(variantId).flatMap { - case Some(variant) => - val sourceCoords = variant.getCoordinates(sourceGenome) - sourceCoords match { - case Some(coords) => - val contig = (coords \ "contig").asOpt[String].getOrElse("") - val position = (coords \ "position").asOpt[Int].getOrElse(0) - val ref = (coords \ "ref").asOpt[String].getOrElse("") - val alt = (coords \ "alt").asOpt[String].getOrElse("") - - val canonicalSource = genomicsConfig.resolveReferenceName(sourceGenome) - val targetGenomes = genomicsConfig.supportedReferences.filter(_ != canonicalSource) - - val liftedFutures = targetGenomes.flatMap { targetGenome => - genomicsConfig.getLiftoverChainFile(canonicalSource, targetGenome) match { - case Some(chainFile) if chainFile.exists() => - val liftOver = new LiftOver(chainFile) - val interval = new Interval(contig, position, position) - val lifted = liftOver.liftOver(interval) - - if (lifted != null) { - val liftedCoords = Json.obj( - "contig" -> lifted.getContig, - "position" -> lifted.getStart, - "ref" -> ref, - "alt" -> alt - ) - Some(variantV2Repository.addCoordinates(variantId, targetGenome, liftedCoords)) - } else { - None - } - case _ => None - } - } - - Future.sequence(liftedFutures).map(_.count(_ == true)) - - case None => - logger.warn(s"Variant $variantId has no coordinates for 
$sourceGenome") - Future.successful(0) - } - - case None => - logger.warn(s"Variant $variantId not found") - Future.successful(0) - } - } -} diff --git a/app/services/ibd/IbdRelaySessionManager.scala b/app/services/ibd/IbdRelaySessionManager.scala deleted file mode 100644 index b79fd90d..00000000 --- a/app/services/ibd/IbdRelaySessionManager.scala +++ /dev/null @@ -1,125 +0,0 @@ -package services.ibd - -import jakarta.inject.{Inject, Singleton} -import org.apache.pekko.actor.ActorSystem -import org.apache.pekko.stream.scaladsl.{BroadcastHub, Keep, MergeHub, Source} -import org.apache.pekko.stream.{KillSwitches, Materializer, UniqueKillSwitch} -import play.api.{Configuration, Logging} - -import java.time.Instant -import java.util.UUID -import java.util.concurrent.ConcurrentHashMap -import scala.concurrent.ExecutionContext -import scala.concurrent.duration.* -import scala.jdk.CollectionConverters.* - -case class RelaySession( - sessionId: String, - matchRequestUri: String, - participantA: String, - participantB: String, - createdAt: Instant, - expiresAt: Instant, - bus: RelayMessageBus -) - -case class RelayMessage( - fromDid: String, - payload: String, - timestamp: Instant = Instant.now() -) - -class RelayMessageBus(implicit mat: Materializer) { - private val (sink, source) = - MergeHub.source[RelayMessage](perProducerBufferSize = 16) - .toMat(BroadcastHub.sink[RelayMessage](bufferSize = 256))(Keep.both) - .run() - - def publishSink = sink - def subscribeTo(forDid: String): Source[RelayMessage, ?] 
= - source.filter(_.fromDid != forDid) -} - -@Singleton -class IbdRelaySessionManager @Inject()( - system: ActorSystem, - configuration: Configuration -)(implicit ec: ExecutionContext, mat: Materializer) extends Logging { - - private val sessionTimeoutMinutes: Long = - configuration.getOptional[Long]("decodingus.matching.relay.session-timeout-minutes").getOrElse(10) - private val maxConcurrentSessions: Int = - configuration.getOptional[Int]("decodingus.matching.relay.max-concurrent-sessions").getOrElse(100) - private val cleanupIntervalSeconds: Long = - configuration.getOptional[Long]("decodingus.matching.relay.stale-cleanup-interval-seconds").getOrElse(60) - - private val sessions = new ConcurrentHashMap[String, RelaySession]() - - // Schedule periodic cleanup - system.scheduler.scheduleWithFixedDelay( - cleanupIntervalSeconds.seconds, - cleanupIntervalSeconds.seconds - )(() => cleanupStaleSessions()) - - def createSession(matchRequestUri: String, participantA: String, participantB: String): Option[RelaySession] = { - if (sessions.size() >= maxConcurrentSessions) { - logger.warn(s"Max concurrent sessions ($maxConcurrentSessions) reached, rejecting new session") - return None - } - - // Check if a session already exists for this match request - val existing = sessions.values().asScala.find(_.matchRequestUri == matchRequestUri) - if (existing.isDefined) { - logger.debug(s"Session already exists for match request $matchRequestUri") - return existing - } - - val sessionId = UUID.randomUUID().toString - val now = Instant.now() - val session = RelaySession( - sessionId = sessionId, - matchRequestUri = matchRequestUri, - participantA = participantA, - participantB = participantB, - createdAt = now, - expiresAt = now.plusSeconds(sessionTimeoutMinutes * 60), - bus = new RelayMessageBus() - ) - - sessions.put(sessionId, session) - logger.info(s"Created relay session $sessionId for match request $matchRequestUri between $participantA and $participantB") - Some(session) - } - 
- def getSession(sessionId: String): Option[RelaySession] = { - Option(sessions.get(sessionId)).filter(s => Instant.now().isBefore(s.expiresAt)) - } - - def findSessionForRequest(matchRequestUri: String): Option[RelaySession] = { - sessions.values().asScala.find(s => - s.matchRequestUri == matchRequestUri && Instant.now().isBefore(s.expiresAt) - ) - } - - def isAuthorizedParticipant(sessionId: String, did: String): Boolean = { - getSession(sessionId).exists(s => s.participantA == did || s.participantB == did) - } - - def removeSession(sessionId: String): Boolean = { - Option(sessions.remove(sessionId)).isDefined - } - - def activeSessions: Int = sessions.size() - - private def cleanupStaleSessions(): Unit = { - val now = Instant.now() - val expired = sessions.entrySet().asScala.filter(e => now.isAfter(e.getValue.expiresAt)) - expired.foreach { e => - sessions.remove(e.getKey) - logger.info(s"Cleaned up expired relay session ${e.getKey}") - } - if (expired.nonEmpty) { - logger.info(s"Cleaned up ${expired.size} expired relay sessions, ${sessions.size()} active") - } - } -} diff --git a/app/services/ibd/MatchDiscoveryService.scala b/app/services/ibd/MatchDiscoveryService.scala deleted file mode 100644 index 03f5e7cb..00000000 --- a/app/services/ibd/MatchDiscoveryService.scala +++ /dev/null @@ -1,154 +0,0 @@ -package services.ibd - -import jakarta.inject.{Inject, Singleton} -import models.domain.ibd.{MatchConsentTracking, MatchRequestTracking, MatchSuggestion} -import play.api.libs.json.{JsValue, Json} -import play.api.{Configuration, Logging} -import repositories.* - -import java.time.ZonedDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait MatchDiscoveryService { - // Discovery - def getSuggestions(sampleGuid: UUID, suggestionType: Option[String], limit: Int): Future[Seq[MatchSuggestion]] - def dismissSuggestion(id: Long): Future[Boolean] - def generateSuggestions(): Future[Int] - - // Match requests - def 
createMatchRequest(request: MatchRequestTracking): Future[MatchRequestTracking] - def getMatchRequest(atUri: String): Future[Option[MatchRequestTracking]] - def getPendingRequests(sampleGuid: UUID): Future[Seq[MatchRequestTracking]] - def getSentRequests(did: String): Future[Seq[MatchRequestTracking]] - def cancelRequest(atUri: String): Future[Boolean] - - // Consent - def trackConsent(consent: MatchConsentTracking): Future[MatchConsentTracking] - def getConsentStatus(requestUri: String): Future[Option[ConsentStatus]] - def revokeConsent(atUri: String): Future[Boolean] -} - -case class ConsentStatus( - requestUri: String, - requesterConsented: Boolean, - targetConsented: Boolean, - mutualConsent: Boolean -) - -object ConsentStatus { - implicit val format: play.api.libs.json.OFormat[ConsentStatus] = Json.format[ConsentStatus] -} - -@Singleton -class MatchDiscoveryServiceImpl @Inject()( - suggestionRepo: MatchSuggestionRepository, - requestRepo: MatchRequestTrackingRepository, - consentRepo: MatchConsentTrackingRepository, - overlapScoreRepo: PopulationOverlapScoreRepository, - breakdownCacheRepo: PopulationBreakdownCacheRepository, - configuration: Configuration -)(implicit ec: ExecutionContext) extends MatchDiscoveryService with Logging { - - private val populationOverlapThreshold = configuration.getOptional[Double]("decodingus.matching.discovery.population-overlap-threshold").getOrElse(0.6) - private val maxSuggestionsPerUser = configuration.getOptional[Int]("decodingus.matching.discovery.max-suggestions-per-user").getOrElse(100) - private val suggestionExpiryDays = configuration.getOptional[Int]("decodingus.matching.discovery.suggestion-expiry-days").getOrElse(90) - - // --- Discovery --- - - override def getSuggestions(sampleGuid: UUID, suggestionType: Option[String], limit: Int): Future[Seq[MatchSuggestion]] = - suggestionRepo.findByTargetSample(sampleGuid, suggestionType, limit) - - override def dismissSuggestion(id: Long): Future[Boolean] = - 
suggestionRepo.dismiss(id) - - override def generateSuggestions(): Future[Int] = { - for { - allGuids <- breakdownCacheRepo.findAllSampleGuids() - count <- generatePopulationOverlapSuggestions(allGuids) - expired <- suggestionRepo.expireOld(ZonedDateTime.now()) - } yield { - logger.info(s"Generated $count suggestions, expired $expired old suggestions") - count - } - } - - private def generatePopulationOverlapSuggestions(sampleGuids: Seq[UUID]): Future[Int] = { - Future.traverse(sampleGuids) { guid => - overlapScoreRepo.findBySample(guid, populationOverlapThreshold).flatMap { overlaps => - suggestionRepo.countByTargetSample(guid).flatMap { currentCount => - val available = maxSuggestionsPerUser - currentCount - if (available <= 0) Future.successful(0) - else { - val newSuggestions = overlaps.take(available).map { overlap => - val otherGuid = if (overlap.sampleGuid1 == guid) overlap.sampleGuid2 else overlap.sampleGuid1 - MatchSuggestion( - id = None, - targetSampleGuid = guid, - suggestedSampleGuid = otherGuid, - suggestionType = "POPULATION_OVERLAP", - score = overlap.overlapScore, - metadata = Some(Json.obj("overlapScore" -> overlap.overlapScore)), - status = "ACTIVE", - createdAt = ZonedDateTime.now(), - expiresAt = Some(ZonedDateTime.now().plusDays(suggestionExpiryDays)) - ) - } - if (newSuggestions.nonEmpty) { - suggestionRepo.createBatch(newSuggestions).map(_.size).recover { - case e: Exception => - logger.warn(s"Some suggestions may already exist for $guid: ${e.getMessage}") - 0 - } - } else Future.successful(0) - } - } - } - }.map(_.sum) - } - - // --- Match Requests --- - - override def createMatchRequest(request: MatchRequestTracking): Future[MatchRequestTracking] = - requestRepo.create(request) - - override def getMatchRequest(atUri: String): Future[Option[MatchRequestTracking]] = - requestRepo.findByAtUri(atUri) - - override def getPendingRequests(sampleGuid: UUID): Future[Seq[MatchRequestTracking]] = - requestRepo.findPendingForSample(sampleGuid) - - 
override def getSentRequests(did: String): Future[Seq[MatchRequestTracking]] = - requestRepo.findSentByDid(did) - - override def cancelRequest(atUri: String): Future[Boolean] = - requestRepo.updateStatus(atUri, "CANCELLED") - - // --- Consent --- - - override def trackConsent(consent: MatchConsentTracking): Future[MatchConsentTracking] = - consentRepo.upsertFromFirehose(consent) - - override def getConsentStatus(requestUri: String): Future[Option[ConsentStatus]] = { - requestRepo.findByAtUri(requestUri).flatMap { - case None => Future.successful(None) - case Some(request) => - for { - fromConsent <- consentRepo.findActiveConsentForSample(request.fromSampleGuid) - toConsent <- consentRepo.findActiveConsentForSample(request.toSampleGuid) - } yield { - val requesterConsented = fromConsent.isDefined - val targetConsented = toConsent.isDefined - Some(ConsentStatus( - requestUri = requestUri, - requesterConsented = requesterConsented, - targetConsented = targetConsented, - mutualConsent = requesterConsented && targetConsented - )) - } - } - } - - override def revokeConsent(atUri: String): Future[Boolean] = - consentRepo.revoke(atUri) -} diff --git a/app/services/ibd/PopulationAnalysisService.scala b/app/services/ibd/PopulationAnalysisService.scala deleted file mode 100644 index e1d2b85b..00000000 --- a/app/services/ibd/PopulationAnalysisService.scala +++ /dev/null @@ -1,110 +0,0 @@ -package services.ibd - -import jakarta.inject.{Inject, Singleton} -import models.domain.ibd.{PopulationBreakdownCache, PopulationOverlapScore} -import play.api.libs.json.{JsValue, Json} -import play.api.Logging -import repositories.{PopulationBreakdownCacheRepository, PopulationOverlapScoreRepository} - -import java.security.MessageDigest -import java.time.ZonedDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -trait PopulationAnalysisService { - def cacheBreakdown(sampleGuid: UUID, breakdown: JsValue, sourceAtUri: Option[String]): 
Future[PopulationBreakdownCache] - def getBreakdown(sampleGuid: UUID): Future[Option[PopulationBreakdownCache]] - def computeOverlap(guid1: UUID, guid2: UUID): Future[Option[Double]] - def getOverlapScore(guid1: UUID, guid2: UUID): Future[Option[PopulationOverlapScore]] - def computeAllOverlapScores(): Future[Int] - def removeBreakdown(sampleGuid: UUID): Future[Boolean] -} - -@Singleton -class PopulationAnalysisServiceImpl @Inject()( - breakdownCacheRepo: PopulationBreakdownCacheRepository, - overlapScoreRepo: PopulationOverlapScoreRepository -)(implicit ec: ExecutionContext) extends PopulationAnalysisService with Logging { - - override def cacheBreakdown(sampleGuid: UUID, breakdown: JsValue, sourceAtUri: Option[String]): Future[PopulationBreakdownCache] = { - val hash = sha256(Json.stringify(breakdown)) - val entry = PopulationBreakdownCache( - id = None, - sampleGuid = sampleGuid, - breakdown = breakdown, - breakdownHash = hash, - cachedAt = ZonedDateTime.now(), - sourceAtUri = sourceAtUri - ) - breakdownCacheRepo.upsert(entry) - } - - override def getBreakdown(sampleGuid: UUID): Future[Option[PopulationBreakdownCache]] = - breakdownCacheRepo.findBySampleGuid(sampleGuid) - - override def computeOverlap(guid1: UUID, guid2: UUID): Future[Option[Double]] = { - for { - bd1 <- breakdownCacheRepo.findBySampleGuid(guid1) - bd2 <- breakdownCacheRepo.findBySampleGuid(guid2) - } yield { - for { - b1 <- bd1 - b2 <- bd2 - } yield calculateOverlapScore(b1.breakdown, b2.breakdown) - } - } - - override def getOverlapScore(guid1: UUID, guid2: UUID): Future[Option[PopulationOverlapScore]] = - overlapScoreRepo.findByPair(guid1, guid2) - - override def computeAllOverlapScores(): Future[Int] = { - breakdownCacheRepo.findAll().flatMap { allBreakdowns => - val pairs = for { - i <- allBreakdowns.indices - j <- (i + 1) until allBreakdowns.size - } yield (allBreakdowns(i), allBreakdowns(j)) - - Future.traverse(pairs) { case (bd1, bd2) => - val score = calculateOverlapScore(bd1.breakdown, 
bd2.breakdown) - val overlapScore = PopulationOverlapScore( - id = None, - sampleGuid1 = bd1.sampleGuid, - sampleGuid2 = bd2.sampleGuid, - overlapScore = score, - computedAt = ZonedDateTime.now() - ) - overlapScoreRepo.upsert(overlapScore) - }.map(_.size) - } - } - - override def removeBreakdown(sampleGuid: UUID): Future[Boolean] = - breakdownCacheRepo.deleteBySampleGuid(sampleGuid) - - private[ibd] def calculateOverlapScore(breakdown1: JsValue, breakdown2: JsValue): Double = { - val map1 = breakdownToMap(breakdown1) - val map2 = breakdownToMap(breakdown2) - val allPops = map1.keySet ++ map2.keySet - allPops.toSeq.map { pop => - math.min(map1.getOrElse(pop, 0.0), map2.getOrElse(pop, 0.0)) - }.sum - } - - private def breakdownToMap(breakdown: JsValue): Map[String, Double] = { - breakdown.asOpt[Map[String, Double]].getOrElse { - breakdown.asOpt[Seq[Map[String, JsValue]]].map { components => - components.flatMap { comp => - for { - pop <- (comp.get("population").orElse(comp.get("name"))).flatMap(_.asOpt[String]) - pct <- (comp.get("percentage").orElse(comp.get("fraction"))).flatMap(_.asOpt[Double]) - } yield pop -> pct - }.toMap - }.getOrElse(Map.empty) - } - } - - private def sha256(input: String): String = { - val digest = MessageDigest.getInstance("SHA-256") - digest.digest(input.getBytes("UTF-8")).map("%02x".format(_)).mkString - } -} diff --git a/app/services/mappers/GenomicStudyMappers.scala b/app/services/mappers/GenomicStudyMappers.scala deleted file mode 100644 index 5eb925ac..00000000 --- a/app/services/mappers/GenomicStudyMappers.scala +++ /dev/null @@ -1,143 +0,0 @@ -package services.mappers - -import com.vividsolutions.jts.geom.{Coordinate, GeometryFactory} -import models.domain.genomics.{BiologicalSex, Biosample, BiosampleType, SpecimenDonor} -import models.domain.publications.{GenomicStudy, StudySource} -import services.ena.{EnaBiosampleData, EnaStudyData} -import services.ncbi.{SraBiosampleData, SraStudyData} - -import java.util.UUID - -object 
GenomicStudyMappers { - private val ValidSexValues = Set("male", "female", "intersex") - private val geometryFactory = new GeometryFactory() - - // Existing genomic study mapping methods remain unchanged - def enaToGenomicStudy(ena: EnaStudyData): GenomicStudy = GenomicStudy( - id = None, - accession = ena.accession, - title = ena.title.take(255), - centerName = ena.centerName, - studyName = ena.studyName, - details = ena.details, - source = StudySource.ENA, - submissionDate = None, - bioProjectId = None, - lastUpdate = None, - molecule = None, - topology = None, - taxonomyId = None, - version = None - ) - - def sraToGenomicStudy(sra: SraStudyData): GenomicStudy = GenomicStudy( - id = None, - accession = sra.studyName, - title = sra.title.take(255), - centerName = sra.centerName, - studyName = sra.studyName, - details = sra.description, - source = StudySource.NCBI_BIOPROJECT, - submissionDate = None, - bioProjectId = sra.bioProjectId, - lastUpdate = None, - molecule = None, - topology = None, - taxonomyId = None, - version = None - ) - - case class BiosampleMappingResult( - biosample: Biosample, - specimenDonor: Option[SpecimenDonor] - ) - - def enaToBiosample(ena: EnaBiosampleData): BiosampleMappingResult = { - val geoCoord = (ena.latitude, ena.longitude) match { - case (Some(lat), Some(lon)) => - Some(geometryFactory.createPoint(new Coordinate(lon, lat))) - case _ => None - } - - val donorId = UUID.randomUUID().toString - - val specimenDonor = if (ena.sex.isDefined || geoCoord.isDefined) { - Some(SpecimenDonor( - id = None, - donorIdentifier = donorId, - originBiobank = ena.centerName, - donorType = BiosampleType.Standard, - sex = ena.sex.map(BiologicalSex.valueOf), - geocoord = geoCoord, - pgpParticipantId = None, - atUri = None, - dateRangeStart = None, - dateRangeEnd = None - )) - } else None - - val biosample = Biosample( - id = None, - sampleGuid = UUID.randomUUID(), - sampleAccession = ena.sampleAccession, - description = ena.description, - alias = 
ena.alias, - centerName = ena.centerName, - specimenDonorId = None, // Will be set after donor is created - locked = false, - sourcePlatform = None - ) - - BiosampleMappingResult(biosample, specimenDonor) - } - - def sraToBiosample(sra: SraBiosampleData): BiosampleMappingResult = { - val sex = validateSex(sra.attributes.get("sex")) - val coordinates = for { - lat <- sra.attributes.get("latitude") - .orElse(sra.attributes.get("lat")) - .flatMap(_.toDoubleOption) - lon <- sra.attributes.get("longitude") - .orElse(sra.attributes.get("lon")) - .flatMap(_.toDoubleOption) - } yield geometryFactory.createPoint(new Coordinate(lon, lat)) - - val donorId = UUID.randomUUID().toString - - val specimenDonor = if (sex.isDefined || coordinates.isDefined) { - Some(SpecimenDonor( - id = None, - donorIdentifier = donorId, - originBiobank = sra.centerName, - donorType = BiosampleType.Standard, - sex = sex.map(BiologicalSex.valueOf), - geocoord = coordinates, - pgpParticipantId = None, - atUri = None, - dateRangeStart = None, - dateRangeEnd = None - )) - } else None - - val biosample = Biosample( - id = None, - sampleGuid = UUID.randomUUID(), - sampleAccession = sra.sampleAccession, - description = sra.description, - alias = sra.alias, - centerName = sra.centerName, - specimenDonorId = None, // Will be set after donor is created - locked = false, - sourcePlatform = None - ) - - BiosampleMappingResult(biosample, specimenDonor) - } - - private def validateSex(sex: Option[String]): Option[String] = { - sex.flatMap { s => - val normalized = s.toLowerCase.trim - Some(normalized).filter(ValidSexValues.contains) - } - } -} \ No newline at end of file diff --git a/app/services/mappers/OpenAlexMapper.scala b/app/services/mappers/OpenAlexMapper.scala deleted file mode 100644 index 1537b0da..00000000 --- a/app/services/mappers/OpenAlexMapper.scala +++ /dev/null @@ -1,185 +0,0 @@ -package services.mappers - -import models.domain.publications.{Publication, PublicationCandidate} -import 
play.api.Logging -import play.api.libs.json.{JsArray, JsValue} - -import java.time.LocalDate -import java.time.LocalDateTime -import java.time.format.DateTimeParseException - -/** - * Object responsible for mapping OpenAlex JSON data to domain models. - * Provides methods to extract structured information and transform - * the JSON into a `Publication` object with relevant details. - */ -object OpenAlexMapper extends Logging { - private case class BasicInfo( - openAlexId: Option[String], - pubmedId: Option[String], - title: String - ) - - private case class PublishingInfo( - journal: Option[String], - publisher: Option[String], - publicationDate: Option[LocalDate] - ) - - private case class AccessInfo( - openAccessStatus: Option[String], - openAccessUrl: Option[String], - determinedUrl: Option[String] - ) - - private case class Metrics( - citedByCount: Option[Int], - citationNormalizedPercentile: Option[Float] - ) - - private case class ClassificationInfo( - primaryTopic: Option[String], - publicationType: Option[String] - ) - - private def extractBasicInfo(json: JsValue): BasicInfo = { - BasicInfo( - openAlexId = (json \ "id").asOpt[String].map(_.split("/").last), - pubmedId = (json \ "ids" \ "pmid").asOpt[String].map(_.replace("https://pubmed.ncbi.nlm.nih.gov/", "")), - title = (json \ "title").asOpt[String].getOrElse("Untitled") - ) - } - - private def extractAuthors(json: JsValue): Option[String] = { - (json \ "authorships").asOpt[JsArray].map { jsArray => - val authors = jsArray.value.flatMap { authorship => - (authorship \ "author" \ "display_name").asOpt[String] - } - authors.toList match { - case Nil => "No authors listed" - case List(a) => a - case List(a, b) => s"$a and $b" - case List(a, b, c) => s"$a, $b and $c" - case List(a, b, c, _*) => s"$a, $b, $c et al." 
- } - } - } - - private def extractAbstract(json: JsValue): Option[String] = { - (json \ "abstract_inverted_index").asOpt[Map[String, JsArray]].map { invertedIndex => - val wordsWithPositions = invertedIndex.flatMap { case (word, positionsArray) => - positionsArray.as[List[Int]].map(pos => (pos, word)) - } - wordsWithPositions.toSeq.sortBy(_._1).map(_._2).mkString(" ") - } - } - - private def extractPublishingInfo(json: JsValue, doi: String): PublishingInfo = { - val journal = (json \ "primary_location" \ "source" \ "display_name").asOpt[String] - - val publisherFromPrimary = (json \ "primary_location" \ "source" \ "host_organization_name").asOpt[String] - val publisherFromBestOa = (json \ "best_oa_location" \ "source" \ "host_organization_name").asOpt[String] - val publisher = publisherFromPrimary.orElse(publisherFromBestOa) - - val publicationDate = (json \ "publication_date").asOpt[String].flatMap { dateString => - try { - Some(LocalDate.parse(dateString)) - } catch { - case e: DateTimeParseException => - logger.warn(s"Failed to parse publication_date '$dateString' for DOI '$doi': ${e.getMessage}") - None - } - } - - PublishingInfo(journal, publisher, publicationDate) - } - - private def extractAccessInfo(json: JsValue, doi: Option[String]): AccessInfo = { - val openAccessStatus = (json \ "open_access" \ "oa_status").asOpt[String] - val openAccessUrl = (json \ "best_oa_location" \ "pdf_url").asOpt[String] - val determinedUrl = openAccessUrl.orElse(doi.map(d => s"https://doi.org/$d")) - - AccessInfo(openAccessStatus, openAccessUrl, determinedUrl) - } - - private def extractMetrics(json: JsValue): Metrics = { - Metrics( - citedByCount = (json \ "cited_by_count").asOpt[Int], - citationNormalizedPercentile = (json \ "citation_normalized_percentile" \ "value").asOpt[Float] - ) - } - - private def extractClassification(json: JsValue): ClassificationInfo = { - ClassificationInfo( - primaryTopic = (json \ "primary_topic" \ "display_name").asOpt[String], - 
publicationType = (json \ "type").asOpt[String] - ) - } - - /** - * Converts a JSON representation of a publication and its DOI into a `Publication` object. - * - * @param json The JSON structure containing the publication data. - * @param doi A string representing the DOI (Digital Object Identifier) of the publication. - * @return A `Publication` object populated with data extracted from the provided JSON and DOI. - */ - def jsonToPublication(json: JsValue, doi: String): Publication = { - val basicInfo = extractBasicInfo(json) - val authors = extractAuthors(json) - val abstractSummary = extractAbstract(json) - val publishingInfo = extractPublishingInfo(json, doi) - val accessInfo = extractAccessInfo(json, Some(doi)) - val metrics = extractMetrics(json) - val classification = extractClassification(json) - - Publication( - id = None, - openAlexId = basicInfo.openAlexId, - pubmedId = basicInfo.pubmedId, - doi = Some(doi), - title = basicInfo.title, - authors = authors, - abstractSummary = abstractSummary, - journal = publishingInfo.journal, - publicationDate = publishingInfo.publicationDate, - url = accessInfo.determinedUrl, - citationNormalizedPercentile = metrics.citationNormalizedPercentile, - citedByCount = metrics.citedByCount, - openAccessStatus = accessInfo.openAccessStatus, - openAccessUrl = accessInfo.openAccessUrl, - primaryTopic = classification.primaryTopic, - publicationType = classification.publicationType, - publisher = publishingInfo.publisher - ) - } - - /** - * Converts a JSON representation of a work from OpenAlex into a `PublicationCandidate` object. - * - * @param json The JSON structure containing the work data. - * @return A `PublicationCandidate` object. 
- */ - def jsonToPublicationCandidate(json: JsValue): PublicationCandidate = { - val basicInfo = extractBasicInfo(json) - val abstractSummary = extractAbstract(json) - val doi = (json \ "doi").asOpt[String].map(_.replace("https://doi.org/", "")) - val publishingInfo = extractPublishingInfo(json, doi.getOrElse("")) - - PublicationCandidate( - id = None, - openAlexId = basicInfo.openAlexId.getOrElse("unknown"), - doi = doi, - title = basicInfo.title, - `abstract` = abstractSummary, - publicationDate = publishingInfo.publicationDate, - journalName = publishingInfo.journal, - relevanceScore = None, // To be calculated by the service - discoveryDate = LocalDateTime.now(), - status = "pending", - reviewedBy = None, - reviewedAt = None, - rejectionReason = None, - rawMetadata = Some(json) - ) - } -} \ No newline at end of file diff --git a/app/services/ncbi/NcbiApiClient.scala b/app/services/ncbi/NcbiApiClient.scala deleted file mode 100644 index 9d38a0dc..00000000 --- a/app/services/ncbi/NcbiApiClient.scala +++ /dev/null @@ -1,245 +0,0 @@ -package services.ncbi - -import org.apache.pekko.stream.scaladsl.{Keep, Sink, Source} -import org.apache.pekko.stream.{Materializer, OverflowStrategy} -import play.api.Logging -import play.api.libs.json.{JsArray, JsObject, JsValue} -import play.api.libs.ws.{WSClient, WSRequest, WSResponse} - -import javax.inject.{Inject, Singleton} -import scala.concurrent.duration.* -import scala.concurrent.{ExecutionContext, Future, Promise} -import scala.util.{Failure, Success} - -case class NcbiRateLimitException(message: String) extends Exception(message) - -case class SraStudyData( - title: String, - centerName: String, - studyName: String, - description: String, - bioProjectId: Option[String], - biosampleIds: Seq[String] - ) - -case class SraBiosampleData( - sampleAccession: String, - description: String, - alias: Option[String], - centerName: String, - attributes: Map[String, String] - ) - -/** - * A client for interacting with the NCBI Entrez 
Programming Utilities (E-utilities) API. - * This client supports querying and fetching study and biosample data from NCBI databases, - * while handling API rate limits and retries as specified by NCBI's usage guidelines. - * - * @constructor Initializes the `NcbiApiClient` with the necessary injected dependencies. - * @param ws The `WSClient` used for making HTTP requests. - * @param ec The execution context for asynchronous operations. - * @param mat The materializer for Akka streams. - */ -@Singleton -class NcbiApiClient @Inject()(ws: WSClient)(implicit ec: ExecutionContext, mat: Materializer) extends Logging { - private val baseUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" - private implicit val system: org.apache.pekko.actor.ActorSystem = mat.system.classicSystem - - // Create a queue that processes requests with rate limiting - private val (queue, _) = Source.queue[(WSRequest, Promise[WSResponse])]( - bufferSize = 100, - overflowStrategy = OverflowStrategy.backpressure - ).throttle(2, 1.second) // NCBI's limit - .mapAsync(1) { case (request, promise) => - request.get() - .map { response => - if (response.status == 429) { - promise.failure(NcbiRateLimitException(response.body)) - throw NcbiRateLimitException(response.body) - } else { - promise.success(response) - response - } - } - .recover { case e => - promise.failure(e) - throw e - } - } - .toMat(Sink.ignore)(Keep.both) - .run() - - private def makeRequest(request: WSRequest, retries: Int = 3): Future[WSResponse] = { - request.get().flatMap { response => - (response.json \ "error").asOpt[String] match { - case Some(error) if error.contains("API rate limit exceeded") && retries > 0 => - org.apache.pekko.pattern.after(1500.millis)(makeRequest(request, retries - 1)) - case Some(error) => - Future.failed(NcbiRateLimitException(error)) - case None => - Future.successful(response) - } - } - } - - - /** - * Retrieves detailed information about an SRA study or BioProject based on its accession identifier. 
- * For BioProject accessions, the details are directly retrieved from the BioProject database. - * For SRA accessions, the method first resolves the corresponding BioProject, then retrieves its details. - * - * @param accession The unique accession identifier for the SRA study or BioProject. - * For BioProject, this typically starts with "PRJNA". For SRA accessions, - * additional queries are performed to resolve the corresponding BioProject. - * @return A Future containing an Option of SraStudyData. The Option is None if no details are found. - * The SraStudyData contains metadata such as title, center name, description, and associated biosamples. - */ - def getSraStudyDetails(accession: String): Future[Option[SraStudyData]] = { - if (accession.startsWith("PRJNA")) { - // Direct BioProject query - val bioProjectRequest = ws.url(s"$baseUrl/esummary.fcgi") - .withQueryStringParameters( - "db" -> "bioproject", - "id" -> accession.substring(5), // Remove "PRJNA" prefix - "retmode" -> "json" - ) - - makeRequest(bioProjectRequest).map { response => - for { - result <- (response.json \ "result").asOpt[JsObject] - data <- (result \ "uids").asOpt[JsArray].flatMap(_.value.headOption) - .flatMap(uid => (result \ uid.as[String]).asOpt[JsObject]) - } yield { - SraStudyData( - title = (data \ "project_title").asOpt[String].getOrElse(""), - centerName = (data \ "organization").asOpt[String].getOrElse("N/A"), - studyName = accession, - description = (data \ "project_description").asOpt[String].getOrElse(""), - bioProjectId = Some(accession), - biosampleIds = Seq.empty // We'll get these in a separate call - ) - } - } - } else { - // For SRA accessions, first get the BioProject ID, then get its details - val searchRequest = ws.url(s"$baseUrl/esearch.fcgi") - .withQueryStringParameters( - "db" -> "sra", - "term" -> accession, - "retmode" -> "json" - ) - - makeRequest(searchRequest).flatMap { searchResponse => - val ids = (searchResponse.json \\ "idlist").headOption - 
.map(_.as[Seq[String]]) - .getOrElse(Seq.empty) - - if (ids.isEmpty) { - Future.successful(None) - } else { - val summaryRequest = ws.url(s"$baseUrl/esummary.fcgi") - .withQueryStringParameters( - "db" -> "sra", - "id" -> ids.head, - "retmode" -> "json" - ) - - makeRequest(summaryRequest).flatMap { summaryResponse => - val bioProjectIdOpt = for { - result <- (summaryResponse.json \ "result").asOpt[JsObject] - docsum <- result.value.get(ids.head).flatMap(_.asOpt[JsObject]) - expXmlStr <- docsum.value.get("expxml").flatMap(_.asOpt[String]) - xml = scala.xml.XML.loadString(s"${expXmlStr.trim}") - bioProjectId <- (xml \\ "Bioproject").headOption.map(_.text) - } yield bioProjectId - - bioProjectIdOpt match { - case Some(bioProjectId) => - org.apache.pekko.pattern.after(1500.millis)(getSraStudyDetails(bioProjectId)) - case None => Future.successful(None) - } - } - } - } - } - } - - - /** - * Retrieves a list of biosample metadata associated with a given SRA BioProject accession. - * The function queries the NCBI Entrez API to collect biosample data, including attributes, - * aliases, and descriptions. - * - * @param accession The unique accession identifier for the SRA BioProject. Typically starts with "PRJNA". - * @return A Future containing a sequence of SraBiosampleData. If no biosample data is identified, an empty sequence is returned. 
- */ - def getSraBiosamples(accession: String): Future[Seq[SraBiosampleData]] = { - val searchRequest = ws.url(s"$baseUrl/esearch.fcgi") - .withQueryStringParameters( - "db" -> "sra", - "term" -> s"$accession[BioProject]", - "retmode" -> "json" - ) - - makeRequest(searchRequest).flatMap { searchResponse => - val ids = (searchResponse.json \\ "idlist").headOption - .map(_.as[Seq[String]]) - .getOrElse(Seq.empty) - - if (ids.isEmpty) { - Future.successful(Seq.empty) - } else { - // Get all experiment details in one call - val summaryRequest = ws.url(s"$baseUrl/esummary.fcgi") - .withQueryStringParameters( - "db" -> "sra", - "id" -> ids.mkString(","), - "retmode" -> "json" - ) - - makeRequest(summaryRequest).map { summaryResponse => - val result = for { - resultObj <- (summaryResponse.json \ "result").asOpt[JsObject] - // Remove the uids key which contains duplicate data - experiments = resultObj.value.view.filterKeys(_ != "uids").toMap - } yield { - experiments.flatMap { case (_, expJson) => - try { - val expXmlStr = (expJson \ "expxml").as[String] - val xml = scala.xml.XML.loadString(s"${expXmlStr.trim}") - - (xml \\ "Biosample").headOption.map(_.text).filter(_.nonEmpty).map { sampleAccession => - val attributes = (xml \\ "Attributes" \\ "Attribute").map { attr => - ((attr \ "@name").text, attr.text) - }.toMap - - // Try to get sample name from various possible locations - val sampleName = (xml \\ "Sample" \ "@alias").headOption.map(_.text) - .orElse((xml \\ "Sample_Name").headOption.map(_.text)) - .orElse((xml \\ "SAMPLE_NAME").headOption.map(_.text)) - .orElse((xml \\ "Sample" \ "SAMPLE_NAME").headOption.map(_.text)) - - SraBiosampleData( - sampleAccession = sampleAccession, - description = (xml \\ "Summary" \\ "Title").headOption.map(_.text) - .getOrElse("No description available"), - alias = sampleName.orElse((xml \\ "Library_descriptor" \\ "LIBRARY_NAME").headOption.map(_.text)), - centerName = (xml \\ "Submitter" \\ "@center_name").headOption.map(_.text) - 
.getOrElse("N/A"), - attributes = attributes - ) - } - } catch { - case e: Exception => - logger.error(s"Error parsing experiment XML: ${e.getMessage}") - None - } - } - } - - result.getOrElse(Seq.empty).toSeq - } - } - } - } -} \ No newline at end of file diff --git a/app/services/social/ReputationGuard.scala b/app/services/social/ReputationGuard.scala deleted file mode 100644 index 8fe5fe20..00000000 --- a/app/services/social/ReputationGuard.scala +++ /dev/null @@ -1,27 +0,0 @@ -package services.social - -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} -import java.util.UUID - -@Singleton -class ReputationGuard @Inject()( - reputationService: ReputationService - )(implicit ec: ExecutionContext) { - - private val POST_FEED_THRESHOLD = 10 - private val INITIATE_DM_THRESHOLD = 20 - private val CREATE_GROUP_THRESHOLD = 50 - - def canPostToFeed(userId: UUID): Future[Boolean] = { - reputationService.getScore(userId).map(_ >= POST_FEED_THRESHOLD) - } - - def canInitiateDM(userId: UUID): Future[Boolean] = { - reputationService.getScore(userId).map(_ >= INITIATE_DM_THRESHOLD) - } - - def canCreateGroup(userId: UUID): Future[Boolean] = { - reputationService.getScore(userId).map(_ >= CREATE_GROUP_THRESHOLD) - } -} diff --git a/app/services/social/ReputationService.scala b/app/services/social/ReputationService.scala deleted file mode 100644 index fb444dc0..00000000 --- a/app/services/social/ReputationService.scala +++ /dev/null @@ -1,86 +0,0 @@ -package services.social - -import models.dal.DatabaseSchema -import models.domain.social.{ReputationEvent, UserReputationScore} -import play.api.Logging -import play.api.db.slick.{DatabaseConfigProvider, HasDatabaseConfigProvider} -import slick.jdbc.PostgresProfile - -import java.time.LocalDateTime -import java.util.UUID -import javax.inject.{Inject, Singleton} -import scala.concurrent.{ExecutionContext, Future} - -@Singleton -class ReputationService @Inject()( - protected val 
dbConfigProvider: DatabaseConfigProvider - )(implicit ec: ExecutionContext) extends HasDatabaseConfigProvider[PostgresProfile] with Logging { - - import profile.api.* - - private val events = DatabaseSchema.domain.social.reputationEvents - private val eventTypes = DatabaseSchema.domain.social.reputationEventTypes - private val scores = DatabaseSchema.domain.social.userReputationScores - - /** - * Records a reputation event and updates the user's score transactionally. - * - * @param userId The ID of the user receiving the reputation change. - * @param eventTypeName The unique name of the event type (e.g., "ACCOUNT_VERIFIED"). - * @param relatedEntity Optional tuple of (EntityType, EntityId) to link the event to a specific object. - * @param sourceUserId Optional ID of the user who triggered the event (e.g., the upvoter). - * @param notes Optional notes or reasoning. - * @return A Future containing the user's new total score. - */ - def recordEvent( - userId: UUID, - eventTypeName: String, - relatedEntity: Option[(String, UUID)] = None, - sourceUserId: Option[UUID] = None, - notes: Option[String] = None - ): Future[Long] = { - - val action = for { - // 1. Lookup Event Type - eventTypeOpt <- eventTypes.filter(_.name === eventTypeName).result.headOption - eventType = eventTypeOpt.getOrElse(throw new IllegalArgumentException(s"ReputationEventType '$eventTypeName' not found")) - - // 2. Create Event Record - event = ReputationEvent( - userId = userId, - eventTypeId = eventType.id.get, - actualPointsChange = eventType.defaultPointsChange, - sourceUserId = sourceUserId, - relatedEntityType = relatedEntity.map(_._1), - relatedEntityId = relatedEntity.map(_._2), - notes = notes, - createdAt = LocalDateTime.now() - ) - _ <- events += event - - // 3. Update User Score (Upsert) - // Lock the row for update to prevent race conditions if needed, but for simple increments, atomic SQL is often enough. - // Here we'll fetch, calculate, and update within the transaction. 
- currentScoreOpt <- scores.filter(_.userId === userId).result.headOption - newScoreVal = currentScoreOpt.map(_.score).getOrElse(0L) + eventType.defaultPointsChange - _ <- scores.insertOrUpdate(UserReputationScore(userId, newScoreVal, LocalDateTime.now())) - - } yield newScoreVal - - db.run(action.transactionally).recover { - case e: Exception => - logger.error(s"Failed to record reputation event '$eventTypeName' for user $userId", e) - throw e - } - } - - /** - * Retrieves the current reputation score for a user. - * - * @param userId The user's ID. - * @return Future[Long] representing the score (defaults to 0 if not found). - */ - def getScore(userId: UUID): Future[Long] = { - db.run(scores.filter(_.userId === userId).map(_.score).result.headOption).map(_.getOrElse(0L)) - } -} diff --git a/app/services/tree/TreeMergeAlgorithmService.scala b/app/services/tree/TreeMergeAlgorithmService.scala deleted file mode 100644 index 39e4ba01..00000000 --- a/app/services/tree/TreeMergeAlgorithmService.scala +++ /dev/null @@ -1,876 +0,0 @@ -package services.tree - -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.api.haplogroups.* -import models.domain.haplogroups.{ExistingTree, ExistingTreeNode, Haplogroup, HaplogroupProvenance, MergeAccumulator, MergeCase, MergeContext, VariantCache, VariantIndex} -import play.api.Logging -import play.api.libs.json.Json -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantV2Repository} -import services.{TreeMergeStagingHelper, TreeVersioningService} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Callback trait for change set tracking. - * - * The orchestrator (HaplogroupTreeMergeService) provides implementations - * of these callbacks to track changes without the algorithm service - * needing direct access to TreeVersioningService. 
- */ -trait ChangeSetCallbacks { - /** Record creation of a new haplogroup */ - def recordCreate(haplogroupJson: String, parentId: Option[Int]): Unit - - /** Record reparenting of an existing haplogroup */ - def recordReparent(haplogroupId: Int, oldParentId: Option[Int], newParentId: Int): Unit -} - -/** - * Service implementing the core Identify-Match-Graft tree merge algorithm. - * - * This service contains the algorithmic logic for merging external haplogroup trees - * into the DecodingUs baseline tree. It accepts callbacks from the orchestrator for - * change set tracking, keeping concerns separated. - * - * == Algorithm Overview == - * - * The Identify-Match-Graft algorithm operates in four phases: - * - * Phase 1 (Normalization): Both trees converted to standardized format with: - * - U(N): Unique SNP set - variants defined at this node only - * - C(N): Cumulative SNP set - all variants from root to N - * - * Phase 2 (Alignment): Parallel tree traversal using SNP set intersection. - * - * Phase 3 (Conflict Resolution): Four-way classification: - * - Case A (FULL_MATCH): U(T₁) = U(T₀) - * - Case B (SOURCE_IS_ANCESTOR): U(T₁) ⊂ U(T₀) - * - Case C (SOURCE_IS_DESCENDANT): U(T₁) ⊃ U(T₀) - * - Case D (DISJOINT_BRANCH): partial or no overlap - * - * Phase 4 (Grafting): Node contraction when T₁ provides finer resolution. 
- */ -@Singleton -class TreeMergeAlgorithmService @Inject()( - haplogroupRepository: HaplogroupCoreRepository, - haplogroupVariantRepository: HaplogroupVariantRepository, - variantV2Repository: VariantV2Repository, - stagingHelper: TreeMergeStagingHelper, - provenanceService: TreeMergeProvenanceService, - variantMatchingService: VariantMatchingService -)(implicit ec: ExecutionContext) extends Logging { - - // ============================================================================ - // Helper methods for VariantInput - // ============================================================================ - - /** Extract all variant names (primary + aliases) from a VariantInput */ - private def allVariantNames(variant: VariantInput): List[String] = - variant.name :: variant.aliases - - /** Extract all variant names from a list of VariantInput */ - private def allVariantNames(variants: List[VariantInput]): List[String] = - variants.flatMap(allVariantNames) - - /** Extract just the primary variant names from a list of VariantInput */ - private def primaryVariantNames(variants: List[VariantInput]): List[String] = - variants.map(_.name) - - /** Recursively collect all variant names from a PhyloNodeInput tree */ - private def collectAllVariantNames(node: PhyloNodeInput): List[String] = { - val nodeVariants = allVariantNames(node.variants) - val childVariants = node.children.flatMap(collectAllVariantNames) - nodeVariants ++ childVariants - } - - /** Count total nodes in a tree */ - private def countNodes(node: PhyloNodeInput): Int = { - 1 + node.children.map(countNodes).sum - } - - /** - * Recursively fetch all descendants of a haplogroup. 
- */ - def getDescendantsRecursive(haplogroupId: Int): Future[Seq[Haplogroup]] = { - haplogroupRepository.getDescendants(haplogroupId) - } - - // ============================================================================ - // Core Algorithm Entry Point - // ============================================================================ - - /** - * Perform the actual merge operation using parallel tree traversal. - * - * Key insight: We traverse both trees (source and existing) in parallel, - * only matching source children against existing children of already-matched parents. - * This prevents cross-branch mismatches that would cause incorrect reparenting. - * - * @param haplogroupType Y or mtDNA - * @param anchorId Optional parent ID for subtree merges - * @param sourceTree The incoming tree structure - * @param sourceName Name of the source (e.g., "ISOGG") - * @param priorityConfig Source priority configuration - * @param conflictStrategy How to resolve conflicts - * @param preloadedIndex Optional pre-built variant index (for subtree merges) - * @param context Merge context with change set info - * @param callbacks Optional callbacks for change set tracking - * @return TreeMergeResponse with statistics and any conflicts/ambiguities - */ - def performMerge( - haplogroupType: HaplogroupType, - anchorId: Option[Int], - sourceTree: PhyloNodeInput, - sourceName: String, - priorityConfig: SourcePriorityConfig, - conflictStrategy: ConflictStrategy, - preloadedIndex: Option[VariantIndex], - context: MergeContext, - callbacks: Option[ChangeSetCallbacks] - ): Future[TreeMergeResponse] = { - val now = context.timestamp - val nodeCount = countNodes(sourceTree) - - // Reset placeholder counter for this merge operation - stagingHelper.resetPlaceholderCounter() - - logger.info(s"Starting merge for source '$sourceName' with $nodeCount nodes (stagingMode: ${context.stagingMode})") - - for { - // Phase 1a: Build in-memory tree of existing haplogroups with indexes - existingTreeOpt 
<- variantMatchingService.buildExistingTree(haplogroupType).map(_.map(ExistingTree.fromRoot)) - _ = logger.info(s"Existing tree built with indexes: ${existingTreeOpt.map(t => s"${t.byName.size} nodes").getOrElse("no root found")}") - - // Phase 1b: Preload all variants from the source tree - allVariantNamesInTree = collectAllVariantNames(sourceTree).distinct - _ = logger.info(s"Preloading ${allVariantNamesInTree.size} distinct variant names...") - variantLookup <- { - val startTime = System.currentTimeMillis() - variantV2Repository.searchByNames(allVariantNamesInTree).map { result => - val elapsed = System.currentTimeMillis() - startTime - logger.info(s"Variant lookup completed in ${elapsed}ms, found ${result.size} unique variants") - result - } - } - variantCache = VariantCache( - nameToVariantId = variantLookup.flatMap { case (name, variants) => - variants.flatMap(_.variantId).headOption.map(id => name -> id) - } - ) - _ = logger.info(s"Variant cache built with ${variantCache.nameToVariantId.size} entries") - - // Phase 2: Compute root's cumulative variants and find matching node - // For ROOT node: try variant match first, then fall back to name match - // (Root nodes often have no variants, just a name like "Y") - rootSourceVariants = sourceTree.variants.flatMap(v => v.name :: v.aliases).map(_.toUpperCase).toSet - matchedExistingNode = existingTreeOpt.flatMap { tree => - tree.findMatchByVariants(rootSourceVariants).orElse { - // Fallback: match root by name if no variants - if (rootSourceVariants.isEmpty) tree.findByName(sourceTree.name) else None - } - } - _ = logger.info(s"Source root '${sourceTree.name}' (variants: ${rootSourceVariants.take(5).mkString(",")}) matched to existing: ${matchedExistingNode.map(_.haplogroup.name).getOrElse("none (will create)")}") - - // Phase 3: Perform merge with cumulative variant tracking - result <- mergeWithIndexedTree( - sourceNode = sourceTree, - sourceCumulativeVariants = rootSourceVariants, - existingNode = 
matchedExistingNode, - existingTree = existingTreeOpt, - parentId = anchorId, - context = context, - variantCache = variantCache, - accumulator = MergeAccumulator.empty, - callbacks = callbacks - ) - _ = logger.info(s"Merge completed: ${result.statistics}") - - // Write ambiguity report if needed and capture the path - ambiguityReportPath = if (result.ambiguities.nonEmpty) { - logger.warn(s"AMBIGUITIES DETECTED: ${result.ambiguities.size} placement(s) require curator review") - val path = provenanceService.writeAmbiguityReport(result.ambiguities, result.statistics, sourceName, haplogroupType, now) - path match { - case Some(p) => logger.info(s"Ambiguity report written to: $p") - case None => logger.warn("Failed to write ambiguity report") - } - path - } else None - } yield TreeMergeResponse( - success = result.errors.isEmpty, - message = if (result.errors.isEmpty) { - if (result.ambiguities.nonEmpty) - s"Merge completed with ${result.ambiguities.size} ambiguous placement(s) requiring review" - else - "Merge completed successfully" - } else "Merge completed with errors", - statistics = result.statistics, - conflicts = result.conflicts, - splits = result.splits, - ambiguities = result.ambiguities, - errors = result.errors, - ambiguityReportPath = ambiguityReportPath - ) - } - - // ============================================================================ - // Recursive Merge Implementation - // ============================================================================ - - /** - * Core recursive merge function implementing the Identify-Match-Graft algorithm. - * - * == Formal Role: Recursive Descent with Parallel Tree Traversal == - * - * This method implements Phase 2 (BFS Alignment) and Phase 3 (Conflict Resolution) - * of the formal algorithm. It processes source tree T₁ recursively while maintaining - * alignment with existing tree T₀ via the existingNode parameter. 
- * - * == Traverser State == - * - * The method maintains two conceptual traversers (D0/D1 pointers): - * - D₁ = sourceNode + sourceCumulativeVariants (position in incoming tree T₁) - * - D₀ = existingNode (position in baseline tree T₀, if aligned) - * - * == Case Dispatch == - * - * Based on MergeCase classification: - * - FULL_MATCH / DESCENDANT: Update existing node, continue to children - * - NO_EXISTING_MATCH: Create new node, may trigger grafting - * - SOURCE_IS_ANCESTOR: Node contraction (handled via grafting in createNodeWithIndexedLookup) - * - * @param sourceNode Current node in T₁ being processed - * @param sourceCumulativeVariants C(T₁) - cumulative variants from source root to this node - * @param existingNode D₀ - matched node in T₀ (if any) - * @param existingTree Full T₀ tree for global lookups - * @param parentId Database ID of parent in T₀ (for new node creation) - * @param context Merge configuration (source name, priorities, conflict strategy) - * @param variantCache Pre-loaded variant name → ID mapping - * @param accumulator Statistics and results collector - * @param callbacks Optional callbacks for change set tracking - */ - private def mergeWithIndexedTree( - sourceNode: PhyloNodeInput, - sourceCumulativeVariants: Set[String], - existingNode: Option[ExistingTreeNode], - existingTree: Option[ExistingTree], - parentId: Option[Int], - context: MergeContext, - variantCache: VariantCache, - accumulator: MergeAccumulator, - callbacks: Option[ChangeSetCallbacks] - ): Future[MergeAccumulator] = { - val processed = accumulator.statistics.nodesProcessed - val sourceNodeVariants = sourceNode.variants.flatMap(v => v.name :: v.aliases).map(_.toUpperCase).toSet - - // Classify merge case using formal algorithm - val mergeCase = MergeCase.classify(sourceNodeVariants, existingNode) - - if (processed == 0) { - logger.info(s"mergeWithIndexedTree called for root: ${sourceNode.name}, cumulative variants: ${sourceCumulativeVariants.size}, 
case=${mergeCase.description}") - } - - // Dispatch based on case classification, detecting ambiguities - mergeCase match { - case MergeCase.FullMatch(_, _) | MergeCase.SourceIsDescendant(_, _) => - // Case A/C: Source matches or extends existing - update and continue - updateExistingNode(sourceNode, sourceCumulativeVariants, existingNode.get, existingTree, parentId, context, variantCache, accumulator, callbacks) - - case MergeCase.SourceIsAncestor(_, _, existing) => - // Case B: Source is ancestor - this typically triggers node contraction - // The existing node will be grafted under the newly created source node - // This is handled by createNodeWithIndexedLookup's findPhylogeneticMatch - updateExistingNode(sourceNode, sourceCumulativeVariants, existing, existingTree, parentId, context, variantCache, accumulator, callbacks) - - case MergeCase.DisjointBranch(srcVariants, sharedVariants) => - // Case D: Disjoint branch - existing node matched but variants differ - // This is a PARTIAL MATCH that may indicate data quality issues - existingNode match { - case Some(existing) => - // Record ambiguity for curator review - val conflictingVariants = (srcVariants -- sharedVariants) ++ (existing.nodeVariants -- sharedVariants) - val confidence = if (sharedVariants.nonEmpty) sharedVariants.size.toDouble / math.max(srcVariants.size, existing.nodeVariants.size) else 0.0 - - val ambiguity = PlacementAmbiguity( - nodeName = sourceNode.name, - ambiguityType = PlacementAmbiguity.PARTIAL_MATCH, - description = s"Partial variant overlap with ${existing.haplogroup.name}: " + - s"${sharedVariants.size} shared, ${conflictingVariants.size} conflicting. 
" + - s"May indicate sequencing errors, recurrent mutations, or nomenclature mismatch.", - sharedVariants = sharedVariants.toList, - conflictingVariants = conflictingVariants.toList, - candidateMatches = List(existing.haplogroup.name), - resolution = s"Proceeded with match to ${existing.haplogroup.name} (best available)", - confidence = confidence - ) - - if (conflictingVariants.nonEmpty) { - logger.warn(s"AMBIGUITY: Partial match for ${sourceNode.name} -> ${existing.haplogroup.name} " + - s"(${sharedVariants.size} shared, ${conflictingVariants.size} conflicting, confidence=${f"$confidence%.2f"})") - } - - val accWithAmbiguity = accumulator.copy(ambiguities = ambiguity :: accumulator.ambiguities) - updateExistingNode(sourceNode, sourceCumulativeVariants, existing, existingTree, parentId, context, variantCache, accWithAmbiguity, callbacks) - - case None => - createNodeWithIndexedLookup(sourceNode, sourceCumulativeVariants, existingTree, parentId, context, variantCache, accumulator, callbacks) - } - - case MergeCase.NoExistingMatch(_) => - // No match in T₀ - create new node - // This may trigger Phase 4 (Grafting) if descendants exist - createNodeWithIndexedLookup(sourceNode, sourceCumulativeVariants, existingTree, parentId, context, variantCache, accumulator, callbacks) - } - } - - /** - * Update an existing node and process its children. - * - * == Formal Role: Case A/C Handler - Full Match or Source Is Descendant == - * - * This method handles the cases where source node T₁ matches or extends - * existing node T₀: - * - * - Case A (FULL_MATCH): U(T₁) = U(T₀) - merge metadata, continue - * - Case C (SOURCE_IS_DESCENDANT): U(T₁) ⊃ U(T₀) - update and extend - * - * For children, we implement parallel tree traversal: - * 1. First check direct children of T₀ for matches (no reparenting needed) - * 2. Then search within depth for granularity mismatches (may trigger grafting) - * 3. 
If no match, create new node (may trigger Node Contraction) - * - * == Adjacency List Update == - * - * When grafting occurs (existing node moved to new position), this performs - * an Adjacency List update - changing the parent_id in the database. - */ - private def updateExistingNode( - sourceNode: PhyloNodeInput, - sourceCumulativeVariants: Set[String], - existingNode: ExistingTreeNode, - existingTree: Option[ExistingTree], - parentId: Option[Int], - context: MergeContext, - variantCache: VariantCache, - accumulator: MergeAccumulator, - callbacks: Option[ChangeSetCallbacks] - ): Future[MergeAccumulator] = { - val existing = existingNode.haplogroup - val processed = accumulator.statistics.nodesProcessed - val nodeVariantCount = existingNode.nodeVariants.size - val sourceVariantCount = sourceNode.variants.size - - // Log large bottlenecks (30+ SNPs) - more informative than stride logging - if (nodeVariantCount >= 30 || sourceVariantCount >= 30) { - logger.info(s"BOTTLENECK MATCH: ${sourceNode.name} -> ${existing.name} (source: $sourceVariantCount SNPs, existing: $nodeVariantCount SNPs)") - } else if (processed % 500 == 0) { - logger.info(s"Processing node $processed: ${existing.name}") - } - - val variantIds = sourceNode.variants.flatMap { vi => - allVariantNames(vi).flatMap(name => variantCache.nameToVariantId.get(name.toUpperCase)) - }.distinct - - for { - existingHaplogroupVariantIds <- haplogroupVariantRepository.getHaplogroupVariantIds(existing.id.get) - - // Add variants - routes to WIP table in staging mode, production otherwise - newlyAssociatedIds <- stagingHelper.addVariantsStaged(existing.id.get, variantIds, context) - - addedVariantIds = newlyAssociatedIds.diff(existingHaplogroupVariantIds) - - // Update provenance (only in non-staging mode - we don't modify production nodes in staging) - _ <- if (!context.stagingMode) provenanceService.updateProvenance(existing, sourceNode.variants, context) - else Future.successful(()) - - updatedStats = 
accumulator.statistics.copy( - nodesProcessed = accumulator.statistics.nodesProcessed + 1, - nodesUnchanged = accumulator.statistics.nodesUnchanged + 1, - variantsAdded = accumulator.statistics.variantsAdded + addedVariantIds.size - ) - - // Process children with grafting/repositioning approach - // When we find a match deeper in the tree, we REPARENT it to be under current node - childrenResult <- sourceNode.children.foldLeft(Future.successful(accumulator.copy(statistics = updatedStats))) { (accFuture, child) => - accFuture.flatMap { acc => - val childNodeVariants = child.variants.flatMap(v => v.name :: v.aliases).map(_.toUpperCase).toSet - val childCumulativeVariants = sourceCumulativeVariants ++ childNodeVariants - - // First check direct children (no reparenting needed) - val directMatch = existingNode.children.find { c => - c.haplogroup.name.equalsIgnoreCase(child.name) || - (childNodeVariants.nonEmpty && c.nodeVariants.intersect(childNodeVariants).nonEmpty) - } - - directMatch match { - case Some(matched) => - // Direct child match - no reparenting needed - if (childNodeVariants.size >= 30 || matched.nodeVariants.size >= 30) { - logger.info(s"BOTTLENECK DIRECT: ${child.name} matched to ${matched.haplogroup.name}") - } - mergeWithIndexedTree(child, childCumulativeVariants, Some(matched), existingTree, existing.id, context, variantCache, acc, callbacks) - - case None => - // Check deeper - if found, will need reparenting (depth-limited search for granularity mismatch) - val deepMatch = existingNode.findMatchWithinDepth(child.name, childNodeVariants) - - deepMatch match { - case Some(matched) => - // ============================================================ - // DEPTH GRAFT: Found node deeper in tree - reparent to current position - // ============================================================ - // This handles granularity mismatches where T₀ has finer resolution - // than T₁ at this point, but T₁ matches a deeper node. 
- logger.info(s"DEPTH_GRAFT: Repositioning ${matched.haplogroup.name} from depth to be under ${existing.name}") - - // Record as split operation - val splitOp = SplitOperation( - parentName = existing.name, - newIntermediateName = child.name, - variantsRedistributed = childNodeVariants.toList, - childrenReassigned = List(matched.haplogroup.name), - source = context.sourceName - ) - - for { - // Perform Adjacency List update - routes to WIP table in staging mode - _ <- stagingHelper.reparentStaged(matched.haplogroup.id.get, None, existing.id.get, context) - - // Record REPARENT change via callback (only in non-staging mode) - _ = if (!context.stagingMode) { - callbacks.foreach(_.recordReparent(matched.haplogroup.id.get, None, existing.id.get)) - } - - reparentedStats = acc.statistics.copy( - relationshipsUpdated = acc.statistics.relationshipsUpdated + 1, - splitOperations = acc.statistics.splitOperations + 1 - ) - updatedAcc = acc.copy( - statistics = reparentedStats, - splits = splitOp :: acc.splits - ) - result <- mergeWithIndexedTree(child, childCumulativeVariants, Some(matched), existingTree, existing.id, context, variantCache, updatedAcc, callbacks) - } yield result - - case None => - // No match anywhere - create new node (Case D: DISJOINT_BRANCH) - if (childNodeVariants.size >= 30) { - logger.info(s"DISJOINT_BRANCH: Creating ${child.name} (${childNodeVariants.size} SNPs) under ${existing.name}") - } - mergeWithIndexedTree(child, childCumulativeVariants, None, existingTree, existing.id, context, variantCache, acc, callbacks) - } - } - } - } - } yield childrenResult - } - - /** - * Create a new node and process its children. - * - * == Formal Role: Phase 4 - Node Contraction and Grafting == - * - * This method handles Case D (NO_EXISTING_MATCH) and implements the critical - * Node Contraction operation when T₁ provides finer resolution than T₀. - * - * == Node Contraction (Injecting Median Nodes) == - * - * When T₀ has path A→C but T₁ has A→B→C: - * 1. 
B is created as a new node under A (this method) - * 2. findPhylogeneticMatch finds C as a descendant candidate - * 3. C is reparented under B (Adjacency List update) - * 4. A SplitOperation is recorded for audit - * - * This "contracts" the virtual edge A→C by injecting B as an intermediate node. - * - * == Phylogenetic Compatibility Check == - * - * Before grafting, we validate the Set Inclusion Property: - * C(ancestral) ⊆ C(candidate) - * - * This prevents cross-branch mismatches from recurrent SNPs. - * - * == SplitOperation Recording == - * - * When grafting occurs, we record: - * - parentName: The parent of the newly created intermediate node - * - newIntermediateName: The node being injected (source node) - * - childrenReassigned: Nodes reparented under the new intermediate - * - variantsRedistributed: Variants now associated with the new split - */ - private def createNodeWithIndexedLookup( - sourceNode: PhyloNodeInput, - sourceCumulativeVariants: Set[String], - existingTree: Option[ExistingTree], - parentId: Option[Int], - context: MergeContext, - variantCache: VariantCache, - accumulator: MergeAccumulator, - callbacks: Option[ChangeSetCallbacks] - ): Future[MergeAccumulator] = { - // SAFETY CHECK: Before creating, verify no node with this name already exists - // This handles cases where variant matching failed but names match (e.g., nomenclature differences) - val existingByName = existingTree.flatMap(_.findByName(sourceNode.name)) - if (existingByName.isDefined) { - val existing = existingByName.get - val sourceNodeVariants = sourceNode.variants.flatMap(v => v.name :: v.aliases).map(_.toUpperCase).toSet - - // Record NAME_VARIANT_MISMATCH ambiguity - only for SIGNIFICANT mismatches - // Minor variant differences are common and expected in phylogenetics due to: - // - Different sources using different naming conventions - // - Intermediate nodes often having few/no variants defined - // - Terminal nodes accumulating more specific variants - // - // We 
only flag when there's a SIGNIFICANT concern: - // 1. Both have variants defined AND there's NO overlap (complete mismatch) - // 2. The confidence drops below 0.2 (less than 20% overlap when both have variants) - val variantOverlap = sourceNodeVariants.intersect(existing.nodeVariants) - val variantDifference = (sourceNodeVariants -- existing.nodeVariants) ++ (existing.nodeVariants -- sourceNodeVariants) - - val ambiguity = if (sourceNodeVariants.nonEmpty && existing.nodeVariants.nonEmpty) { - val confidence = variantOverlap.size.toDouble / math.max(sourceNodeVariants.size, existing.nodeVariants.size) - - // Only flag if there's NO overlap OR very low confidence (< 20%) - // This filters out normal nomenclature differences while catching true data quality issues - if (variantOverlap.isEmpty || confidence < 0.2) { - Some(PlacementAmbiguity( - nodeName = sourceNode.name, - ambiguityType = PlacementAmbiguity.NAME_VARIANT_MISMATCH, - description = s"Name '${sourceNode.name}' matches existing node '${existing.haplogroup.name}' " + - s"but variants have ${if (variantOverlap.isEmpty) "NO" else "minimal"} overlap. " + - s"Source: ${sourceNodeVariants.size} variants, existing: ${existing.nodeVariants.size}. 
" + - s"May indicate nomenclature collision or data error.", - sharedVariants = variantOverlap.toList, - conflictingVariants = variantDifference.toList, - candidateMatches = List(existing.haplogroup.name), - resolution = s"Using existing node ${existing.haplogroup.name} (matched by name)", - confidence = confidence - )) - } else None - } else None // Don't flag when either side has no variants - that's expected - - val accWithAmbiguity = ambiguity match { - case Some(amb) => - logger.warn(s"AMBIGUITY: Name-variant mismatch for ${sourceNode.name} -> ${existing.haplogroup.name} " + - s"(${variantOverlap.size} shared, ${variantDifference.size} differ)") - accumulator.copy(ambiguities = amb :: accumulator.ambiguities) - case None => - logger.info(s"Node ${sourceNode.name} already exists (found by name), updating instead of creating") - accumulator - } - - return updateExistingNode(sourceNode, sourceCumulativeVariants, existing, existingTree, parentId, context, variantCache, accWithAmbiguity, callbacks) - } - - // ======================================================================== - // GLOBAL VARIANT MATCH: Look-ahead to prevent duplicate tree creation - // ======================================================================== - // - // Before creating a new node, check if ANY existing node matches by variants, - // regardless of tree position. This prevents the "premature branch creation" - // bug where intermediate ISOGG nodes (R1b1, R1b1a, etc.) create a parallel - // tree structure because they don't match existing nodes, causing their - // descendants (like R1b-L21) to never find their true matches (R1b1a1b1a1a2c1). - // - // The phylogenetic check is intentionally RELAXED here because: - // 1. The source tree may have different intermediate structure than existing tree - // 2. We want to find the REAL match even if tree topology differs - // 3. 
If we find a match, we merge there and let the parent-child relationship - // be established by the source tree structure (which may be more detailed) - // - // RECURRENT SNP GUARD: - // To prevent cross-lineage false matches (e.g., R1b-L21 matching I-L21), - // we require SOME cumulative variant overlap. The source's cumulative variants - // include its ancestry (R1b's M343, etc.), so a valid match in the same lineage - // will share those ancestral markers. A different lineage (I) won't. - val sourceNodeVariants = sourceNode.variants.flatMap(v => v.name :: v.aliases).map(_.toUpperCase).toSet - val globalVariantMatch = if (sourceNodeVariants.nonEmpty) { - existingTree.flatMap(_.findGlobalVariantMatch(sourceNodeVariants, sourceCumulativeVariants)) - } else None - - globalVariantMatch match { - case Some(matchedNode) => - val overlap = matchedNode.nodeVariants.intersect(sourceNodeVariants).size - logger.info(s"GLOBAL_VARIANT_MATCH: ${sourceNode.name} matches existing ${matchedNode.haplogroup.name} " + - s"by $overlap shared node variants (with lineage verification)") - return updateExistingNode(sourceNode, sourceCumulativeVariants, matchedNode, existingTree, parentId, context, variantCache, accumulator, callbacks) - case None => - // No global match - proceed with creating new node - } - - val processed = accumulator.statistics.nodesProcessed - if (processed % 100 == 0) { - logger.info(s"Processing node $processed: ${sourceNode.name} (new, ${sourceCumulativeVariants.size} cumulative variants)") - } - - val variantNames = primaryVariantNames(sourceNode.variants) - val provenance = HaplogroupProvenance.forNewNode(context.sourceName, variantNames) - - val newHaplogroup = Haplogroup( - id = None, - name = sourceNode.name, - lineage = None, - description = None, - haplogroupType = context.haplogroupType, - revisionId = 1, - source = context.sourceName, - confidenceLevel = "medium", - validFrom = context.timestamp, - validUntil = None, - formedYbp = sourceNode.formedYbp, 
- formedYbpLower = sourceNode.formedYbpLower, - formedYbpUpper = sourceNode.formedYbpUpper, - tmrcaYbp = sourceNode.tmrcaYbp, - tmrcaYbpLower = sourceNode.tmrcaYbpLower, - tmrcaYbpUpper = sourceNode.tmrcaYbpUpper, - ageEstimateSource = Some(context.sourceName), - provenance = Some(provenance) - ) - - val variantIds = sourceNode.variants.flatMap { vi => - allVariantNames(vi).flatMap(name => variantCache.nameToVariantId.get(name.toUpperCase)) - }.distinct - - for { - // Create haplogroup - routes to WIP table in staging mode - (newId, _) <- stagingHelper.createHaplogroupStaged(newHaplogroup, parentId, context) - - // Record CREATE change via callback (only in non-staging mode) - _ = if (!context.stagingMode) { - callbacks.foreach { cb => - val haplogroupJson = Json.obj( - "name" -> sourceNode.name, - "haplogroupType" -> context.haplogroupType.toString, - "source" -> context.sourceName, - "variants" -> sourceNode.variants.map(_.name) - ).toString() - cb.recordCreate(haplogroupJson, parentId) - } - } - - // Add variants - routes to WIP table in staging mode - haplogroupVariantIds <- stagingHelper.addVariantsStaged(newId, variantIds, context) - - // ======================================================================== - // VARIANT DOWNFLOW: Move variants from parent to new intermediate - // ======================================================================== - // - // When a higher-resolution source (ISOGG) provides finer tree structure, - // we may create intermediate nodes (e.g., A00-T between Y and its children). - // The existing parent (Y) may have variants that actually belong to the - // new intermediate (A00-T). - // - // Example: - // - DecodingUs: Y has variants V60, V168 (human-Neanderthal split markers) - // - ISOGG: Y → A00-T (defines V60, V168) → ... 
- // - After creating A00-T, we need to MOVE V60, V168 from Y to A00-T - // - // This "downflow" ensures variants are associated with their most specific - // defining haplogroup, not ancestors that happened to have them before - // finer structure was added. - // - // NOTE: In staging mode, we skip variant downflow because we can't modify - // production variant associations. This will be handled during Apply phase. - variantsRedistributed <- if (!context.stagingMode && parentId.isDefined && variantIds.nonEmpty && !stagingHelper.isPlaceholder(parentId.get)) { - for { - parentVariantIds <- haplogroupVariantRepository.getVariantIdsForHaplogroup(parentId.get) - variantIdsSet = variantIds.toSet - overlappingVariants = parentVariantIds.filter(variantIdsSet.contains) - removed <- if (overlappingVariants.nonEmpty) { - logger.info(s"VARIANT_DOWNFLOW: Moving ${overlappingVariants.size} variants from parent(id=${parentId.get}) to new node ${sourceNode.name}(id=$newId)") - haplogroupVariantRepository.bulkRemoveVariantsFromHaplogroup(parentId.get, overlappingVariants) - } else Future.successful(0) - } yield removed - } else Future.successful(0) - - // ======================================================================== - // SUBTREE LOOK-AHEAD: Reparent existing siblings that belong in this subtree - // ======================================================================== - // - // When creating a new intermediate node (e.g., A0000 under Y), we need to - // check if any of the parent's existing children should be moved under - // this new intermediate. This happens when ISOGG provides finer structure - // than the existing tree. - // - // Strategy: Collect ALL variants from the source subtree (this node and all - // descendants). If an existing sibling's nodeVariants overlap with any - // variant in the subtree, that sibling belongs somewhere in this subtree - // and should be reparented under this new node. 
- // - // Example: - // - Existing: Y → A0 - // - ISOGG: Y → A0000 → A000-T → A000 → ... → A0 - // - A0's variants match something in the A0000 subtree - // - So A0 should be reparented under A0000 - // - Later, when processing A000-T, A000, etc., A0 may move further down - // NOTE: In staging mode with placeholder parent, we can't look up existing siblings from - // the in-memory tree because the parent is a newly created WIP node. The reparenting will - // be handled during the Apply phase when placeholders are resolved to real IDs. - subtreeLookAheadReparents <- parentId.flatMap(pid => if (stagingHelper.isPlaceholder(pid)) None else existingTree.flatMap(_.findById(pid))) match { - case Some(parentNode) => - val subtreeVariants = collectAllVariantNames(sourceNode).map(_.toUpperCase).toSet - val siblingsToReparent = parentNode.children.filter { sibling => - // Don't reparent the node we just matched/created - sibling.haplogroup.id != Some(newId) && - // Check if sibling's nodeVariants overlap with ANY variant in the source subtree - sibling.nodeVariants.intersect(subtreeVariants).nonEmpty - } - - if (siblingsToReparent.nonEmpty) { - val siblingNames = siblingsToReparent.map(_.haplogroup.name) - logger.info(s"SUBTREE_LOOK_AHEAD: Reparenting ${siblingNames.mkString(", ")} under ${sourceNode.name} (subtree has ${subtreeVariants.size} variants)") - - Future.sequence(siblingsToReparent.map { sibling => - for { - // Use staged reparent - routes to WIP table in staging mode - _ <- stagingHelper.reparentStaged(sibling.haplogroup.id.get, parentId, newId, context) - // Record REPARENT change via callback (only in non-staging mode) - _ = if (!context.stagingMode) { - callbacks.foreach(_.recordReparent(sibling.haplogroup.id.get, parentId, newId)) - } - } yield sibling.haplogroup.name - }).map(_.size) - } else { - Future.successful(0) - } - case None => - Future.successful(0) - } - - updatedStats = accumulator.statistics.copy( - nodesProcessed = 
accumulator.statistics.nodesProcessed + 1, - nodesCreated = accumulator.statistics.nodesCreated + 1, - variantsAdded = accumulator.statistics.variantsAdded + haplogroupVariantIds.size, - relationshipsCreated = if (parentId.isDefined) accumulator.statistics.relationshipsCreated + 1 else accumulator.statistics.relationshipsCreated, - relationshipsUpdated = accumulator.statistics.relationshipsUpdated + subtreeLookAheadReparents - ) - - // ======================================================================== - // Phase 4: Process Children with Node Contraction / Grafting - // ======================================================================== - // - // For each child in T₁, we use phylogenetically-aware search to find - // existing nodes in T₀ that should be reparented under the newly created node. - // - // KEY: Since we just CREATED this node (sourceNode), existing nodes won't have its - // variants in their cumulative. We need to check against the ANCESTRAL lineage - // (the path up to but NOT including this new node). - // - // This implements the Set Inclusion Property check: - // C(ancestral) ⊆ C(candidate) - currentNodeVariants = sourceNode.variants.flatMap(v => v.name :: v.aliases).map(_.toUpperCase).toSet - ancestralCumulativeVariants = sourceCumulativeVariants -- currentNodeVariants - - childrenResult <- sourceNode.children.foldLeft(Future.successful(accumulator.copy(statistics = updatedStats))) { (accFuture, child) => - accFuture.flatMap { acc => - val childNodeVariants = child.variants.flatMap(v => v.name :: v.aliases).map(_.toUpperCase).toSet - val childCumulativeVariants = sourceCumulativeVariants ++ childNodeVariants - - // ============================================================ - // STAR CLUSTER SIBLING SWEEP - // ============================================================ - // Find ALL phylogenetically compatible siblings, not just the best match. 
- // This handles star clusters where T₀ has multiple siblings that T₁ groups - // under a new intermediate node. - // - // Example: T₀ has A → {B1, B2, B3, C1} - // T₁ has A → B → {B1, B2, B3} - // We need to reparent ALL of B1, B2, B3 under B (not just B1) - val allMatches = existingTree.map(_.findAllPhylogeneticMatches(ancestralCumulativeVariants, childNodeVariants)).getOrElse(Seq.empty) - - if (allMatches.nonEmpty) { - // ============================================================ - // NODE CONTRACTION with SIBLING SWEEP - // ============================================================ - // Sort by overlap to get the "primary" match for recursive merge - val sortedMatches = allMatches.map { node => - val overlap = node.nodeVariants.intersect(childNodeVariants).size - (node, overlap) - }.sortBy(-_._2) - - val (primaryMatch, primaryOverlap) = sortedMatches.head - val siblingMatches = sortedMatches.tail.map(_._1) - - val allMatchNames = allMatches.map(_.haplogroup.name) - - // Record MULTIPLE_CANDIDATES ambiguity when star cluster has competing matches - // This alerts curators that the algorithm chose one primary match among several options - val starClusterAmbiguity = if (siblingMatches.nonEmpty) { - logger.info(s"STAR_CLUSTER_SWEEP: Reparenting ${allMatchNames.mkString(", ")} under ${sourceNode.name} (primary: ${primaryMatch.haplogroup.name}, ${primaryOverlap} overlapping variants)") - - // Calculate confidence: higher when primary has significantly more overlap than others - val secondBestOverlap = sortedMatches.lift(1).map(_._2).getOrElse(0) - val confidence = if (primaryOverlap > 0) { - val overlapDiff = (primaryOverlap - secondBestOverlap).toDouble / primaryOverlap - 0.5 + (overlapDiff * 0.5) // Range: 0.5 (equal) to 1.0 (dominant primary) - } else 0.3 - - Some(PlacementAmbiguity( - nodeName = child.name, - ambiguityType = PlacementAmbiguity.MULTIPLE_CANDIDATES, - description = s"Star cluster resolution: ${allMatches.size} existing nodes match child 
'${child.name}'. " + - s"Primary match '${primaryMatch.haplogroup.name}' has $primaryOverlap overlapping variants. " + - s"All ${allMatchNames.mkString(", ")} will be reparented under '${sourceNode.name}'.", - sharedVariants = primaryMatch.nodeVariants.intersect(childNodeVariants).toList, - conflictingVariants = List.empty, // No conflict - just multiple valid options - candidateMatches = allMatchNames.toList, - resolution = s"Chose ${primaryMatch.haplogroup.name} as primary (highest overlap), reparented all siblings", - confidence = confidence - )) - } else { - logger.info(s"NODE_CONTRACTION: Reparenting ${primaryMatch.haplogroup.name} under ${sourceNode.name} (${primaryOverlap} overlapping variants)") - None - } - - // Record the split operation for audit trail (includes all siblings) - val splitOp = SplitOperation( - parentName = sourceNode.name, - newIntermediateName = sourceNode.name, - variantsRedistributed = currentNodeVariants.toList, - childrenReassigned = allMatchNames.toList, - source = context.sourceName - ) - - for { - // Perform Adjacency List updates for ALL matching siblings - routes to WIP in staging mode - _ <- Future.sequence(allMatches.map { node => - stagingHelper.reparentStaged(node.haplogroup.id.get, None, newId, context) - }) - - // Record REPARENT changes via callbacks (only in non-staging mode) - _ = if (!context.stagingMode) { - callbacks.foreach { cb => - allMatches.foreach { node => - cb.recordReparent(node.haplogroup.id.get, None, newId) - } - } - } - - reparentedStats = acc.statistics.copy( - relationshipsUpdated = acc.statistics.relationshipsUpdated + allMatches.size, - splitOperations = acc.statistics.splitOperations + 1 - ) - // Add ambiguity if present - accWithAmbiguity = starClusterAmbiguity match { - case Some(amb) => acc.copy(ambiguities = amb :: acc.ambiguities) - case None => acc - } - updatedAcc = accWithAmbiguity.copy( - statistics = reparentedStats, - splits = splitOp :: accWithAmbiguity.splits - ) - // Continue recursive 
merge with the primary (best) match only - // Siblings are reparented but not recursively merged (they keep their subtrees) - result <- mergeWithIndexedTree(child, childCumulativeVariants, Some(primaryMatch), existingTree, Some(newId), context, variantCache, updatedAcc, callbacks) - } yield result - - } else { - // No phylogenetically compatible node found - create new branch (Case D: DISJOINT_BRANCH) - mergeWithIndexedTree(child, childCumulativeVariants, None, existingTree, Some(newId), context, variantCache, acc, callbacks) - } - } - } - } yield childrenResult - } -} diff --git a/app/services/tree/TreeMergePreviewService.scala b/app/services/tree/TreeMergePreviewService.scala deleted file mode 100644 index 519356f7..00000000 --- a/app/services/tree/TreeMergePreviewService.scala +++ /dev/null @@ -1,127 +0,0 @@ -package services.tree - -import jakarta.inject.{Inject, Singleton} -import models.api.haplogroups.* -import models.domain.haplogroups.VariantIndex -import play.api.Logging - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service responsible for preview/simulation of tree merge operations. - * - * Provides dry-run analysis that: - * - Identifies which nodes would be created, updated, or unchanged - * - Detects conflicts (e.g., conflicting age estimates) - * - Collects statistics without modifying the database - */ -@Singleton -class TreeMergePreviewService @Inject()( - variantMatchingService: VariantMatchingService, - provenanceService: TreeMergeProvenanceService -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Simulate merge without applying changes. 
- * - * @param sourceTree The incoming tree to merge - * @param sourceName Name of the source (e.g., "ISOGG", "ytree.net") - * @param existingIndex Variant index of existing haplogroups - * @param priorityConfig Source priority configuration - * @return Preview response with statistics and identified changes - */ - def simulateMerge( - sourceTree: PhyloNodeInput, - sourceName: String, - existingIndex: VariantIndex, - priorityConfig: SourcePriorityConfig - ): Future[MergePreviewResponse] = { - // Recursively analyze the tree - val (stats, conflicts, splits, ambiguities, newNodes, updatedNodes, unchangedNodes) = - analyzeTree(sourceTree, existingIndex, sourceName, priorityConfig) - - Future.successful(MergePreviewResponse( - statistics = stats, - conflicts = conflicts, - splits = splits, - ambiguities = ambiguities, - newNodes = newNodes, - updatedNodes = updatedNodes, - unchangedNodes = unchangedNodes - )) - } - - /** - * Analyze tree structure for preview without making changes. - * - * Recursively traverses the source tree, matching against the existing index - * and collecting statistics about what would happen during a real merge. 
- * - * @param node Current source node being analyzed - * @param index Variant index for matching - * @param sourceName Name of the source - * @param priorityConfig Priority configuration for conflict resolution - * @return Tuple of (statistics, conflicts, splits, ambiguities, newNodes, updatedNodes, unchangedNodes) - */ - private def analyzeTree( - node: PhyloNodeInput, - index: VariantIndex, - sourceName: String, - priorityConfig: SourcePriorityConfig - ): (MergeStatistics, List[MergeConflict], List[SplitOperation], List[PlacementAmbiguity], List[String], List[String], List[String]) = { - - val existingMatch = variantMatchingService.findExistingMatch(node, index) - val conflicts = scala.collection.mutable.ListBuffer.empty[MergeConflict] - val splits = scala.collection.mutable.ListBuffer.empty[SplitOperation] - val ambiguities = scala.collection.mutable.ListBuffer.empty[PlacementAmbiguity] - val newNodes = scala.collection.mutable.ListBuffer.empty[String] - val updatedNodes = scala.collection.mutable.ListBuffer.empty[String] - val unchangedNodes = scala.collection.mutable.ListBuffer.empty[String] - - var stats = existingMatch match { - case Some(existing) => - val existingSource = existing.provenance.map(_.primaryCredit).getOrElse(existing.source) - val shouldUpdate = provenanceService.getPriority(sourceName, priorityConfig) < provenanceService.getPriority(existingSource, priorityConfig) - - // Check for conflicts - if (node.formedYbp.isDefined && existing.formedYbp.isDefined && node.formedYbp != existing.formedYbp) { - conflicts += MergeConflict( - haplogroupName = existing.name, - field = "formedYbp", - existingValue = existing.formedYbp.get.toString, - newValue = node.formedYbp.get.toString, - resolution = if (shouldUpdate) "will_update" else "will_keep_existing", - existingSource = existingSource, - newSource = sourceName - ) - } - - if (shouldUpdate && conflicts.nonEmpty) { - updatedNodes += existing.name - MergeStatistics(1, 0, 1, 0, 0, 0, 0, 0, 0) - } else 
{ - unchangedNodes += existing.name - MergeStatistics(1, 0, 0, 1, 0, 0, 0, 0, 0) - } - - case None => - newNodes += node.name - MergeStatistics(1, 1, 0, 0, node.variants.size, 0, 1, 0, 0) - } - - // Process children - node.children.foreach { child => - val (childStats, childConflicts, childSplits, childAmbiguities, childNew, childUpdated, childUnchanged) = - analyzeTree(child, index, sourceName, priorityConfig) - stats = MergeStatistics.combine(stats, childStats) - conflicts ++= childConflicts - splits ++= childSplits - ambiguities ++= childAmbiguities - newNodes ++= childNew - updatedNodes ++= childUpdated - unchangedNodes ++= childUnchanged - } - - (stats, conflicts.toList, splits.toList, ambiguities.toList, newNodes.toList, updatedNodes.toList, unchangedNodes.toList) - } -} diff --git a/app/services/tree/TreeMergeProvenanceService.scala b/app/services/tree/TreeMergeProvenanceService.scala deleted file mode 100644 index d551afd4..00000000 --- a/app/services/tree/TreeMergeProvenanceService.scala +++ /dev/null @@ -1,270 +0,0 @@ -package services.tree - -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.api.haplogroups.* -import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance, MergeContext} -import play.api.Logging -import repositories.HaplogroupCoreRepository - -import java.io.{File, PrintWriter} -import java.time.LocalDateTime -import java.time.format.DateTimeFormatter -import scala.concurrent.{ExecutionContext, Future} -import scala.util.{Try, Using} - -/** - * Service responsible for provenance tracking, age estimate management, - * and ambiguity report generation during tree merge operations. 
- * - * Handles: - * - Source attribution and credit assignment (priority-based) - * - Age estimate merging from multiple sources - * - Markdown report generation for merge ambiguities - */ -@Singleton -class TreeMergeProvenanceService @Inject()( - haplogroupRepository: HaplogroupCoreRepository -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Update provenance for an existing haplogroup. - * - * Uses priority-based credit assignment: - * - ISOGG and other protected sources are always preserved - * - Higher priority sources (lower index) take precedence - * - * @param existing The existing haplogroup to update - * @param newVariants New variants being added (for variant provenance tracking) - * @param context Merge context with source name and priority config - * @return Future[Boolean] indicating success - */ - def updateProvenance( - existing: Haplogroup, - newVariants: List[VariantInput], - context: MergeContext - ): Future[Boolean] = { - val existingProvenance = existing.provenance.getOrElse( - HaplogroupProvenance(primaryCredit = existing.source, nodeProvenance = Set(existing.source)) - ) - - // Determine primary credit based on priority and preservation rules - val currentCredit = existingProvenance.primaryCredit - val newSource = context.sourceName - - val primaryCredit = if (HaplogroupProvenance.shouldPreserveCredit(currentCredit)) { - currentCredit // Always preserve ISOGG (or other protected sources) - } else { - // Check priority: lower index = higher priority - val currentPriority = getPriority(currentCredit, context.priorityConfig) - val newPriority = getPriority(newSource, context.priorityConfig) - - if (newPriority < currentPriority) { - newSource // Update to higher priority source - } else { - currentCredit // Keep existing - } - } - - // Add new source to node provenance - val updatedNodeProv = existingProvenance.nodeProvenance + context.sourceName - - // Add variant provenance for new variants (primary names only for provenance 
tracking) - val variantNames = primaryVariantNames(newVariants) - val updatedVariantProv = variantNames.foldLeft(existingProvenance.variantProvenance) { (prov, variant) => - prov.updatedWith(variant) { - case Some(sources) => Some(sources + context.sourceName) - case None => Some(Set(context.sourceName)) - } - } - - val updatedProvenance = HaplogroupProvenance( - primaryCredit = primaryCredit, - nodeProvenance = updatedNodeProv, - variantProvenance = updatedVariantProv, - lastMergedAt = Some(context.timestamp), - lastMergedFrom = Some(context.sourceName) - ) - - haplogroupRepository.updateProvenance(existing.id.get, updatedProvenance) - } - - /** - * Update age estimates for a haplogroup. - * - * Incoming age estimates fill in missing values but don't overwrite existing ones. - * - * @param haplogroupId ID of the haplogroup to update - * @param node Source node with age estimate data - * @param sourceName Name of the source providing the estimates - * @return Future[Boolean] indicating success - */ - def updateAgeEstimates( - haplogroupId: Int, - node: PhyloNodeInput, - sourceName: String - ): Future[Boolean] = { - haplogroupRepository.findById(haplogroupId).flatMap { - case Some(existing) => - val updated = existing.copy( - formedYbp = node.formedYbp.orElse(existing.formedYbp), - formedYbpLower = node.formedYbpLower.orElse(existing.formedYbpLower), - formedYbpUpper = node.formedYbpUpper.orElse(existing.formedYbpUpper), - tmrcaYbp = node.tmrcaYbp.orElse(existing.tmrcaYbp), - tmrcaYbpLower = node.tmrcaYbpLower.orElse(existing.tmrcaYbpLower), - tmrcaYbpUpper = node.tmrcaYbpUpper.orElse(existing.tmrcaYbpUpper), - ageEstimateSource = Some(sourceName) - ) - haplogroupRepository.update(updated) - case None => - Future.successful(false) - } - } - - /** - * Get priority for a source (lower = higher priority). 
- * - * @param source Source name to look up - * @param config Priority configuration - * @return Priority index (lower is higher priority) - */ - def getPriority(source: String, config: SourcePriorityConfig): Int = { - config.sourcePriorities.indexOf(source) match { - case -1 => config.defaultPriority - case idx => idx - } - } - - /** - * Check if node has any age estimates. - */ - def hasAgeEstimates(node: PhyloNodeInput): Boolean = { - node.formedYbp.isDefined || node.tmrcaYbp.isDefined - } - - /** - * Extract primary variant names from a list of VariantInput. - */ - private def primaryVariantNames(variants: List[VariantInput]): List[String] = - variants.map(_.name) - - /** - * Write a markdown report of merge ambiguities for curator review. - * - * Generates a structured report at logs/merge-reports/ containing: - * - Merge statistics - * - Ambiguity summary by type - * - Detailed entries sorted by confidence (lowest first) - * - * @param ambiguities List of placement ambiguities detected during merge - * @param statistics Merge statistics for context - * @param sourceName Name of the source being merged (e.g., "ISOGG") - * @param haplogroupType Y or MT - * @param timestamp When the merge occurred - * @return Path to the written report, or None if writing failed - */ - def writeAmbiguityReport( - ambiguities: List[PlacementAmbiguity], - statistics: MergeStatistics, - sourceName: String, - haplogroupType: HaplogroupType, - timestamp: LocalDateTime - ): Option[String] = { - if (ambiguities.isEmpty) return None - - Try { - // Create reports directory if it doesn't exist - val reportsDir = new File("logs/merge-reports") - if (!reportsDir.exists()) { - reportsDir.mkdirs() - } - - // Generate filename with timestamp - val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss") - val timestampStr = timestamp.format(formatter) - val sanitizedSource = sourceName.replaceAll("[^a-zA-Z0-9_-]", "_") - val filename = 
s"ambiguity-report_${haplogroupType}_${sanitizedSource}_$timestampStr.md" - val reportFile = new File(reportsDir, filename) - - // Group ambiguities by type for organized reporting - val byType = ambiguities.groupBy(_.ambiguityType) - - Using(new PrintWriter(reportFile)) { writer => - writer.println(s"# Merge Ambiguity Report") - writer.println() - writer.println(s"**Source:** $sourceName") - writer.println(s"**Haplogroup Type:** $haplogroupType") - writer.println(s"**Timestamp:** $timestamp") - writer.println() - - // Summary statistics - writer.println("## Merge Statistics") - writer.println() - writer.println(s"| Metric | Count |") - writer.println(s"|--------|-------|") - writer.println(s"| Nodes Processed | ${statistics.nodesProcessed} |") - writer.println(s"| Nodes Created | ${statistics.nodesCreated} |") - writer.println(s"| Nodes Updated | ${statistics.nodesUpdated} |") - writer.println(s"| Nodes Unchanged | ${statistics.nodesUnchanged} |") - writer.println(s"| Variants Added | ${statistics.variantsAdded} |") - writer.println(s"| Relationships Created | ${statistics.relationshipsCreated} |") - writer.println(s"| Relationships Updated | ${statistics.relationshipsUpdated} |") - writer.println(s"| Split Operations | ${statistics.splitOperations} |") - writer.println() - - // Ambiguity summary - writer.println("## Ambiguity Summary") - writer.println() - writer.println(s"**Total Ambiguities:** ${ambiguities.size}") - writer.println() - writer.println("| Type | Count |") - writer.println("|------|-------|") - byType.toSeq.sortBy(-_._2.size).foreach { case (ambType, items) => - writer.println(s"| $ambType | ${items.size} |") - } - writer.println() - - // Detailed ambiguities by type - byType.toSeq.sortBy(-_._2.size).foreach { case (ambType, items) => - writer.println(s"## $ambType (${items.size})") - writer.println() - - // Sort by confidence (lowest first - most concerning) - items.sortBy(_.confidence).foreach { amb => - writer.println(s"### ${amb.nodeName}") - 
writer.println() - writer.println(s"**Confidence:** ${f"${amb.confidence}%.2f"}") - writer.println() - writer.println(s"**Description:** ${amb.description}") - writer.println() - writer.println(s"**Resolution:** ${amb.resolution}") - writer.println() - - if (amb.candidateMatches.nonEmpty) { - writer.println(s"**Candidate Matches:** ${amb.candidateMatches.mkString(", ")}") - writer.println() - } - - if (amb.sharedVariants.nonEmpty) { - writer.println(s"**Shared Variants (${amb.sharedVariants.size}):** ${amb.sharedVariants.take(20).mkString(", ")}${if (amb.sharedVariants.size > 20) " ..." else ""}") - writer.println() - } - - if (amb.conflictingVariants.nonEmpty) { - writer.println(s"**Conflicting Variants (${amb.conflictingVariants.size}):** ${amb.conflictingVariants.take(20).mkString(", ")}${if (amb.conflictingVariants.size > 20) " ..." else ""}") - writer.println() - } - - writer.println("---") - writer.println() - } - } - - writer.println() - writer.println("*Report generated by DecodingUs TreeMergeProvenanceService*") - }.get - - reportFile.getAbsolutePath - }.toOption - } -} diff --git a/app/services/tree/VariantMatchingService.scala b/app/services/tree/VariantMatchingService.scala deleted file mode 100644 index 5f6c483b..00000000 --- a/app/services/tree/VariantMatchingService.scala +++ /dev/null @@ -1,270 +0,0 @@ -package services.tree - -import jakarta.inject.{Inject, Singleton} -import models.HaplogroupType -import models.api.haplogroups.{PhyloNodeInput, VariantInput} -import models.domain.haplogroups.{ExistingTreeNode, Haplogroup, VariantIndex} -import play.api.Logging -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository} - -import scala.concurrent.{ExecutionContext, Future} - -/** - * Service responsible for tree building, indexing, and phylogenetic matching - * during tree merge operations. 
- * - * Handles: - * - Building ExistingTree structures with cumulative variant normalization - * - Creating variant-to-haplogroup indexes for O(1) lookups - * - Finding existing matches by name or variant overlap - * - Forest handling (multiple roots unified under virtual Super-Adam) - */ -@Singleton -class VariantMatchingService @Inject()( - haplogroupRepository: HaplogroupCoreRepository, - haplogroupVariantRepository: HaplogroupVariantRepository -)(implicit ec: ExecutionContext) extends Logging { - - /** - * Build an index of existing haplogroups by their variant names. - * This enables variant-based matching across different naming conventions. - * - * @param haplogroupType Y or MT haplogroup type - * @return VariantIndex with maps for variant-to-haplogroup and name-to-haplogroup lookups - */ - def buildVariantIndex(haplogroupType: HaplogroupType): Future[VariantIndex] = { - haplogroupRepository.getAllWithVariantNames(haplogroupType).map { haplogroupsWithVariants => - val variantToHaplogroup = haplogroupsWithVariants.flatMap { case (hg, variants) => - variants.map(v => v.toUpperCase -> hg) - }.groupMap(_._1)(_._2) - - val haplogroupByName = haplogroupsWithVariants.map { case (hg, _) => - hg.name.toUpperCase -> hg - }.toMap - - VariantIndex(variantToHaplogroup, haplogroupByName) - } - } - - /** - * Build a scoped variant index for a subset of haplogroups. - * Used for subtree merge operations. 
- * - * @param haplogroups The haplogroups to include in the index - * @return VariantIndex scoped to the provided haplogroups - */ - def buildVariantIndexForScope(haplogroups: Seq[Haplogroup]): Future[VariantIndex] = { - val haplogroupIds = haplogroups.flatMap(_.id) - - // Bulk fetch variants for all haplogroups in the scope - haplogroupVariantRepository.getVariantsForHaplogroups(haplogroupIds).map { variants => - // Group variants by haplogroup ID - val variantsByHaplogroupId = variants.groupMap(_._1)(_._2) - - // Associate haplogroups with their variant names - val hgsWithVariantNames = haplogroups.map { hg => - val associatedVariants = variantsByHaplogroupId.getOrElse(hg.id.get, Seq.empty) - (hg, associatedVariants.flatMap(_.canonicalName)) - } - - val variantToHaplogroup = hgsWithVariantNames.flatMap { case (hg, variantNames) => - variantNames.map(v => v.toUpperCase -> hg) - }.groupMap(_._1)(_._2) - - val haplogroupByName = hgsWithVariantNames.map { case (hg, _) => - hg.name.toUpperCase -> hg - }.toMap - - VariantIndex(variantToHaplogroup, haplogroupByName) - } - } - - /** - * Build an in-memory tree structure of existing haplogroups. - * - * == Formal Role: Phase 1 - Normalization of T₀ == - * - * This method loads the existing tree and normalizes each node with: - * - U(N): nodeVariants - variants defined at this node - * - C(N): cumulativeVariants - all variants from root to N - * - * == Forest Support == - * - * Y-DNA and mtDNA databases may contain multiple root trees: - * - The main tree (Y-Adam or mtEve) - * - Floating fragments from research papers - * - Orphan nodes not yet connected - * - * To handle this, we create a virtual "Super-Adam" root that parents all - * actual roots, enabling unified indexing and matching across all fragments. 
- * - * @param haplogroupType Y or MT haplogroup type - * @return Optional root node of the existing tree (may be virtual Super-Adam if forest) - */ - def buildExistingTree(haplogroupType: HaplogroupType): Future[Option[ExistingTreeNode]] = { - for { - // Bulk fetch all haplogroups with their variants - haplogroupsWithVariants <- haplogroupRepository.getAllWithVariantNames(haplogroupType) - _ = logger.info(s"Loaded ${haplogroupsWithVariants.size} existing haplogroups") - - // Bulk fetch all relationships - relationships <- haplogroupRepository.getAllRelationships(haplogroupType) - _ = logger.info(s"Loaded ${relationships.size} relationships") - - // Build a map of haplogroup ID -> (haplogroup, variant names) - hgMap = haplogroupsWithVariants.flatMap { case (hg, variants) => - hg.id.map(id => id -> (hg, variants.map(_.toUpperCase).toSet)) - }.toMap - - // Build parent -> children map from relationships - parentToChildren = relationships.groupMap(_._2)(_._1) // parentId -> Seq[childId] - - // Find roots (haplogroups with no parent) - childIds = relationships.map(_._1).toSet - rootIds = hgMap.keys.filterNot(childIds.contains).toSeq - rootNames = rootIds.flatMap(id => hgMap.get(id).map { case (hg, _) => s"${hg.name}(id=$id)" }) - _ = logger.info(s"Found ${rootIds.size} root haplogroup(s): ${rootNames.mkString(", ")}") - } yield { - // Recursively build tree nodes with cumulative variants - def buildNode(hgId: Int, inheritedVariants: Set[String]): Option[ExistingTreeNode] = { - hgMap.get(hgId).map { case (hg, nodeVariants) => - val cumulativeVariants = inheritedVariants ++ nodeVariants - val nodeChildIds = parentToChildren.getOrElse(hgId, Seq.empty) - val children = nodeChildIds.flatMap(childId => buildNode(childId, cumulativeVariants)) - ExistingTreeNode(hg, nodeVariants, cumulativeVariants, children) - } - } - - // Build ALL root trees (not just the largest) - val allRootTrees = rootIds.flatMap { rootId => - buildNode(rootId, Set.empty).map(tree => (tree, 
countTreeNodes(tree))) - }.sortBy(-_._2) // Sort by size descending - - if (allRootTrees.isEmpty) { - logger.info(s"No existing tree found") - None - } else if (allRootTrees.size == 1) { - // Single root - return directly - val (primaryRoot, nodeCount) = allRootTrees.head - logger.info(s"Selected root: ${primaryRoot.haplogroup.name} with $nodeCount nodes") - Some(primaryRoot) - } else { - // Multiple roots - create virtual "Super-Adam" to unify the forest - // This ensures all nodes from all fragments are indexed for matching - val (primaryRoot, primaryCount) = allRootTrees.head - val fragmentCount = allRootTrees.tail.map(_._2).sum - val fragmentNames = allRootTrees.tail.map(_._1.haplogroup.name) - - logger.info(s"FOREST DETECTED: Primary root ${primaryRoot.haplogroup.name} ($primaryCount nodes), " + - s"${allRootTrees.size - 1} fragment(s): ${fragmentNames.mkString(", ")} ($fragmentCount total nodes)") - - // Create virtual Super-Adam root with all actual roots as children - // The virtual root has no variants (empty sets) and a synthetic haplogroup - val virtualSuperAdam = Haplogroup( - id = Some(-1), // Synthetic ID (never persisted) - name = s"__SUPER_ADAM_${haplogroupType}__", - lineage = None, - description = Some("Virtual root unifying forest fragments"), - haplogroupType = haplogroupType, - revisionId = 0, - source = "SYSTEM", - confidenceLevel = "system", - validFrom = java.time.LocalDateTime.now(), - validUntil = None, - formedYbp = None, - formedYbpLower = None, - formedYbpUpper = None, - tmrcaYbp = None, - tmrcaYbpLower = None, - tmrcaYbpUpper = None, - ageEstimateSource = None, - provenance = None - ) - - val unifiedRoot = ExistingTreeNode( - haplogroup = virtualSuperAdam, - nodeVariants = Set.empty, - cumulativeVariants = Set.empty, - children = allRootTrees.map(_._1) - ) - - logger.info(s"Created unified forest with ${countTreeNodes(unifiedRoot)} total nodes") - Some(unifiedRoot) - } - } - } - - /** - * Find an existing haplogroup that matches the 
given node. - * - * Matching priority: - * 1. Exact name match (most reliable) - * 2. Variant-based match with name confirmation (variant match + similar name) - * 3. Variant-based match with multiple shared variants - * - * This avoids false matches where downstream haplogroups inherit ancestral variants. - * - * @param node The source node to match - * @param index The variant index to search - * @return Optional matching haplogroup - */ - def findExistingMatch(node: PhyloNodeInput, index: VariantIndex): Option[Haplogroup] = { - // First: try exact name match (most reliable) - val nameMatch = index.haplogroupByName.get(node.name.toUpperCase) - if (nameMatch.isDefined) { - return nameMatch - } - - // Second: try variant-based matching - val allNames = allVariantNames(node.variants) - if (allNames.isEmpty) { - return None - } - - val variantMatches = allNames - .flatMap(v => index.variantToHaplogroup.getOrElse(v.toUpperCase, Seq.empty)) - .groupBy(identity) - .view.mapValues(_.size) - .toSeq - .sortBy(-_._2) // Sort by match count descending - - // Require at least 2 matching variants to avoid false positives from inherited variants - // OR if there's only 1 variant in the input, require that the matched haplogroup name - // starts with the same letter as the input node name (basic lineage check) - variantMatches.headOption.flatMap { case (hg, matchCount) => - val inputNodePrefix = node.name.take(1).toUpperCase - val matchedPrefix = hg.name.take(1).toUpperCase - - if (matchCount >= 2) { - // Multiple variant matches - likely correct - Some(hg) - } else if (matchCount == 1 && inputNodePrefix == matchedPrefix) { - // Single variant match but same haplogroup lineage (e.g., both start with "R") - Some(hg) - } else { - // Single variant match with different lineage - likely false positive - logger.debug(s"Rejecting weak variant match: ${node.name} -> ${hg.name} (only $matchCount shared variants, different lineage)") - None - } - } - } - - /** - * Count nodes in an 
ExistingTreeNode tree. - */ - def countTreeNodes(node: ExistingTreeNode): Int = { - 1 + node.children.map(countTreeNodes).sum - } - - /** - * Extract all variant names (primary + aliases) from a VariantInput. - */ - private def allVariantNames(variant: VariantInput): List[String] = - variant.name :: variant.aliases - - /** - * Extract all variant names from a list of VariantInput. - */ - private def allVariantNames(variants: List[VariantInput]): List[String] = - variants.flatMap(allVariantNames) -} diff --git a/app/startup/SecurityStartupCheck.scala b/app/startup/SecurityStartupCheck.scala deleted file mode 100644 index 0e1a3cc7..00000000 --- a/app/startup/SecurityStartupCheck.scala +++ /dev/null @@ -1,24 +0,0 @@ -package startup - -import play.api.{Configuration, Environment, Logging, Mode} - -import javax.inject.{Inject, Singleton} - -/** - * Validates critical security configuration at application startup. - * Bound as an eager singleton so it runs immediately on boot. - */ -@Singleton -class SecurityStartupCheck @Inject()(config: Configuration, env: Environment) extends Logging { - - if (env.mode == Mode.Prod) { - val secret = config.get[String]("play.http.secret.key") - if (secret == "changeme" || secret.length < 32) { - throw new IllegalStateException( - "FATAL: APPLICATION_SECRET is not set or is too short. " + - "Set the APPLICATION_SECRET environment variable to a secure random value (>= 32 chars)." 
- ) - } - logger.info("Security startup check passed.") - } -} diff --git a/app/utils/Base58.scala b/app/utils/Base58.scala deleted file mode 100644 index 591b6dea..00000000 --- a/app/utils/Base58.scala +++ /dev/null @@ -1,40 +0,0 @@ -package utils - -object Base58 { - private val Alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz" - private val Base = BigInt(58) - private val CharIndex: Map[Char, Int] = Alphabet.zipWithIndex.toMap - - def decode(input: String): Array[Byte] = { - if (input.isEmpty) return Array.empty[Byte] - - var bi = BigInt(0) - for (ch <- input) { - bi = bi * Base + CharIndex.getOrElse(ch, - throw new IllegalArgumentException(s"Invalid base58 character: $ch")) - } - - val bytes = if (bi == BigInt(0)) Array.empty[Byte] else { - val raw = bi.toByteArray - if (raw.head == 0) raw.tail else raw - } - - val leadingZeros = input.takeWhile(_ == '1').length - Array.fill(leadingZeros)(0.toByte) ++ bytes - } - - def encode(input: Array[Byte]): String = { - if (input.isEmpty) return "" - - var bi = BigInt(1, input) - val sb = new StringBuilder - while (bi > 0) { - val (div, mod) = bi /% Base - sb.append(Alphabet(mod.toInt)) - bi = div - } - - val leadingZeros = input.takeWhile(_ == 0).length - ("1" * leadingZeros) + sb.reverse.toString() - } -} diff --git a/app/utils/CuratorViewUtils.scala b/app/utils/CuratorViewUtils.scala deleted file mode 100644 index a7d61551..00000000 --- a/app/utils/CuratorViewUtils.scala +++ /dev/null @@ -1,32 +0,0 @@ -package utils - -import models.domain.support.MessageStatus - -object CuratorViewUtils { - def actionBadgeClass(action: String): String = { - action match { - case "create" => "bg-success" - case "update" => "bg-warning text-dark" - case "delete" => "bg-danger" - case _ => "bg-secondary" - } - } - - def changeTypeBadgeClass(changeType: String): String = { - changeType match { - case "add" => "bg-success" - case "remove" => "bg-danger" - case "update" => "bg-warning text-dark" - case _ => 
"bg-secondary" - } - } - - def statusBadgeClass(status: MessageStatus): String = { - status match { - case MessageStatus.New => "bg-primary" - case MessageStatus.Read => "bg-info" - case MessageStatus.Replied => "bg-success" - case MessageStatus.Closed => "bg-secondary" - } - } -} diff --git a/app/utils/DIDResolver.scala b/app/utils/DIDResolver.scala deleted file mode 100644 index c68b0952..00000000 --- a/app/utils/DIDResolver.scala +++ /dev/null @@ -1,41 +0,0 @@ -package utils - -/** - * Trait for handling Decentralized Identifier (DID) conversions. - * - * This trait provides methods for converting a source system and accession identifier - * to a DID format and extracting components from a given DID string. - */ -trait DIDResolver { - /** - * Converts a given source system and accession identifier into a Decentralized Identifier (DID) format. - * - * @param sourceSystem The source system identifier (e.g., "pgp", "evolbio"). - * @param accession The accession identifier associated with the entity. - * @return A formatted Decentralized Identifier (DID) string specific to the source system. - */ - def toDID(sourceSystem: String, accession: String): String = sourceSystem match { - case "pgp" => s"did:pgp:${accession.toLowerCase}" - case "evolbio" => s"did:evolbio:${accession}" - case _ => s"did:biosample:${accession}" - } - - /** - * Extracts the source system and accession identifier from a given Decentralized Identifier (DID) string. - * - * The input DID string is expected to follow the format `did::`. - * If the input string matches the pattern, the method returns an `Option` containing a tuple - * with the extracted `sourceSystem` and `accession`. Otherwise, it returns `None`. - * - * @param did The Decentralized Identifier (DID) string to be parsed. - * @return An `Option` containing a tuple `(sourceSystem, accession)` if parsing succeeds, or `None` if the input - * string does not match the expected DID format. 
- */ - def fromDID(did: String): Option[(String, String)] = { - val DIDPattern = "^did:([^:]+):(.+)$".r - did match { - case DIDPattern(system, acc) => Some((system, acc)) - case _ => None - } - } -} diff --git a/app/utils/GenomicsConstants.scala b/app/utils/GenomicsConstants.scala deleted file mode 100644 index 2f9ed1b1..00000000 --- a/app/utils/GenomicsConstants.scala +++ /dev/null @@ -1,9 +0,0 @@ -package utils - -/** - * Constants related to genomics domain, such as specific identifiers or platform names. - */ -object GenomicsConstants { - val DONOR_ID_PREFIX: String = "DONOR_" - val PGP_SOURCE_PLATFORM: String = "PGP" -} diff --git a/app/utils/GeometryUtils.scala b/app/utils/GeometryUtils.scala deleted file mode 100644 index ea5cc318..00000000 --- a/app/utils/GeometryUtils.scala +++ /dev/null @@ -1,32 +0,0 @@ -package utils - -import com.vividsolutions.jts.geom.{Coordinate, GeometryFactory, Point} -import models.api.GeoCoord - -/** - * Utility object providing geometric transformation functions. - */ -object GeometryUtils { - private val geometryFactory = new GeometryFactory() - - /** - * Converts a geographical coordinate to a geometric point. - * - * @param geoCoord the geographical coordinate containing latitude and longitude values - * @return a Point object representing the given geographical coordinate - */ - def geoCoordToPoint(geoCoord: GeoCoord): Point = { - geometryFactory.createPoint(new Coordinate(geoCoord.lon, geoCoord.lat)) - } - - /** - * Creates a geometric point from the given latitude and longitude. 
- * - * @param lat the latitude of the point - * @param lon the longitude of the point - * @return a Point object representing the specified latitude and longitude - */ - def createPoint(lat: Double, lon: Double): Point = { - geometryFactory.createPoint(new Coordinate(lon, lat)) - } -} \ No newline at end of file diff --git a/app/utils/IpAddressUtils.scala b/app/utils/IpAddressUtils.scala deleted file mode 100644 index 21bd085b..00000000 --- a/app/utils/IpAddressUtils.scala +++ /dev/null @@ -1,11 +0,0 @@ -package utils - -import java.security.MessageDigest - -object IpAddressUtils { - def hashIpAddress(ip: String): String = { - val digest = MessageDigest.getInstance("SHA-256") - val hash = digest.digest(ip.getBytes("UTF-8")) - hash.map("%02x".format(_)).mkString - } -} diff --git a/app/utils/MockRecaptchaVerifier.scala b/app/utils/MockRecaptchaVerifier.scala deleted file mode 100644 index 0d9a33c6..00000000 --- a/app/utils/MockRecaptchaVerifier.scala +++ /dev/null @@ -1,51 +0,0 @@ -package utils - -import com.nappin.play.recaptcha.{RecaptchaSettings, RecaptchaVerifier, ResponseParser, WidgetHelper} -import jakarta.inject.Inject -import play.api.data.Form -import play.api.data.FormBinding.Implicits.formBinding -import play.api.libs.ws.WSClient -import play.api.mvc.Request - -import scala.concurrent.{ExecutionContext, Future} - -/** - * A mock implementation of the `RecaptchaVerifier` class used to bypass Recaptcha validation during development - * or testing when Recaptcha is disabled. - * - * This class provides an alternative `RecaptchaVerifier` implementation, allowing form binding and verification - * without actual Recaptcha validation. It is typically used in scenarios where Recaptcha is not enabled, reducing - * dependency on external Recaptcha services. - * - * @constructor Initializes the `MockRecaptchaVerifier` with the necessary dependencies. 
- * @param settings the Recaptcha settings for configuration purposes - * @param parser the response parser for processing Recaptcha responses (unused in this implementation) - * @param wsClient the web service client for handling HTTP requests (unused in this implementation) - * @param ec the implicit execution context used for handling asynchronous operations - */ -class MockRecaptchaVerifier @Inject()( - settings: RecaptchaSettings, - parser: ResponseParser, - wsClient: WSClient - )(implicit ec: ExecutionContext) extends RecaptchaVerifier(settings, parser, wsClient) { - - /** - * Binds a form from an HTTP request and performs optional verification using a widget helper. - * - * This method allows binding request data to a form while enabling hooks for additional - * processing or validation via the provided widget helper. It is typically utilized - * when handling form submissions in scenarios where recaptcha or similar mechanisms need - * to be considered for verification. - * - * @param form the form to bind the request data to - * @param request the HTTP request containing the data to bind - * @param helper the widget helper used to assist with additional verification steps - * @return a future containing the bound form object - */ - def bindFromRequestAndVerify[A](form: Form[A], request: Request[_], helper: WidgetHelper): Future[Form[A]] = { - implicit val binding = formBinding - Future.successful(form.bindFromRequest()(request, binding)) - } -} - - diff --git a/app/utils/VariantViewUtils.scala b/app/utils/VariantViewUtils.scala deleted file mode 100644 index 66b3f68b..00000000 --- a/app/utils/VariantViewUtils.scala +++ /dev/null @@ -1,81 +0,0 @@ -package utils - -import models.domain.genomics.VariantV2 -import play.api.libs.json.{JsObject, JsValue} - -object VariantViewUtils { - def refGenomes(v: VariantV2): Seq[String] = { - v.coordinates.asOpt[Map[String, JsObject]].map(_.keys.toSeq.sorted).getOrElse(Seq.empty) - } - - def formatPosition(v: VariantV2, 
refGenome: String): String = { - v.getCoordinates(refGenome).map { coords => - val contig = (coords \ "contig").asOpt[String].getOrElse("?") - - (coords \ "position").asOpt[Int].map(p => s"$contig:$p") - .orElse { - (coords \ "start").asOpt[Long].map { start => - val end = (coords \ "end").asOpt[Long].getOrElse(start) - if (start == end) s"$contig:$start" else s"$contig:$start-$end" - } - } - .getOrElse(s"$contig:?") - }.getOrElse("-") - } - - private def extractAlleles(coords: JsValue): (String, String) = { - val ref = (coords \ "ref").asOpt[String] - val alt = (coords \ "alt").asOpt[String] - - if (ref.isDefined) { - (ref.get, alt.getOrElse("?")) - } else { - val rawMotif = (coords \ "repeatMotif").asOpt[String] - val motif = rawMotif.filterNot(_ == "N/A") - val refRepeats = (coords \ "referenceRepeats").asOpt[Int] - - if (motif.isDefined && refRepeats.isDefined) { - (s"${motif.get} x ${refRepeats.get}", "?") - } else if (refRepeats.isDefined) { - (s"(repeats: ${refRepeats.get})", "?") - } else { - ("?", "?") - } - } - } - - def formatAlleles(v: VariantV2, refGenome: String): String = { - val (ref, alt) = formatAllelesTuple(v, refGenome) - s"$ref→$alt" - } - - def formatAllelesTuple(v: VariantV2, refGenome: String): (String, String) = { - v.getCoordinates(refGenome).map { coords => - extractAlleles(coords) - }.getOrElse(("?", "?")) - } - - def primaryAlleles(v: VariantV2): (String, String) = { - val coords = v.coordinates.asOpt[Map[String, JsObject]].getOrElse(Map.empty) - val primary = coords.get("hs1").orElse(coords.get("GRCh38")).orElse(coords.headOption.map(_._2)) - primary.map(extractAlleles).getOrElse(("?", "?")) - } - - def buildBadgeClass(refGenome: String): String = { - refGenome match { - case "GRCh37" => "bg-warning text-dark" - case "GRCh38" => "bg-info" - case "hs1" => "bg-success" - case _ => "bg-secondary" - } - } - - def shortRefGenome(refGenome: String): String = { - refGenome match { - case "GRCh37" => "GRCh37" - case "GRCh38" => "GRCh38" - case 
"hs1" => "hs1" - case other => other - } - } -} \ No newline at end of file diff --git a/app/views/_footer.scala.html b/app/views/_footer.scala.html deleted file mode 100644 index a0d4dc1f..00000000 --- a/app/views/_footer.scala.html +++ /dev/null @@ -1,23 +0,0 @@ -@()(implicit messages: Messages) - \ No newline at end of file diff --git a/app/views/_navbar.scala.html b/app/views/_navbar.scala.html deleted file mode 100644 index 9018f2c8..00000000 --- a/app/views/_navbar.scala.html +++ /dev/null @@ -1,89 +0,0 @@ -@()(implicit messages: Messages, request: RequestHeader) - \ No newline at end of file diff --git a/app/views/admin/genomics/dashboard.scala.html b/app/views/admin/genomics/dashboard.scala.html deleted file mode 100644 index a24d9292..00000000 --- a/app/views/admin/genomics/dashboard.scala.html +++ /dev/null @@ -1,193 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Genomics Admin") { -
-

Genomics Admin

- - @views.html.fragments.flashMessages(request.flash) - -
-
-
-
-
YBrowse Variant Update
-
-
-

- Download and ingest the latest Y-DNA variant data from YBrowse. - This will fetch the GFF file, and update the variant database. - This process takes several hours. Monitor progress in the server logs. -

- - -
-
-
- -
-
-
-
HipSTR Reference Update
-
-
-

- Download and ingest the latest HipSTR (GRCh38) STR reference data. - This will include liftover to hs1 and GRCh37. - This process takes a few minutes. Monitor progress in the server logs. -

- - -
-
-
- -
-
-
-
Genome Regions Bootstrap
-
-
-

- Bootstrap the genome region database (Cytobands, Centromeres, Telomeres, etc.) from hs1 (T2T-CHM13) source files. - Includes liftover to GRCh38 and GRCh37. - This is a one-time setup operation. -

- - -
-
-
-
-
- - -} diff --git a/app/views/auth/appPasswordHelp.scala.html b/app/views/auth/appPasswordHelp.scala.html deleted file mode 100644 index 0295447a..00000000 --- a/app/views/auth/appPasswordHelp.scala.html +++ /dev/null @@ -1,37 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main(messages("auth.appPasswordHelp.title")) { -
- @views.html.fragments.flashMessages(request.flash) -
-
-
-
-

@messages("auth.appPasswordHelp.heading")

- -

@Html(messages("auth.appPasswordHelp.p1"))

-

@Html(messages("auth.appPasswordHelp.p2"))

-
    -
  • @Html(messages("auth.appPasswordHelp.li1"))
  • -
  • @Html(messages("auth.appPasswordHelp.li2"))
  • -
- -

@messages("auth.appPasswordHelp.generateSteps")

-
    -
  1. @Html(messages("auth.appPasswordHelp.step1"))
  2. -
  3. @Html(messages("auth.appPasswordHelp.step2"))
  4. -
  5. @Html(messages("auth.appPasswordHelp.step3"))
  6. -
  7. @Html(messages("auth.appPasswordHelp.step4"))
  8. -
  9. @Html(messages("auth.appPasswordHelp.step5"))
  10. -
-

@Html(messages("auth.appPasswordHelp.note"))

- -
-
-
-
-
-} \ No newline at end of file diff --git a/app/views/auth/login.scala.html b/app/views/auth/login.scala.html deleted file mode 100644 index 9bf2a0bb..00000000 --- a/app/views/auth/login.scala.html +++ /dev/null @@ -1,48 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(loginForm: Form[controllers.LoginData])(implicit request: Request[AnyContent], messages: Messages, webJarsUtil: WebJarsUtil) - -@main(messages("auth.login.title")) { -
-
-
-
-
-

@messages("auth.login.heading")

- - @views.html.fragments.flashMessages(request.flash) - - - - @helper.form(action = routes.AuthController.authenticate) { - @helper.CSRF.formField - -
- - -
@messages("auth.login.handle.help")
-
- -
- - -
@Html(messages("auth.login.passwordHelp", routes.AuthController.showAppPasswordHelp().url))
-
- -
- -
- } -
-
-
-
-
-} diff --git a/app/views/biosampleReport.scala.html b/app/views/biosampleReport.scala.html deleted file mode 100644 index 6a336d7c..00000000 --- a/app/views/biosampleReport.scala.html +++ /dev/null @@ -1,120 +0,0 @@ -@import models.domain.genomics.BiosampleType - -@(paginatedResult: models.api.PaginatedResult[models.api.BiosampleWithOrigin], publicationId: Int)(implicit messages: Messages) - -
-

@messages("biosample.details")

- - - - - - - - - - - - - - - - @for(sample <- paginatedResult.items) { - - - - - - - - - - - - } - -
@messages("biosample.header.sampleId")@messages("biosample.header.accession")@messages("biosample.header.origin")@messages("biosample.header.timePeriod")@messages("biosample.header.datingPeriod")@messages("biosample.header.sex")@messages("biosample.header.yHaplo")@messages("biosample.header.mtHaplo")@messages("biosample.header.population")
@sample.sampleName - @{ - sample.accession match { - case acc if acc.startsWith("SAMN") => { - - {acc} - - } - case acc if acc.startsWith("ERS") || acc.startsWith("SAM") => { - - {acc} - - } - case acc => { - {acc} - } - } - } - @sample.formattedOrigin - "bg-secondary" - } - }"> - @sample.sampleTypeDescription - - @sample.formattedDateRange - @defining(sample.sex.map(_.toLowerCase).getOrElse("unknown")) { sex => - "fa-mars sex-male" - case "female" => "fa-venus sex-female" - case _ => "fa-genderless sex-unknown" - } - }" title="@sample.sex.getOrElse("Unknown")"> - } - - @defining(sample.sex.map(_.toLowerCase).getOrElse("unknown")) { sex => - @if(sex == "female") { - N/A - } else { - @sample.yDnaHaplogroup - } - } - @sample.mtDnaHaplogroup - @sample.bestFitPopulation.map { pop => - - @pop.populationName (@{ - f"${pop.probability * 100}%.1f" - }%) - - }.getOrElse(messages("generic.notAvailable")) -
- -
- @if(paginatedResult.hasPreviousPage) { - - } - @messages("pagination.pageOf", paginatedResult.currentPage, paginatedResult.totalPages) - @if(paginatedResult.hasNextPage) { - - } -
-
diff --git a/app/views/biosamples/map.scala.html b/app/views/biosamples/map.scala.html deleted file mode 100644 index 8f607b0d..00000000 --- a/app/views/biosamples/map.scala.html +++ /dev/null @@ -1,55 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages("map.title")) { -
-
- @messages("map.loading") -
-
- - - - - - -} diff --git a/app/views/contact.scala.html b/app/views/contact.scala.html deleted file mode 100644 index 942084c7..00000000 --- a/app/views/contact.scala.html +++ /dev/null @@ -1,131 +0,0 @@ -@import com.nappin.play.recaptcha.WidgetHelper -@import models.forms.Contact -@import org.webjars.play.WebJarsUtil - -@this(recaptchaField: views.html.recaptcha.recaptchaField) -@(contactForm: Form[Contact.ContactDTO], isProd: Boolean, isAuthenticated: Boolean)(implicit request: Request[AnyContent], messages: Messages, webJarsUtil: WebJarsUtil, widgetHelper: WidgetHelper) -@main(messages("contact.title")) { -
-
-
-
-
-

@messages("contact.title")

- - @if(isAuthenticated) { - - } - - @views.html.fragments.flashMessages(request.flash, Some(contactForm)) - - @helper.form(action = routes.ContactController.submit(), Symbol("class") -> "needs-validation", Symbol("autocomplete") -> "off") { - @helper.CSRF.formField - - @* Name and email fields - only shown for anonymous users *@ - @if(!isAuthenticated) { -
- - - @contactForm("name").errors.map { error => -
@messages(error.message)
- } -
@messages("contact.name.help")
-
- -
- - - @contactForm("email").errors.map { error => -
@messages(error.message)
- } -
@messages("contact.email.help")
-
- } else { - @* Hidden fields with placeholder values for authenticated users *@ - - - } - -
- - - @contactForm("subject").errors.map { error => -
@messages(error.message)
- } -
- -
- - - @contactForm("message").errors.map { error => -
@messages(error.message)
- } -
- - @* Honeypot field - hidden from real users *@ - - - @* reCAPTCHA only for anonymous users in production *@ - @if(isProd && !isAuthenticated) { - @recaptchaField(form = contactForm, fieldName = "captcha", includeNoScript = false, - isRequired = true, tabindex = 5, args = Symbol("class") -> "extraClass") - } - -
- - - @if(isAuthenticated) { - - @messages("contact.viewHistory") - - } -
- } -
-
-
-
-
-} diff --git a/app/views/content/en/cookiesText.scala.html b/app/views/content/en/cookiesText.scala.html deleted file mode 100644 index c70089ca..00000000 --- a/app/views/content/en/cookiesText.scala.html +++ /dev/null @@ -1,51 +0,0 @@ -@()(implicit messages: Messages) - -

- This Cookie Usage Policy explains how Decoding-Us.com (the "Website") uses cookies.

- -
-

What are Cookies?

-

Cookies are small text files that are placed on your computer or mobile device when you visit a website. They are widely used to make websites work, or work more efficiently, as well as to provide information to the owners of the website.

-
- -
-

Cookies We Use

-

This Website uses cookies exclusively for authentication and authorization purposes. Specifically:

-
    -
  • Session Cookies: To maintain your logged-in state as you navigate the Website. These cookies are essential for the authentication system to function and are deleted when you close your browser or log out.
  • -
  • Security Cookies: To help protect your account and prevent unauthorized access, including CSRF (Cross-Site Request Forgery) protection tokens.
  • -
-
- -
-

What We Do NOT Use Cookies For

-

We want to be clear about what we do not do with cookies:

-
    -
  • No Tracking: We do not track your browsing activity on other websites.
  • -
  • No Analytics: We do not use cookies for analytics or to monitor user behavior patterns.
  • -
  • No Marketing: We do not use cookies for advertising or marketing purposes.
  • -
  • No Third-Party Sharing: We do not share cookie data with any third parties.
  • -
  • No Data Sales: We do not sell any information collected through cookies.
  • -
-
- -
-

Third-Party Cookies

-

This Website does not utilize any third-party cookies. We do not embed third-party tracking scripts, advertising networks, or social media widgets that would set their own cookies.

-
- -
-

Managing Cookies

-

Since our cookies are essential for authentication, disabling them will prevent you from logging into the Website. You can manage cookies through your browser settings:

-
    -
  • Most browsers allow you to view, manage, and delete cookies in the privacy or security settings.
  • -
  • If you delete our session cookies, you will be logged out and need to sign in again.
  • -
-
- -
-

Updates to this Cookie Policy

-

We may update this Cookie Usage Policy from time to time. Any changes will be posted on this page. Our commitment to using cookies only for authentication and authorization purposes will not change without clear notice to our users.

-
- -

Last Updated: 2025-12-09

diff --git a/app/views/content/en/coverageText.scala.html b/app/views/content/en/coverageText.scala.html deleted file mode 100644 index 374b6d8f..00000000 --- a/app/views/content/en/coverageText.scala.html +++ /dev/null @@ -1,62 +0,0 @@ -

- Coverage statistics are calculated from sequencing reads aligned to the - chm13v2.0/hs1 reference genome - using BWA-MEM. - A site is considered "callable" when it has at least four reads with high mapping confidence. -

- - - -
- Technical Details -

- Analysis performed by - DecodingUsTools Coverage Analysis with default settings. -

-
- -
-
Callable Bases
-
- Sites with 4+ reads where each read has high mapping confidence. -
- -
Bases No Coverage
-
- Sites with no aligned reads. -
- -
Low Quality Mapping
-
- Sites that don't meet minimum quality thresholds. -
- -
Mean Depth
-
- Average number of reads per base position. Note: For sex chromosomes (X and Y), - male samples typically show approximately half the advertised sequencing depth - (e.g., ~15x coverage on X/Y for a 30x WGS) due to having only one copy of these chromosomes. - Female samples should show the full advertised depth on the X chromosome (~30x for a 30x WGS) - since they have two X chromosomes. -
- -
95% Confidence Interval
-
- Range (shown in parentheses as lower-upper) where the true population mean likely falls, - displayed when multiple samples are available. -
-
\ No newline at end of file diff --git a/app/views/content/en/faqText.scala.html b/app/views/content/en/faqText.scala.html deleted file mode 100644 index 17fa4488..00000000 --- a/app/views/content/en/faqText.scala.html +++ /dev/null @@ -1,146 +0,0 @@ -@()(implicit messages: Messages) - -
    -
  1. -
    -
    -

    Question 1

    -

    What is Decoding-Us.com?

    -
    -
    -
    -

    - Decoding-Us.com is a collaborative platform designed for the genetic genealogy and population research community. It serves two main purposes: -

    -
      -
    • Public Resource: We analyze and visualize publicly available Whole Genome Sequencing (WGS) data from academic sources, providing alternative Y-DNA and Mitochondrial DNA trees, coverage benchmarks, and reference data.
    • -
    • Federated Collaboration: We are building a decentralized application using the AT Protocol (the same technology behind Bluesky) that allows researchers to securely share and compare genetic data without relying on a central server to hold their files.
    • -
    -

    - Our goal is to empower citizen scientists with tools to explore human history while maintaining complete ownership and control over their sensitive genetic data. -

    -
    -
    -
    -
  2. - -
  3. -
    -
    -

    Question 2

    -

    How does the "Federated" model work?

    -
    -
    -
    -

    - In a traditional model (like AncestryDNA or 23andMe), you provide a biological sample (such as a buccal swab or blood) for sequencing. The service then processes your sample, sequences your genetic data, and often maintains ownership or significant control over that data on their servers. Decoding-Us.com offers a unique alternative: our platform is designed to bridge data from any genetic service—be it 23andMe, Ancestry, FTDNA, YSEQ, or direct-to-consumer Whole Genome Sequencing (WGS) providers. As long as you maintain ownership of and access to your sequencing data, our system empowers you to bring it into our federated network, uniting insights from across different providers. -

    -

    - In our Federated Model (Phase 3 of our roadmap), you don't upload your raw data to us. Instead: -

    -
      -
    1. You run a "Personal Data Server" (PDS) application—software that lives on your own computer or private cloud.
    2. -
    3. This software analyzes your WGS data locally to extract non-sensitive summaries (like your haplogroup or anonymous matching tokens).
    4. -
    5. You choose to publish these summaries to the "Atmosphere"—our secure network.
    6. -
    7. Decoding-Us.com acts as an "AppView," indexing this public information so you can find matches and collaborate with other researchers.
    8. -
    -

    - This approach, known as a "Decentralized AppView," ensures that your massive and sensitive raw data files (BAM/CRAM) never leave your control, while still allowing you to participate in community discoveries. -

    -
    -
    -
    -
  4. - -
  5. -
    -
    -

    Question 3

    -

    Can I upload my Big Y, WGS, or other DNA files?

    -
    -
    -
    -

    - No, we do not accept direct file uploads. To protect your privacy and reduce infrastructure costs, we do not host user data files. -

    -

    - Instead, we provide tools that allow you to process your data on your own machine. Currently, we offer an Alpha version of the Decoding-Us Navigator (also known as the Navigator Workbench). This edge-computing application runs locally on your PC (Windows/Linux/Mac) and empowers you to: -

    -
      -
    • Analyze Locally: Process BAM/CRAM files directly on your machine to generate coverage metrics and haplogroup determinations without uploading massive files.
    • -
    • Integrate with Atmosphere: Future versions will allow you to publish anonymized summaries to your Personal Data Server, enabling you to share insights with the federated network while keeping your raw data private.
    • -
    -

    - The Navigator is built on the JVM (Java/Scala) for performance and cross-platform compatibility. It represents the core of our privacy-first philosophy: bring the analysis to the data, not the data to the analysis. -

    -
    -
    -
    -
  6. - -
  7. -
    -
    -

    Question 4

    -

    What is the "Atmosphere" and the AT Protocol?

    -
    -
    -
    -

    - The AT Protocol is a new technology for decentralized social networking. It allows users to own their identity and data, moving freely between different services without losing their connections. -

    -

    - We call our implementation the Atmosphere. Just as the air connects us all, the Atmosphere connects individual researchers' Personal Data Servers. It uses "Lexicons"—standard dictionaries for genetic data—to ensure that a researcher in the UK can automatically compare notes with a researcher in the US, even if they use different software, provided they both speak the "Atmosphere" language. -

    -
    -
    -
    -
  8. - -
  9. -
    -
    -

    Question 5

    -

    Why the .com domain? Is this a commercial service?

    -
    -
    -
    -

    - Decoding-Us.com is currently a free resource. However, hosting high-performance databases and indexing the global federation costs money. We chose a .com domain to keep our options open for long-term sustainability. -

    -

    - Future sustainability models might include: -

    -
      -
    • Patronage Donation System: A voluntary tiered donation model where community members can become "Patrons" to support hardware and hosting costs. Contributions are optional, but help ensure the platform remains free and open for everyone.
    • -
    • Managed PDS Hosting: For users who don't want to run their own server, we could offer a paid, secure hosting service for their Personal Data Server.
    • -
    • Sponsorships: Partnerships with sequencing labs or academic institutions, provided they align with our strict privacy and anti-tracking values.
    • -
    -

    - Regardless of the model, our commitment to open-source code and user data sovereignty will remain unchanged. -

    -
    -
    -
    -
  10. - -
  11. -
    -
    -

    Question 6

    -

    How can I submit my haplogroup tree research for merging into DecodingUs?

    -
    -
    -
    -

    - DecodingUs welcomes contributions from researchers to help build a more comprehensive and accurate haplogroup tree. You can submit your data via our automated Tree Merge API. -

    -

    - For a detailed guide on how to format your tree data, the rules for provenance (how your research gets credited), and how conflicts are resolved, please refer to our dedicated guide: - Learn more about Submitting Tree Data -

    -
    -
    -
    -
  12. -
diff --git a/app/views/content/en/homeText.scala.html b/app/views/content/en/homeText.scala.html deleted file mode 100644 index 9bc2eea3..00000000 --- a/app/views/content/en/homeText.scala.html +++ /dev/null @@ -1,46 +0,0 @@ -@()(implicit messages: Messages) - -

Decoding Us will be a next-generation platform for citizen science focused on empowering individuals to - contribute to genealogical and population studies using Whole Genome Sequencing (WGS) data and compatible - technologies. Recognizing the limitations of centralized data silos and the growing importance of user data - sovereignty, Decoding Us will be architected as a federated system, placing control and ownership firmly in - the hands of the participants. This approach will foster greater trust, encourage broader participation, - and unlock new possibilities for collaborative discovery in understanding our shared human history.

-

The system shall be architected with these goals:

-
    -
  • Federated Design: -
      -
    • Identity: Users will possess permanent decentralized identifiers for their - accounts using the robust and user-centric - AT Protocol. - This ensures portability and long-term control over their digital identity within the ecosystem.
    • -
    • Personal Data Stores (PDS): - Participation will involve users securely sharing their - sequencing data through software operating on their own infrastructure – be it a personal computer, - a dedicated Virtual Private Server (VPS), or potentially even services offered by their sequencing - vendors. This personal data store remains under the user's direct ownership, offering the - flexibility to migrate between providers as needed. The software running on the PDS will manage - data storage, facilitate secure communication with the network, and potentially perform initial - data processing tasks.
    • -
    -
  • -
  • Secure Computing with Gapless Reference Analysis: - The system will prioritize user privacy by - enabling participating individuals to compare specific genetic features, such as Single Nucleotide Variant (SNV) values - downstream of identified Y or mitochondrial DNA (mtDNA) haplogroups, in a privacy-preserving manner. - The system will utilize the hs1/chm13v2.0 gapless human reference genome for variant calling and analysis, - which currently provides the most comprehensive downstream analysis support. The necessary genetic features for comparison - will be exchanged with potential matches through end-to-end encrypted messages. Specialized applications - running on the user's PDS can then perform the computationally intensive comparisons, with a mechanism - for consensus checking among peers to ensure the reliability of the results. This secure exchange - ensures that sensitive raw genomic data remains within the user's control. While the human pangenome - reference offers promising future capabilities, we will continue with the proven gapless reference approach - until the ecosystem of analysis tools matures sufficiently to support pangenome-based workflows. -
  • -
- -

Looking Ahead: While the initial focus is on WGS data, the federated architecture of - Decoding Us is designed to be extensible, potentially accommodating other compatible genomic or - related data types in the future. We envision a vibrant community of citizen scientists collaborating on - research questions, sharing insights, and contributing to a deeper understanding of human history and - population dynamics, all within a framework that prioritizes data ownership, privacy, and security.

diff --git a/app/views/content/en/privacyPolicyText.scala.html b/app/views/content/en/privacyPolicyText.scala.html deleted file mode 100644 index 9a647466..00000000 --- a/app/views/content/en/privacyPolicyText.scala.html +++ /dev/null @@ -1,80 +0,0 @@ -@()(implicit messages: Messages) - -

- This Privacy Statement explains how Decoding-Us.com (the "Website") handles the limited data processed during your interaction with our application, including the use of data from a federated platform.

- -
-

1. Introduction

-

This statement outlines our practices regarding the processing of personal data in connection with Decoding-Us.com. As a - Hypermedia as the Engine of Application State (HATEOAS) - application focused on providing public data, we also integrate anonymized and public data from a federated platform to enhance our content.

-
- -
-

2. Features Facilitating Research Collaboration

-

Decoding-Us.com provides an Application View that allows users of participating AT Protocol Personal Data Servers (PDS) within the private research federation to send messages to one another. This messaging functionality is intended to facilitate collaboration among researchers based on shared haplogroups in the genealogical era, ancestral populations, or existing matching networks. Users on the PDS have full control over whether to allow or ignore incoming messages.

-

To further enhance research and potential connections, Decoding-Us.com may also collect anonymized ancestral population information and Most Distant Known Ancestor (MDKA) data from publicly shared data on the federated - PDS. This MDKA - information may include geocoordinates, date of birth, date of death, and surname. To protect potential privacy, the - MDKA's date of birth will be masked if it falls within the last 100 years.

-

It is crucial to understand that researchers control what information they make public on their - PDS - through their privacy settings. Decoding-Us.com only accesses and processes data that has been explicitly shared publicly by these researchers within the federated environment. We do not access or collect any private or personally identifiable genetic or genealogical data beyond what researchers choose to share publicly.

-

The purpose of these features is to empower researchers to connect, share insights, and collaboratively refine haplogroup understanding and genealogical connections. The anonymized - MDKA information helps provide context without revealing recent personal details.

-
- -
-

3. Data We Collect

-

Currently, this Website does not actively collect personal data such as names, email addresses, or account information directly from our viewing users. Our design does not require or facilitate user registration or login on Decoding-Us.com itself.

-

However, our server logs automatically record certain information when you access the Website, including:

-
    -
  • Your IP address
  • -
  • The date and time of your request
  • -
  • The type of browser and operating system you are using (user-agent string)
  • -
  • The specific endpoints (URLs) you access
  • -
-

Through our Application View functionality for the private research federation, we may also process the following types of - publicly shared, anonymized data from participating - PDS:

-
    -
  • Aggregated statistics on genetic tests and coverage.
  • -
  • Information about private SNPs beyond the terminal haplogroup branch.
  • -
  • Data on negative calls for the terminal branch and its siblings.
  • -
  • Anonymized ancestral population information.
  • -
  • Most Distant Known Ancestor (MDKA) data (geocoordinates, masked date of birth if within the last 100 years, date of death, and surname).
  • -
-

This data is processed to facilitate research collaboration and enhance the haplogroup information presented on Decoding-Us.com.

-
- -
-

4. Communication Between PDS Users

-

The messaging feature is facilitated by the AT Protocol PDS - infrastructure. Decoding-Us.com provides the interface for this communication, but the control over allowing or ignoring messages resides entirely with the individual - PDS users. We do not monitor the content of these messages.

-
- -
-

5. Data Sharing

-

Decoding-Us.com does not share the personal data of our website viewers with the federated researchers or their AT Protocol - PDS - applications. Our interaction with the federation involves crawling and analyzing aggregated, non-personally identifiable data that researchers have chosen to make public within their - PDS - privacy controls. We also facilitate direct messaging between - PDS - users within the federation, but we do not access or store the content of these messages. The anonymized ancestral population and MDKA information we collect is used to enhance the research capabilities and content on Decoding-Us.com and is not shared back with individual - PDS users in a personally identifiable way.

-
- -
-

6. Your Rights and International Data Protection Laws

-

Depending on your location and applicable data protection laws, such as the General Data Protection Regulation (GDPR) in the European Union, the California Consumer Privacy Act (CCPA) in California, and similar regulations elsewhere, you may have certain rights regarding your personal data. These rights can include the right to access, rectify, erase, restrict processing, and object to the processing of your personal data, as well as the right to data portability.

-

While the scope of personal data we currently process from our users is limited to server logs, we are committed to respecting these rights where applicable. If you have any inquiries regarding your rights under relevant data protection laws and how they might apply to the limited data we process, please contact us using the information below.

-
- -
-

7. Updates to this Privacy Statement

-

We may update this Privacy Statement from time to time to reflect changes in our practices or legal requirements, including the integration with the federated platform. We will post any changes on this page, and we encourage you to review this statement periodically.

-
- -

Last Updated: 2025-05-13

diff --git a/app/views/content/en/reputationText.scala.html b/app/views/content/en/reputationText.scala.html deleted file mode 100644 index c63d190e..00000000 --- a/app/views/content/en/reputationText.scala.html +++ /dev/null @@ -1,42 +0,0 @@ -

- The Decoding Us Reputation System is designed to foster a high-quality, trustworthy community for genetic genealogy and research. - It rewards positive contributions and protects the community from spam and abuse. -

- -

How it Works

-

- Every user starts with a neutral reputation score. As you contribute to the platform, your score increases. - Conversely, actions that harm the community may lower your score. - Higher scores unlock advanced features, ensuring that powerful tools are used by trusted members. -

- -

Earning Reputation

-

You can earn reputation points through:

-
    -
  • Identity Verification: Verifying your email and identity (+10 points).
  • -
  • Data Contributions: Submitting sequencer metadata that is verified by the community (+5 points).
  • -
  • Community Engagement: Having your posts or comments upvoted by others (+1 point).
  • -
  • Research Participation: Accepting recruitment requests from researchers (+2 points).
  • -
  • Welcome Bonus: New users receive a starter bonus (+5 points).
  • -
- -

Losing Reputation

-

Your reputation score decreases if:

-
    -
  • Spam Reports: Your content is marked as spam by moderators or community consensus (-50 points).
  • -
  • Negative Feedback: Your posts are consistently downvoted (-1 point).
  • -
- -

Reputation Levels

-

Your score determines your access to certain features:

-
    -
  • Newcomer (0-9): Read-only access to public feeds.
  • -
  • Member (10+): Can post to the public feed and upvote content.
  • -
  • Trusted (20+): Can initiate Direct Messages (DMs) with other users.
  • -
  • Community Leader (50+): Can create new groups and has higher visibility.
  • -
- -
- Note: Reputation scores are calculated transparently based on these rules. - Our goal is to create a safe environment where scientific collaboration can thrive. -
diff --git a/app/views/content/en/termsText.scala.html b/app/views/content/en/termsText.scala.html deleted file mode 100644 index 2b934e51..00000000 --- a/app/views/content/en/termsText.scala.html +++ /dev/null @@ -1,78 +0,0 @@ -@()(implicit messages: Messages) - -
-

Acceptance of Terms

-

By accessing and using Decoding-Us.com (the "Website"), you agree to be bound by these Terms of Use.

-
- -
-

Use of Content

-

The content provided on Decoding-Us.com, including the haplogroup trees and related information, is made available under the - Creative Commons Attribution 4.0 International License (CC BY 4.0). This means you are free to:

-
    -
  • Share — copy and redistribute the material in any medium or format
  • -
  • Adapt — remix, transform, and build upon the material for any purpose, even commercially.
  • -
-

Under the following terms:

-
    -
  • Attribution — You must give - appropriate credit, provide a link to the license, and indicate if - changes were made. You may do so in any reasonable manner, but not in - any way that suggests the licensor endorses you or your use.
  • -
-

Specifically, when providing attribution, we - request that you credit "Decoding-Us.com" and, where possible, include a - link back to our Website: decoding-us.com.

-

The haplogroup trees and related information on this Website are generated and enhanced by integrating data from multiple sources, including aggregated, non-personally identifiable data obtained from a private federation of genetic genealogy researchers operating AT Protocol Personal Data Server (PDS) applications, - and publicly available genetic data from repositories such as the - European Nucleotide Archive - and - PGP: Harvard - . This integration allows for broader population coverage and more comprehensive branch identification. - The data from the private federation is made publicly available by the researchers within their - PDS - privacy controls and is crawled by Decoding-Us.com. Publicly available data from repositories is accessed and integrated according to their respective terms of use.

-
- -
-

Features for Research Collaboration

-

Decoding-Us.com provides an Application View that allows researchers within the private federation using AT Protocol - PDS - applications to communicate with each other based on shared genealogical and genetic information. This messaging feature is facilitated to encourage collaboration and potential relative matching. Users of the - PDS control whether they receive and engage with these messages.

-

The Website may also display anonymized ancestral population and Most Distant Known Ancestor ( - MDKA - ) information obtained from publicly shared data within the federation. This information is intended to provide genealogical context and facilitate connections among researchers. Please note that the - MDKA's date of birth may be masked to protect potential privacy.

-
- -
-

Responsibility for PDS User Interactions

-

The messaging functionality is a feature provided to facilitate communication between users of the AT Protocol - PDS - within the private federation. Decoding-Us.com acts as an interface and does not assume responsibility for the content, conduct, or outcomes of these direct interactions between - PDS - users. Users are solely responsible for their communications and interactions with others within the federated environment.

-
- -
-

Use of Anonymized Genealogical Data

-

Any anonymized ancestral population and MDKA information displayed on Decoding-Us.com is derived from publicly shared data within the private research federation. While this information is intended to be helpful for genealogical research, Decoding-Us.com makes no warranties regarding its accuracy or completeness. Users should exercise their own judgment and conduct independent research to verify any genealogical information obtained through the Website.

-
- -
-

No Warranties

-

THE CONTENT ON THIS WEBSITE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, DECODING-US.COM DISCLAIMS ALL WARRANTIES, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. WE DO NOT WARRANT THAT THE WEBSITE WILL BE UNINTERRUPTED OR ERROR-FREE, THAT DEFECTS WILL BE CORRECTED, OR THAT THE WEBSITE OR THE SERVERS THAT MAKE IT AVAILABLE ARE FREE OF VIRUSES OR OTHER HARMFUL COMPONENTS.

-
- -
-

Limitation of Liability

-

TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL DECODING-US.COM BE LIABLE FOR ANY INDIRECT, INCIDENTAL, SPECIAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS, DATA, USE, GOODWILL, OR OTHER INTANGIBLE LOSSES) ARISING OUT OF OR RELATING TO YOUR ACCESS TO OR USE OF, OR YOUR INABILITY TO ACCESS OR USE, THE WEBSITE OR ITS CONTENT, WHETHER BASED ON WARRANTY, CONTRACT, TORT (INCLUDING NEGLIGENCE), STATUTE, OR ANY OTHER LEGAL THEORY, EVEN IF DECODING-US.COM HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

-
- -
-

Governing Law

-

These Terms of Use shall be governed by and construed in accordance with the laws of the State of Wisconsin, USA, without regard to its conflict of law provisions.

-
- -

Last Updated: 2025-05-13

diff --git a/app/views/content/howToSubmitTreeData.scala.html b/app/views/content/howToSubmitTreeData.scala.html deleted file mode 100644 index 8562c987..00000000 --- a/app/views/content/howToSubmitTreeData.scala.html +++ /dev/null @@ -1,108 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(implicit messages: Messages, request: RequestHeader, webJarsUtil: WebJarsUtil) - -@main("Submitting Haplogroup Tree Data to DecodingUs") { -
-

Submitting Haplogroup Tree Data to DecodingUs: A Guide for Researchers

- -

This guide explains how to format your haplogroup tree data for submission to DecodingUs and outlines the rules for how your contributions will be credited. Our system is designed to integrate research from various sources efficiently and fairly.

- -
- -

1. Understanding Your Tree Data

-

When you submit your haplogroup tree, you're essentially providing a hierarchical structure of genetic lineages. Each "node" in this tree represents a haplogroup, and each haplogroup is defined by a set of genetic markers, called "variants."

- -

Key components of your tree data:

-
    -
  • Haplogroup Name: The common name for the haplogroup (e.g., "R1b-L21").
  • -
  • Defining Variants: The specific genetic variants (SNPs) that define this haplogroup (e.g., "L21", "S145"). These are crucial for our system to accurately place your haplogroup in the global tree.
  • -
  • Parent-Child Relationships: The hierarchical structure showing which haplogroups descend from others.
  • -
  • Age Estimates (Optional): You can include estimated ages for when a haplogroup "formed" (formedYbp) or its "Time to Most Recent Common Ancestor" (tmrcaYbp), along with optional lower and upper bounds.
  • -
- -

JSON Structure Example:

-
-
-

-[
-  {
-    "name": "R1b-L21",
-    "definingVariants": ["L21", "S145"],
-    "parent": "R1b",
-    "formedYbp": 4500,
-    "formedYbpLower": 4300,
-    "formedYbpUpper": 4700
-  },
-  {
-    "name": "R1b-DF13",
-    "definingVariants": ["DF13"],
-    "parent": "R1b-L21",
-    "tmrcaYbp": 4000
-  },
-  {
-    "name": "R1b-M222",
-    "definingVariants": ["M222"],
-    "parent": "R1b-DF13"
-  }
-]
-                
-
-
- -
- -

2. How DecodingUs Matches Your Data (Variant-Based Matching)

-

Our system uses a sophisticated matching algorithm that prioritizes genetic variants over haplogroup names. Why? Because different research organizations often use different naming conventions for the exact same haplogroup.

- -
-

Example: "R-L21" (ytree.net), "R1b1a1a2a1a1" (ISOGG), and "R1b-L21" (DecodingUs) might all refer to the same haplogroup defined by the variant "L21."

-
- -

By focusing on the defining variants, our system ensures your data is accurately integrated, regardless of the nomenclature you use.

- -
- -

3. Provenance: How Your Research Gets Credited

-

DecodingUs is committed to attributing research contributions fairly. We use a "multi-source provenance" system to track where every piece of information comes from.

- -

Core Principles of Credit Assignment:

-
    -
  • DecodingUs is the Baseline: Our existing internal tree serves as the foundation. Your contributions merge into this baseline.
  • -
  • ISOGG's Foundational Role: For well-established parts of the tree, ISOGG (International Society of Genetic Genealogy) typically holds the "primary credit" for discovery. If an existing node already has ISOGG credit, it usually retains it.
  • -
  • Credit for New Discoveries: This is where your research shines! When your submitted data introduces new discoveries, your organization (the "incoming source") will receive credit for them: -
      -
    • New Splits: If your data reveals a finer, more detailed structure within an existing branch (i.e., you introduce new intermediate haplogroups that break down a previously single haplogroup into multiple sub-branches), you will receive credit for these new "split" nodes.
    • -
    • New Terminal Branches: If your data adds new "leaf nodes" (the most recent, un-split haplogroups) that weren't present in our tree, you will receive credit for these new terminal branches.
    • -
    -
  • -
  • Curator Role: Trusted experts with the 'Tree Curator' role can directly contribute to tree development using internal tracking mechanisms, review proposed branches from the Haplogroup Discovery System, and resolve conflicts. This role provides significant opportunities for direct contribution to the tree's evolution.
  • -
-

This means ISOGG keeps credit for established structures, while you get credit for advancing the tree with new findings. Our system tracks all contributing sources for each haplogroup and variant association.

- -
- -

4. Resolving Disagreements (Conflict Resolution)

-

Sometimes, different sources might provide conflicting information (e.g., different age estimates for the same haplogroup). To handle this, we use priority rules.

-
    -
  • We can configure a list of trusted sources in order of priority (e.g., ISOGG higher than a specific research project).
  • -
  • When a conflict arises, the information from the higher-priority source will be used.
  • -
  • You can often specify your preferred priority settings when submitting data.
  • -
- -
- -

5. Preview Your Changes (Dry Run)

-

Before fully integrating your data, you can submit it in a "dry run" mode. This allows you to:

-
    -
  • See how your data would be merged into the existing tree.
  • -
  • Identify any potential conflicts.
  • -
  • Review the proposed credit assignments.
  • -
  • Understand the statistics of changes (e.g., how many new nodes would be created).
  • -
-

This ensures you have full confidence in the integration before it becomes permanent.

- -
- -

By understanding these principles, you can effectively prepare and submit your haplogroup tree research to DecodingUs, contributing to a more comprehensive and accurately attributed global phylogenetic tree.

-
-} \ No newline at end of file diff --git a/app/views/cookies.scala.html b/app/views/cookies.scala.html deleted file mode 100644 index ff18a031..00000000 --- a/app/views/cookies.scala.html +++ /dev/null @@ -1,15 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages("nav.cookies")) { -
-

@messages("legal.cookies.title")

- @{ - messages.lang.code match { - case "es" => views.html.content.en.cookiesText() - case "fr" => views.html.content.en.cookiesText() - case _ => views.html.content.en.cookiesText() - } - } -
-} diff --git a/app/views/coverage.scala.html b/app/views/coverage.scala.html deleted file mode 100644 index 5de46a63..00000000 --- a/app/views/coverage.scala.html +++ /dev/null @@ -1,107 +0,0 @@ -@()(implicit request: RequestHeader, webJarsUtil: org.webjars.play.WebJarsUtil, messages: Messages) - -@main(messages("nav.coverage")) { -
-

@messages("nav.coverage")

-

- @messages("coverage.subtitle") -

- -
- - -
- - - -
- -
- -
-

@messages("coverage.metrics.title")

- @{ - messages.lang.code match { - case "es" => views.html.content.en.coverageText() // Replace with actual spanish content if available - case "fr" => views.html.content.en.coverageText() // Replace with actual french content if available - case _ => views.html.content.en.coverageText() - } - } -
-
- - - -} \ No newline at end of file diff --git a/app/views/curator/audit/historyPanel.scala.html b/app/views/curator/audit/historyPanel.scala.html deleted file mode 100644 index 75738fe4..00000000 --- a/app/views/curator/audit/historyPanel.scala.html +++ /dev/null @@ -1,79 +0,0 @@ -@import models.domain.curator.AuditLogEntry -@import play.api.libs.json.Json -@import utils.CuratorViewUtils -@(entityType: String, entityId: Int, history: Seq[AuditLogEntry])(implicit request: RequestHeader) - -
-
-
Audit History
- @entityType #@entityId -
-
- @if(history.isEmpty) { -

No audit history available.

- } else { -
- @for(entry <- history) { -
-
- @entry.action - @entry.createdAt.toLocalDate @entry.createdAt.toLocalTime.toString.take(5) -
- - @entry.comment.map { c => -

@c

- } - - @if(entry.action == "update" && entry.oldValue.isDefined && entry.newValue.isDefined) { -
- View changes -
-
- Before: -
@Json.prettyPrint(entry.oldValue.get)
-
-
- After: -
@Json.prettyPrint(entry.newValue.get)
-
-
-
- } - - @if(entry.action == "create" && entry.newValue.isDefined) { -
- View created data -
@Json.prettyPrint(entry.newValue.get)
-
- } - - @if(entry.action == "delete" && entry.oldValue.isDefined) { -
- View deleted data -
@Json.prettyPrint(entry.oldValue.get)
-
- } -
- } -
- } - -
- @if(entityType == "haplogroup") { - - Back to Details - - } else { - - Back to Details - - } -
-
-
\ No newline at end of file diff --git a/app/views/curator/changesets/ambiguityReport.scala.html b/app/views/curator/changesets/ambiguityReport.scala.html deleted file mode 100644 index 55d5ef88..00000000 --- a/app/views/curator/changesets/ambiguityReport.scala.html +++ /dev/null @@ -1,94 +0,0 @@ -@import models.domain.haplogroups.ChangeSet -@import org.webjars.play.WebJarsUtil -@(changeSet: ChangeSet, reportContent: String)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Ambiguity Report - " + changeSet.name) { -
- - -
-

- Ambiguity Report: @changeSet.name -

-
- @changeSet.ambiguityReportPath.map { _ => - - Download Markdown - - } - - Back to Change Sets - -
-
- -
-
-
-
-
Change Set Info
-
-
-
-
Source
-
@changeSet.sourceName
- -
Type
-
- - @changeSet.haplogroupType - -
- -
Created
-
@changeSet.createdAt.toLocalDate
- -
Ambiguities
-
@changeSet.statistics.ambiguityCount
-
-
-
- -
-
-
Quick Links
-
- -
-
- -
-
-
-
Report Content
-
-
-
- @if(changeSet.ambiguityReportPath.isDefined) { -
@reportContent
- } else { -
- @reportContent -
- } -
-
-
-
-
-
-} diff --git a/app/views/curator/changesets/changesFragment.scala.html b/app/views/curator/changesets/changesFragment.scala.html deleted file mode 100644 index fe0e510d..00000000 --- a/app/views/curator/changesets/changesFragment.scala.html +++ /dev/null @@ -1,100 +0,0 @@ -@import models.domain.haplogroups.{TreeChangeView, TreeChangeType} -@import play.api.data.Form -@(changeSetId: Int, changes: Seq[TreeChangeView], reviewForm: Form[_])(implicit request: RequestHeader, messages: Messages) - -@changeTypeBadge(t: TreeChangeType) = { - @t match { - case TreeChangeType.Create => { - CREATE - } - case TreeChangeType.Update => { - UPDATE - } - case TreeChangeType.Reparent => { - REPARENT - } - case TreeChangeType.AddVariant => { - +VARIANT - } - case TreeChangeType.RemoveVariant => { - -VARIANT - } - case TreeChangeType.Delete => { - DELETE - } - } -} - -@if(changes.isEmpty) { -
- No pending changes to review. -
-} else { -
- @changes.size pending change(s) to review: -
-
- @for(cv <- changes) { -
-
-
- @changeTypeBadge(cv.change.changeType) - - @cv.change.changeType match { - case TreeChangeType.Create => { - @cv.haplogroupName.getOrElse("New node") - @cv.parentName.map { p => under @p } - } - case TreeChangeType.Reparent => { - @cv.haplogroupName.getOrElse(s"Node #${cv.change.haplogroupId.getOrElse("?")}") - to @cv.parentName.getOrElse("?") - } - case TreeChangeType.AddVariant => { - Add variant to @cv.haplogroupName.getOrElse(s"#${cv.change.haplogroupId.getOrElse("?")}") - } - case TreeChangeType.Update => { - Update @cv.haplogroupName.getOrElse(s"Node #${cv.change.haplogroupId.getOrElse("?")}") - } - case _ => { - @cv.change.changeType on @cv.haplogroupName.getOrElse(s"#${cv.change.haplogroupId.getOrElse("?")}") - } - } - -
-
-
-
- @views.html.helper.CSRF.formField - - -
-
- @views.html.helper.CSRF.formField - - -
-
- @views.html.helper.CSRF.formField - - -
-
-
- } -
-} diff --git a/app/views/curator/changesets/detailPanel.scala.html b/app/views/curator/changesets/detailPanel.scala.html deleted file mode 100644 index 45e281ca..00000000 --- a/app/views/curator/changesets/detailPanel.scala.html +++ /dev/null @@ -1,284 +0,0 @@ -@import models.domain.haplogroups.{ChangeSetDetails, ChangeSetStatus} -@import play.api.data.Form -@(details: ChangeSetDetails, discardForm: Form[_])(implicit request: RequestHeader, messages: Messages) - -@statusBadge(s: ChangeSetStatus) = { - @s match { - case ChangeSetStatus.Draft => { - Draft - } - case ChangeSetStatus.ReadyForReview => { - Ready for Review - } - case ChangeSetStatus.UnderReview => { - Under Review - } - case ChangeSetStatus.Applied => { - Applied - } - case ChangeSetStatus.Discarded => { - Discarded - } - } -} - -
-
-
- @details.changeSet.name -
- @statusBadge(details.changeSet.status) -
-
-
- Source -

@details.changeSet.sourceName

-
- - @details.changeSet.description.map { desc => -
- Description -

@desc

-
- } - -
-
- Type -

- - @details.changeSet.haplogroupType - -

-
-
- Created -

@details.changeSet.createdAt.toLocalDate by @details.changeSet.createdBy

-
-
- -
- -
Statistics
-
-
-
-
@details.changeSet.statistics.nodesCreated
- Created -
-
-
-
-
@details.changeSet.statistics.nodesUpdated
- Updated -
-
-
-
-
@details.changeSet.statistics.relationshipsUpdated
- Reparented -
-
-
- -
-
- Total Changes -

@details.totalChanges

-
-
- By Type -

- @details.changesByType.map { case (t, count) => - @t: @count - } -

-
-
- - @details.changeSet.ambiguityReportPath.map { path => -
-
- @details.changeSet.statistics.ambiguityCount ambiguities detected - - View Report - -
-
- } - -
- - @* View options *@ - - - @* Resolutions section - shown for reviewable change sets *@ - @if(details.changeSet.status == ChangeSetStatus.ReadyForReview || details.changeSet.status == ChangeSetStatus.UnderReview) { -
-
-
Conflict Resolutions
- -
-
-
-
- Loading resolutions... -
-
- - - Create resolutions via API to correct merge decisions. - Learn more - -
-
- } - - @* Action buttons based on status *@ - @details.changeSet.status match { - case ChangeSetStatus.ReadyForReview => { -
-
- @views.html.helper.CSRF.formField - -
- -
- } - case ChangeSetStatus.UnderReview => { -
-
-
-
- Loading pending changes... -
-
- -
- -
- @views.html.helper.CSRF.formField - -
-
- @views.html.helper.CSRF.formField - -
- -
- } - case ChangeSetStatus.Applied => { -
- Applied on @details.changeSet.appliedAt.map(_.toLocalDate).getOrElse("N/A") - @details.changeSet.appliedBy.map { by =>
by @by } -
- } - case ChangeSetStatus.Discarded => { -
- Discarded on @details.changeSet.discardedAt.map(_.toLocalDate).getOrElse("N/A") - @details.changeSet.discardedBy.map { by =>
by @by } - @details.changeSet.discardReason.map { reason =>
Reason: @reason } -
- } - case _ => { -
- Change set is in @details.changeSet.status status -
- } - } - - @* Discard form (collapsed by default) *@ -
-
- @views.html.helper.CSRF.formField -
- - -
- -
-
- - @* Comments section *@ - @if(details.comments.nonEmpty) { -
-
Comments (@details.comments.size)
-
- @for(comment <- details.comments.take(3)) { -
- @comment.author on @comment.createdAt.toLocalDate -

@comment.content

-
- } -
- } -
-
diff --git a/app/views/curator/changesets/diffFragment.scala.html b/app/views/curator/changesets/diffFragment.scala.html deleted file mode 100644 index 1ab01dff..00000000 --- a/app/views/curator/changesets/diffFragment.scala.html +++ /dev/null @@ -1,127 +0,0 @@ -@import models.domain.haplogroups.{TreeDiff, TreeDiffEntry, DiffType} -@(diff: TreeDiff)(implicit request: RequestHeader, messages: Messages) - -@diffTypeBadge(dt: DiffType) = { - @dt match { - case DiffType.Added => { - Added - } - case DiffType.Removed => { - Removed - } - case DiffType.Modified => { - Modified - } - case DiffType.Reparented => { - Reparented - } - } -} - -@diffTypeFilter(dt: DiffType) = @{ - dt match { - case DiffType.Added => "added" - case DiffType.Removed => "removed" - case DiffType.Modified => "modified" - case DiffType.Reparented => "reparented" - } -} - -@if(diff.entries.isEmpty) { -
- No differences found in this change set. -
-} else { -
- - Showing @diff.entries.size entries: - @diff.summary.nodesAdded added, - @diff.summary.nodesModified modified, - @diff.summary.nodesReparented reparented, - @diff.summary.nodesRemoved removed - -
- -
- @for(entry <- diff.entries) { -
-
-
-
- @diffTypeBadge(entry.diffType) - @entry.haplogroupName - @entry.haplogroupId.map { id => - (ID: @id) - } -
-

@entry.changeDescription

- - @* Show parent change for reparented nodes *@ - @if(entry.diffType == DiffType.Reparented) { -
- Parent: - @entry.oldParentName.getOrElse("(root)") @entry.newParentName.getOrElse("(root)") -
- } - - @* Show new parent for added nodes *@ - @if(entry.diffType == DiffType.Added && entry.newParentName.isDefined) { -
- Under: @entry.newParentName.get -
- } - - @* Show variant changes if any *@ - @if(entry.variantsAdded.nonEmpty || entry.variantsRemoved.nonEmpty) { -
- @if(entry.variantsAdded.nonEmpty) { - - + - @entry.variantsAdded.take(5).mkString(", ") - @if(entry.variantsAdded.size > 5) { ... and @{entry.variantsAdded.size - 5} more } - - } - @if(entry.variantsRemoved.nonEmpty) { - - - - @entry.variantsRemoved.take(5).mkString(", ") - @if(entry.variantsRemoved.size > 5) { ... and @{entry.variantsRemoved.size - 5} more } - - } -
- } -
-
- @entry.changeIds.size change(s) -
-
-
- } -
- - @* Summary footer *@ -
-
-
- @diff.summary.nodesAdded - Nodes Added -
-
- @diff.summary.nodesModified - Nodes Modified -
-
- @diff.summary.nodesReparented - Reparented -
-
- @diff.summary.nodesRemoved - Removed -
-
- @diff.summary.variantsAdded - Variants+ -
-
-
-} diff --git a/app/views/curator/changesets/diffView.scala.html b/app/views/curator/changesets/diffView.scala.html deleted file mode 100644 index 619a0c62..00000000 --- a/app/views/curator/changesets/diffView.scala.html +++ /dev/null @@ -1,155 +0,0 @@ -@import models.domain.haplogroups.{ChangeSetDetails, ChangeSetStatus} -@import org.webjars.play.WebJarsUtil -@(details: ChangeSetDetails)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@statusBadge(s: ChangeSetStatus) = { - @s match { - case ChangeSetStatus.Draft => { - Draft - } - case ChangeSetStatus.ReadyForReview => { - Ready for Review - } - case ChangeSetStatus.UnderReview => { - Under Review - } - case ChangeSetStatus.Applied => { - Applied - } - case ChangeSetStatus.Discarded => { - Discarded - } - } -} - -@main("Tree Diff - " + details.changeSet.name) { -
- - -
-

- Tree Diff: @details.changeSet.name - @statusBadge(details.changeSet.status) -

- - Back to Change Sets - -
- -
-
-
-
-
Change Set Info
-
-
-
-
Source
-
@details.changeSet.sourceName
- -
Type
-
- - @details.changeSet.haplogroupType - -
- -
Created
-
@details.changeSet.createdAt.toLocalDate
- -
Total Changes
-
@details.totalChanges
-
-
-
- -
-
-
Summary
-
-
- - - - - - - - - - - - - - - - - - - -
+ Created@details.changeSet.statistics.nodesCreated
~ Updated@details.changeSet.statistics.nodesUpdated
Reparented@details.changeSet.statistics.relationshipsUpdated
+V Variants@details.changeSet.statistics.variantsAdded
-
-
-
- -
-
-
-
Diff Entries
-
- - - - - -
-
-
-
-
- Loading... -
-

Loading diff...

-
-
-
-
-
-
- - -} diff --git a/app/views/curator/changesets/list.scala.html b/app/views/curator/changesets/list.scala.html deleted file mode 100644 index ba3573ae..00000000 --- a/app/views/curator/changesets/list.scala.html +++ /dev/null @@ -1,99 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(hgType: Option[String], status: Option[String], pageSize: Int)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Curator - Change Sets") { - - -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Change Sets", controllers.routes.TreeVersioningCuratorController.listChangeSets(None, None, 20)) - )) -
-
- - @views.html.fragments.flashMessages(request.flash) - -
-
-
-
-
Change Sets
- Tree versioning for bulk merge operations -
-
-
-
- -
-
- -
-
- -
-
-
- Loading... -
-

Loading change sets...

-
-
-
-
-
- -
-
-
-
- -

Select a change set to view details

-
-
-
-
-
-
- - -} diff --git a/app/views/curator/changesets/listFragment.scala.html b/app/views/curator/changesets/listFragment.scala.html deleted file mode 100644 index 66fe11bd..00000000 --- a/app/views/curator/changesets/listFragment.scala.html +++ /dev/null @@ -1,57 +0,0 @@ -@import models.domain.haplogroups.{ChangeSetSummary, ChangeSetStatus} -@(changeSets: Seq[ChangeSetSummary], hgType: Option[String], status: Option[String], currentPage: Int, totalPages: Int, pageSize: Int)(implicit request: RequestHeader, messages: Messages) - -@if(changeSets.isEmpty) { -
- No change sets found matching your criteria. -

Change sets are created automatically when tree merge operations are performed.

-
-} else { -
- - - - - - - - - - - - @for(cs <- changeSets) { - - - - - - - - } - -
NameTypeStatusChangesCreated
- @cs.name -
@cs.sourceName -
- @views.html.fragments.badges.haplogroupType(cs.haplogroupType.toString) - @views.html.fragments.badges.changeSetStatus(cs.status) - @cs.totalChanges - @views.html.fragments.badges.pendingCount(cs.pendingChanges) - - @cs.createdAt.toLocalDate -
by @cs.createdBy -
-
- - @views.html.fragments.pagination( - currentPage, - totalPages, - (p: Int) => controllers.routes.TreeVersioningCuratorController.changeSetsFragment(hgType, status, p, pageSize), - "#changesets-table", - "#type-filter, #status-filter" - ) -} diff --git a/app/views/curator/dashboard.scala.html b/app/views/curator/dashboard.scala.html deleted file mode 100644 index f9575277..00000000 --- a/app/views/curator/dashboard.scala.html +++ /dev/null @@ -1,163 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import models.domain.haplogroups.{ChangeSet, ChangeSetStatus} -@(yHaplogroupCount: Int, mtHaplogroupCount: Int, variantCount: Int, yActiveChangeSet: Option[ChangeSet], mtActiveChangeSet: Option[ChangeSet])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@needsAttention(cs: ChangeSet) = @{ - cs.status == ChangeSetStatus.ReadyForReview -} - -@main("Curator Dashboard") { -
-

Curator Dashboard

- - @views.html.fragments.flashMessages(request.flash) - - @* Notification banner for change sets needing review *@ - @defining(Seq(yActiveChangeSet, mtActiveChangeSet).flatten.filter(needsAttention)) { pendingReview => - @if(pendingReview.nonEmpty) { - - } - } - -
-
-
-
-
Y-DNA Haplogroups
-

@yHaplogroupCount

- - Manage Y-DNA - -
-
-
-
-
-
-
mtDNA Haplogroups
-

@mtHaplogroupCount

- - Manage mtDNA - -
-
-
-
-
-
-
Variants
-

@variantCount

- - Manage Variants - -
-
-
-
- -
-
-
-
-
Tree Versioning
- @if(yActiveChangeSet.isDefined || mtActiveChangeSet.isDefined) { - WIP Active - } -
-
-

Review and approve changes from tree merge operations before applying to Production.

- - @* Show active change sets if any *@ - @if(yActiveChangeSet.isDefined || mtActiveChangeSet.isDefined) { -
- Active Change Sets: -
    - @yActiveChangeSet.map { cs => -
  • - Y-DNA: @cs.name - @cs.status - - View Diff - -
  • - } - @mtActiveChangeSet.map { cs => -
  • - mtDNA: @cs.name - @cs.status - - View Diff - -
  • - } -
-
- } else { -

- No pending changes. Production trees are up to date. -

- } - - - View All Change Sets - -
-
-
-
- - -
-} diff --git a/app/views/curator/genomeregions/createForm.scala.html b/app/views/curator/genomeregions/createForm.scala.html deleted file mode 100644 index a00ea0dc..00000000 --- a/app/views/curator/genomeregions/createForm.scala.html +++ /dev/null @@ -1,145 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import models.domain.genomics.GenbankContig -@import controllers.GenomeRegionFormData -@(form: Form[GenomeRegionFormData], contigs: Seq[GenbankContig], supportedBuilds: Seq[String])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Create Genome Region") { -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Genome Regions", controllers.routes.GenomeRegionsCuratorController.listRegions(None, None, 1, 25)), - ("Create", controllers.routes.GenomeRegionsCuratorController.createRegionForm) - )) -
-
- -
-
-
-
-
Create Genome Region
-
-
- @views.html.fragments.flashMessages(request.flash, Some(form)) - - @helper.form(action = controllers.routes.GenomeRegionsCuratorController.createRegion, Symbol("class") -> "needs-validation") { - @helper.CSRF.formField - -
- - - @form.error("genbankContigId").map { error => -
@error.message
- } -
- -
- - - @form.error("regionType").map { error => -
@error.message
- } -
- -
- - -
Optional name for named regions like palindromes
-
- -
-
- - - @form.error("startPos").map { error => -
@error.message
- } -
-
- - - @form.error("endPos").map { error => -
@error.message
- } -
-
- -
- - -
Quality modifier between 0 and 1
-
- -
- - - Cancel - -
- } -
-
-
-
-
-} diff --git a/app/views/curator/genomeregions/detailPanel.scala.html b/app/views/curator/genomeregions/detailPanel.scala.html deleted file mode 100644 index 7ee66afe..00000000 --- a/app/views/curator/genomeregions/detailPanel.scala.html +++ /dev/null @@ -1,62 +0,0 @@ -@import models.api.genomics.GenomeRegionDetailDto -@(region: GenomeRegionDetailDto)(implicit request: RequestHeader) - -
-
-
Region Details
-
- - Edit - - -
-
-
-
-
ID
-
@region.id
- -
Type
-
- @region.regionType -
- - @region.name.map { name => -
Name
-
@name
- } - - @if(region.coordinates.nonEmpty) { -
Coordinates
- @for((build, coord) <- region.coordinates) { -
@build
-
- @coord.contig: - @{java.text.NumberFormat.getIntegerInstance().format(coord.start)} - - @{java.text.NumberFormat.getIntegerInstance().format(coord.end)} - (@{java.text.NumberFormat.getIntegerInstance().format(coord.end - coord.start)} bp) -
- } - } else { -
Coordinates
-
No coordinates defined
- } - - @if((region.properties \ "modifier").asOpt[BigDecimal].isDefined) { -
Modifier
-
@{(region.properties \ "modifier").as[BigDecimal]}
- } - - @if((region.properties \ "stain").asOpt[String].isDefined) { -
Stain
-
@{(region.properties \ "stain").as[String]}
- } -
-
-
\ No newline at end of file diff --git a/app/views/curator/genomeregions/editForm.scala.html b/app/views/curator/genomeregions/editForm.scala.html deleted file mode 100644 index 20ad415f..00000000 --- a/app/views/curator/genomeregions/editForm.scala.html +++ /dev/null @@ -1,143 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import models.domain.genomics.GenbankContig -@import controllers.GenomeRegionFormData -@(id: Int, form: Form[GenomeRegionFormData], contigs: Seq[GenbankContig], supportedBuilds: Seq[String])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Edit Genome Region") { -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Genome Regions", controllers.routes.GenomeRegionsCuratorController.listRegions(None, None, 1, 25)), - ("Edit", controllers.routes.GenomeRegionsCuratorController.editRegionForm(id)) - )) -
-
- -
-
-
-
-
Edit Genome Region #@id
-
-
- @views.html.fragments.flashMessages(request.flash, Some(form)) - - @helper.form(action = controllers.routes.GenomeRegionsCuratorController.updateRegion(id), Symbol("class") -> "needs-validation") { - @helper.CSRF.formField - -
- - - @form.error("genbankContigId").map { error => -
@error.message
- } -
- -
- - - @form.error("regionType").map { error => -
@error.message
- } -
- -
- - -
- -
-
- - - @form.error("startPos").map { error => -
@error.message
- } -
-
- - - @form.error("endPos").map { error => -
@error.message
- } -
-
- -
- - -
- -
- - - Cancel - -
- } -
-
-
-
-
-} diff --git a/app/views/curator/genomeregions/list.scala.html b/app/views/curator/genomeregions/list.scala.html deleted file mode 100644 index bb50f804..00000000 --- a/app/views/curator/genomeregions/list.scala.html +++ /dev/null @@ -1,85 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import models.api.genomics.GenomeRegionDetailDto -@(regions: Seq[GenomeRegionDetailDto], build: Option[String], currentPage: Int, totalPages: Int, pageSize: Int, totalCount: Int, supportedBuilds: Seq[String])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Curator - Genome Regions") { - - -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Genome Regions", controllers.routes.GenomeRegionsCuratorController.listRegions(None, None, 1, 25)) - )) -
-
- - @views.html.fragments.flashMessages(request.flash) - -
-
-
-
-
Genome Regions
- - Create - -
-
-
-
-
- - - - - -
-
-
- -
- @listFragment(regions, build, currentPage, totalPages, pageSize, totalCount) -
-
-
-
- -
-
-
-
- -

Select a region to view details

-
-
-
-
-
-
- - -} \ No newline at end of file diff --git a/app/views/curator/genomeregions/listFragment.scala.html b/app/views/curator/genomeregions/listFragment.scala.html deleted file mode 100644 index 873a7a62..00000000 --- a/app/views/curator/genomeregions/listFragment.scala.html +++ /dev/null @@ -1,92 +0,0 @@ -@import models.api.genomics.GenomeRegionDetailDto -@(regions: Seq[GenomeRegionDetailDto], build: Option[String], currentPage: Int, totalPages: Int, pageSize: Int, totalCount: Int)(implicit request: RequestHeader, messages: Messages) - -@defining(java.text.NumberFormat.getIntegerInstance()) { nf => -
- - @if(build.nonEmpty) { - Found @nf.format(totalCount) regions for @build.get - } else { - Showing @nf.format(totalCount) total regions - } - - @if(totalPages > 1) { - Page @currentPage of @nf.format(totalPages) - } -
-} - -@if(regions.isEmpty) { -
- @if(build.nonEmpty) { - No genome regions found for @build.get. - } else { - No genome regions found. - } -
-} else { -
- - - - - - - - - - - - @for(region <- regions) { - @defining(build.flatMap(b => region.coordinates.get(b)).orElse(region.coordinates.headOption.map(_._2))) { displayCoord => - @defining(build.orElse(region.coordinates.headOption.map(_._1))) { displayBuild => - - - - - - - - } - } - } - -
ChromosomeTypeNamePositionBuild
- @displayCoord.map(_.contig).getOrElse("?") - - @region.regionType - - @region.name.getOrElse("-") - - @displayCoord.map { c => - @{java.text.NumberFormat.getIntegerInstance().format(c.start)} - - @{java.text.NumberFormat.getIntegerInstance().format(c.end)} - }.getOrElse("-") - - @displayBuild.map { b => - @b - }.getOrElse("-") -
-
- - @views.html.fragments.pagination( - currentPage, - totalPages, - (p: Int) => controllers.routes.GenomeRegionsCuratorController.regionsFragment(None, build, p, pageSize), - "#regions-table", - "#build-filter" - ) -} - -@buildBadgeClass(refGenome: String) = @{ - refGenome match { - case "GRCh37" => "bg-warning text-dark" - case "GRCh38" => "bg-info" - case "hs1" => "bg-success" - case _ => "bg-secondary" - } -} \ No newline at end of file diff --git a/app/views/curator/haplogroups/createForm.scala.html b/app/views/curator/haplogroups/createForm.scala.html deleted file mode 100644 index 1c4c3e3d..00000000 --- a/app/views/curator/haplogroups/createForm.scala.html +++ /dev/null @@ -1,374 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import controllers.CreateHaplogroupFormData -@import models.domain.haplogroups.Haplogroup -@(form: Form[CreateHaplogroupFormData], yRoots: Seq[Haplogroup], mtRoots: Seq[Haplogroup])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Create Haplogroup") { -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Haplogroups", controllers.routes.CuratorController.listHaplogroups(None, None, 1, 20)), - ("Create", controllers.routes.CuratorController.createHaplogroupForm) - )) - -
-
-
Create Haplogroup
-
-
- @views.html.fragments.flashMessages(request.flash, Some(form)) - -
- Creation Rules: -
    -
  • New Root: Creates the tree root (only if no root exists)
  • -
  • New Root Above Existing: Creates a new root and makes the current root its child (e.g., Neanderthal above Human)
  • -
  • New Leaf: Creates a terminal node under an existing haplogroup
  • -
  • To create an internal node (intermediate haplogroup), use the Split function from an existing haplogroup
  • -
-
- - @helper.form(controllers.routes.CuratorController.createHaplogroup) { - @helper.CSRF.formField - -
- - - @form.error("haplogroupType").map { error => -
@error.message
- } -
- -
- - -
- - -
- - - -
- - -
- - - -
- - - -
- -
- - - @form.error("name").map { error => -
@error.message
- } -
- -
- - -
- -
- - -
- -
-
- - - @form.error("source").map { error => -
@error.message
- } -
-
- - - @form.error("confidenceLevel").map { error => -
@error.message
- } -
-
- -
- - - Cancel - -
- } -
-
-
-
-
- - -} diff --git a/app/views/curator/haplogroups/detailPanel.scala.html b/app/views/curator/haplogroups/detailPanel.scala.html deleted file mode 100644 index cdb402d2..00000000 --- a/app/views/curator/haplogroups/detailPanel.scala.html +++ /dev/null @@ -1,210 +0,0 @@ -@import models.domain.haplogroups.Haplogroup -@import models.domain.genomics.VariantV2 -@import models.domain.curator.AuditLogEntry -@import utils.CuratorViewUtils -@(haplogroup: Haplogroup, parentOpt: Option[Haplogroup], children: Seq[Haplogroup], variants: Seq[VariantV2], history: Seq[AuditLogEntry])(implicit request: RequestHeader) - -
-
-
@haplogroup.name
- - @haplogroup.haplogroupType - -
-
-
-
Lineage
-
@haplogroup.lineage.getOrElse("-")
- -
Description
-
@haplogroup.description.getOrElse("-")
- -
Source
-
@haplogroup.source
- -
Confidence
-
@haplogroup.confidenceLevel
- - @if(haplogroup.formedYbp.isDefined || haplogroup.tmrcaYbp.isDefined) { -
Branch Ages
-
- @haplogroup.formedEstimate.map { est => -
Formed: @est.formattedWithRange
- } - @haplogroup.tmrcaEstimate.map { est => -
TMRCA: @est.formattedWithRange
- } - @haplogroup.ageEstimateSource.map { src => -
Source: @src
- } -
- } - -
Valid From
-
@haplogroup.validFrom.toLocalDate
- - @haplogroup.validUntil.map { until => -
Valid Until
-
@until.toLocalDate
- } -
- - @haplogroup.provenance.map { prov => -
-
Provenance
-
-
Primary Credit
-
- - @prov.primaryCredit - -
- - @if(prov.nodeProvenance.nonEmpty) { -
Contributors
-
- @for(source <- prov.nodeProvenance.toSeq.sorted) { - @source - } -
- } - - @prov.lastMergedFrom.map { source => -
Last Merged
-
- - from @source - @prov.lastMergedAt.map { ts => - on @ts.toLocalDate - } - -
- } - - @if(prov.variantProvenance.nonEmpty) { -
Variant Sources
-
- -
-
    - @for((variant, sources) <- prov.variantProvenance.toSeq.sortBy(_._1).take(20)) { -
  • - @variant - - @for(src <- sources.toSeq.sorted) { - @src - } - -
  • - } - @if(prov.variantProvenance.size > 20) { -
  • - +@(prov.variantProvenance.size - 20) more variants... -
  • - } -
-
-
- } -
- } - -
- -
Tree Position
-
-
Parent
-
- @parentOpt.map { parent => - - @parent.name - - }.getOrElse { Root } -
- -
Children
-
- @if(children.isEmpty) { - None - } else { - @for(child <- children.take(10)) { - - @child.name - - } - @if(children.size > 10) { - +@(children.size - 10) more - } - } -
-
- -
- -
- @variantsPanel(haplogroup.id.get, variants) -
- -
- -
- - Edit - - - Split - - @parentOpt.map { _ => - - Merge into Parent - - } - - Reparent - - -
- - @if(history.nonEmpty) { -
-
Recent History
-
    - @for(entry <- history.take(5)) { -
  • - @entry.action - @entry.createdAt.toLocalDate - @entry.comment.map { c =>
    @c } -
  • - } -
- @if(history.size > 5) { - - View all history... - - } - } -
-
\ No newline at end of file diff --git a/app/views/curator/haplogroups/editForm.scala.html b/app/views/curator/haplogroups/editForm.scala.html deleted file mode 100644 index 1a5b531b..00000000 --- a/app/views/curator/haplogroups/editForm.scala.html +++ /dev/null @@ -1,191 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import controllers.HaplogroupFormData -@(id: Int, form: Form[HaplogroupFormData])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Edit Haplogroup") { -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Haplogroups", controllers.routes.CuratorController.listHaplogroups(None, None, 1, 20)), - ("Edit", controllers.routes.CuratorController.editHaplogroupForm(id)) - )) -
-
- -
-
-
-
-
Edit Haplogroup
-
-
- @views.html.fragments.flashMessages(request.flash, Some(form)) - - @helper.form(controllers.routes.CuratorController.updateHaplogroup(id)) { - @helper.CSRF.formField - -
-
- - - @form.error("name").map { error => -
@error.message
- } -
-
- - - -
-
- -
- - -
- -
- - -
- -
-
- - - @form.error("source").map { error => -
@error.message
- } -
-
- - - @form.error("confidenceLevel").map { error => -
@error.message
- } -
-
- -
-
Branch Age Estimates (YBP)
- -
-
-
-
Formed
-
-
- -
-
- -
-
- -
-
-
-
-
-
-
TMRCA
-
-
- -
-
- -
-
- -
-
-
-
-
- -
- - -
- -
- - - Cancel - -
- } -
-
-
-
-
-} diff --git a/app/views/curator/haplogroups/list.scala.html b/app/views/curator/haplogroups/list.scala.html deleted file mode 100644 index 66e878e8..00000000 --- a/app/views/curator/haplogroups/list.scala.html +++ /dev/null @@ -1,94 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(query: Option[String], hgType: Option[String], pageSize: Int)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Curator - Haplogroups") { - - -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Haplogroups", controllers.routes.CuratorController.listHaplogroups(None, None, 1, 20)) - )) -
-
- - @views.html.fragments.flashMessages(request.flash) - -
-
-
-
-
Haplogroups
- - Create - -
-
-
-
- @views.html.fragments.searchInput( - id = "haplogroup-search", - name = "query", - value = query, - placeholder = "Search haplogroups...", - hxGet = controllers.routes.CuratorController.haplogroupsFragment(None, hgType, 1, pageSize), - hxTarget = "#haplogroups-table" - ) -
-
- -
-
- -
-
-
- Loading... -
-

Loading haplogroups...

-
-
-
-
-
- -
-
-
-
- -

Select a haplogroup to view details

-
-
-
-
-
-
- - -} diff --git a/app/views/curator/haplogroups/listFragment.scala.html b/app/views/curator/haplogroups/listFragment.scala.html deleted file mode 100644 index 71cc385f..00000000 --- a/app/views/curator/haplogroups/listFragment.scala.html +++ /dev/null @@ -1,61 +0,0 @@ -@import models.domain.haplogroups.Haplogroup -@(haplogroups: Seq[Haplogroup], query: Option[String], hgType: Option[String], currentPage: Int, totalPages: Int, pageSize: Int)(implicit request: RequestHeader, messages: Messages) - -@if(haplogroups.isEmpty) { -
No haplogroups found matching your criteria.
-} else { -
- - - - - - - - - - - - @for(hg <- haplogroups) { - - - - - - - - } - -
NameTypeSourceProvenance
@hg.name - @views.html.fragments.badges.haplogroupType(hg.haplogroupType.toString) - @hg.source - @hg.provenance.map { prov => - @views.html.fragments.badges.provenance( - prov.primaryCredit, - if(prov.nodeProvenance.size > 1) prov.nodeProvenance.size - 1 else 0, - Some(s"Primary: ${prov.primaryCredit}, Contributors: ${prov.nodeProvenance.mkString(", ")}") - ) - }.getOrElse { - - - } - - - - -
-
- - @views.html.fragments.pagination( - currentPage, - totalPages, - (p: Int) => controllers.routes.CuratorController.haplogroupsFragment(query, hgType, p, pageSize), - "#haplogroups-table", - "#haplogroup-search, #type-filter" - ) -} diff --git a/app/views/curator/haplogroups/mergeConfirmForm.scala.html b/app/views/curator/haplogroups/mergeConfirmForm.scala.html deleted file mode 100644 index b8263821..00000000 --- a/app/views/curator/haplogroups/mergeConfirmForm.scala.html +++ /dev/null @@ -1,126 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import services.MergePreview -@(preview: MergePreview)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main(s"Merge ${preview.child.name}") { -
-
-
- - -
-
-
Confirm Merge into Parent
-
-
-
- This operation will: -
    -
  1. Move all variants from @preview.child.name up to @preview.parent.name
  2. -
  3. Move all children of @preview.child.name to become children of @preview.parent.name
  4. -
  5. Delete @preview.child.name
  6. -
-
- -
-
-
-
- Will be Deleted -
-
-
@preview.child.name
- @preview.child.haplogroupType - @preview.child.lineage.map { l => -
@l
- } -
-
-
-
-
-
- Will Absorb -
-
-
@preview.parent.name
- @preview.parent.haplogroupType - @preview.parent.lineage.map { l => -
@l
- } -
-
-
-
- -
What will be transferred:
- -
- Variants: - @if(preview.allVariants.nonEmpty) { - @preview.uniqueVariants.size of @preview.allVariants.size variant(s) will be added -
    - @for(variant <- preview.allVariants) { -
  • - @if(preview.uniqueVariants.exists(_.variantId == variant.variantId)) { - - } else { - - (already on parent) - } - @variant.displayName - @variant.mutationType -
  • - } -
- } else { - No variants - } -
- -
- Children to promote: - @if(preview.grandchildren.nonEmpty) { - @preview.grandchildren.size -
    - @for(child <- preview.grandchildren) { -
  • - - @child.name - @child.haplogroupType -
  • - } -
- } else { - No children - } -
- - @helper.form(controllers.routes.CuratorController.mergeIntoParent(preview.child.id.get)) { - @helper.CSRF.formField - -
- - - Cancel - -
- } -
-
-
-
-
-} diff --git a/app/views/curator/haplogroups/reparentForm.scala.html b/app/views/curator/haplogroups/reparentForm.scala.html deleted file mode 100644 index 4cef1e28..00000000 --- a/app/views/curator/haplogroups/reparentForm.scala.html +++ /dev/null @@ -1,120 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import controllers.ReparentFormData -@import models.domain.haplogroups.Haplogroup -@(haplogroup: Haplogroup, currentParent: Option[Haplogroup], potentialParents: Seq[Haplogroup], form: Form[ReparentFormData])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main(s"Reparent ${haplogroup.name}") { -
-
-
- - -
-
-
Reparent @haplogroup.name
-
-
- @form.globalError.map { error => -
@error.message
- } - -
- - Current Structure: - @currentParent match { - case Some(parent) => { - @parent.name@haplogroup.name - } - case None => { - @haplogroup.name (root node - no parent) - } - } -
- - @helper.form(controllers.routes.CuratorController.reparent(haplogroup.id.get)) { - @helper.CSRF.formField - -
- - - @form.error("newParentId").map { error => -
@error.message
- } -
- Select the haplogroup that @haplogroup.name should become a child of. -
-
- -
- - - @form.error("source").map { error => -
@error.message
- } -
- Attribution for this structural change (e.g., "post-merge-cleanup", "curator-correction"). -
-
- -
- - -
- Optional explanation for the audit log. -
-
- -
- - Warning: Reparenting will move @haplogroup.name and all its descendants - to the new location in the tree. This operation is logged and can be reviewed in the audit history. -
- -
- - - Cancel - -
- } -
-
-
-
-
-} diff --git a/app/views/curator/haplogroups/splitBranchForm.scala.html b/app/views/curator/haplogroups/splitBranchForm.scala.html deleted file mode 100644 index 4a65444d..00000000 --- a/app/views/curator/haplogroups/splitBranchForm.scala.html +++ /dev/null @@ -1,184 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import controllers.SplitBranchFormData -@import models.domain.haplogroups.Haplogroup -@import models.domain.genomics.VariantV2 -@import play.api.libs.json.JsObject -@import utils.VariantViewUtils -@(parent: Haplogroup, variants: Seq[VariantV2], children: Seq[Haplogroup], form: Form[SplitBranchFormData])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main(s"Split ${parent.name}") { -
-
-
- - -
-
-
Create Subclade under @parent.name
-
-
- @form.globalError.map { error => -
@error.message
- } - - @helper.form(controllers.routes.CuratorController.splitBranch(parent.id.get)) { - @helper.CSRF.formField - -
New Subclade Details
- -
- - - @form.error("name").map { error => -
@error.message
- } -
- -
- - -
- -
- - -
- -
-
- - - @form.error("source").map { error => -
@error.message
- } -
-
- - - @form.error("confidenceLevel").map { error => -
@error.message
- } -
-
- -
- - @if(variants.nonEmpty) { -
- Variants to MOVE to new subclade - (will be removed from @parent.name) -
- -
- @for(variant <- variants) { - @defining(VariantViewUtils.refGenomes(variant)) { refs => -
- form(s"variantIds[$i]").value).map(_.toInt).contains(variant.variantId.get)){checked}> - -
- } - } -
- } else { -
- No variants currently associated with @parent.name. -
- } - -
- - @if(children.nonEmpty) { -
- Children to re-parent under new subclade - (will become children of new subclade) -
- -
- @for(child <- children) { -
- form(s"childIds[$i]").value).map(_.toInt).contains(child.id.get)){checked}> - -
- } -
- } else { -
- @parent.name has no children. -
- } - -
- - - Cancel - -
- } -
-
-
-
-
-} \ No newline at end of file diff --git a/app/views/curator/haplogroups/variantHistoryPanel.scala.html b/app/views/curator/haplogroups/variantHistoryPanel.scala.html deleted file mode 100644 index 38364971..00000000 --- a/app/views/curator/haplogroups/variantHistoryPanel.scala.html +++ /dev/null @@ -1,34 +0,0 @@ -@import models.domain.haplogroups.HaplogroupVariantMetadata -@import utils.CuratorViewUtils -@(haplogroupVariantId: Int, history: Seq[HaplogroupVariantMetadata])(implicit request: RequestHeader) - -
-
-
Haplogroup-Variant History
-
-
- @if(history.isEmpty) { -

No revision history available.

- } else { -
    - @for(entry <- history) { -
  • -
    -
    - @entry.change_type - Revision @entry.revision_id -
    - @entry.timestamp.toLocalDate -
    -
    - By: @entry.author -
    -
    - @entry.comment -
    -
  • - } -
- } -
-
\ No newline at end of file diff --git a/app/views/curator/haplogroups/variantSearchResults.scala.html b/app/views/curator/haplogroups/variantSearchResults.scala.html deleted file mode 100644 index 79c59232..00000000 --- a/app/views/curator/haplogroups/variantSearchResults.scala.html +++ /dev/null @@ -1,71 +0,0 @@ -@import models.domain.genomics.VariantV2 -@import play.api.libs.json.JsObject -@import utils.VariantViewUtils -@(haplogroupId: Int, haplogroupName: String, query: Option[String], variants: Seq[VariantV2])(implicit request: RequestHeader) - -
- - -
Enter rsId (rs...) or common name to search.
-
- -@if(query.exists(_.nonEmpty)) { - @if(variants.isEmpty) { -
- No matching variants found that aren't already associated with @haplogroupName. -
- } else { -
- @for(variant <- variants.take(10)) { - @defining(VariantViewUtils.refGenomes(variant)) { refs => -
-
-
- @variant.displayName - @variant.rsIds.headOption.filter(_ != variant.displayName).map { rs => - (@rs) - } - @refs.size build@if(refs.size != 1){s} -
- -
-
- @for(refGenome <- refs) { -
- @refGenome - @VariantViewUtils.formatPosition(variant, refGenome) - @Html(VariantViewUtils.formatAlleles(variant, refGenome)) -
- } -
-
- } - } -
- @if(variants.size > 10) { -
- Showing first 10 of @variants.size results. Refine your search for more specific results. -
- } - } -} else { -
- Start typing to search for variants... -
-} \ No newline at end of file diff --git a/app/views/curator/haplogroups/variantsPanel.scala.html b/app/views/curator/haplogroups/variantsPanel.scala.html deleted file mode 100644 index 3fa3f049..00000000 --- a/app/views/curator/haplogroups/variantsPanel.scala.html +++ /dev/null @@ -1,127 +0,0 @@ -@import models.domain.genomics.VariantV2 -@import play.api.libs.json.JsObject -@import utils.VariantViewUtils -@(haplogroupId: Int, variants: Seq[VariantV2])(implicit request: RequestHeader) - -
-
- Defining Variants - @variants.size -
- -
- -@if(variants.isEmpty) { -

No variants associated with this haplogroup.

-} else { - @defining(s"variants-$haplogroupId") { containerId => -
- -
- -
-
    - @for(variant <- variants) { - @defining(VariantViewUtils.refGenomes(variant)) { refs => -
  • -
    -
    - @variant.displayName - @variant.rsIds.headOption.filter(_ != variant.canonicalName.getOrElse("")).map { rs => - (@rs) - } - @refs.size build@if(refs.size != 1){s} -
    - @variant.variantId.map { vid => - - } -
    -
    - @for(refGenome <- refs) { -
    - @refGenome - @VariantViewUtils.formatPosition(variant, refGenome) - @Html(VariantViewUtils.formatAlleles(variant, refGenome)) -
    - } -
    -
  • - } - } -
-
- - - } - - -} - - - \ No newline at end of file diff --git a/app/views/curator/variants/createForm.scala.html b/app/views/curator/variants/createForm.scala.html deleted file mode 100644 index 624f5fe3..00000000 --- a/app/views/curator/variants/createForm.scala.html +++ /dev/null @@ -1,162 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import controllers.VariantFormData -@(form: Form[VariantFormData])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Create Variant") { -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Variants", controllers.routes.CuratorController.listVariants(None, 1, 20)), - ("Create", controllers.routes.CuratorController.createVariantForm) - )) - -
-
-
Create Variant
-
-
- @views.html.fragments.flashMessages(request.flash, Some(form)) - - @helper.form(controllers.routes.CuratorController.createVariant) { - @helper.CSRF.formField - -
-
- - - @form.error("refGenome").map { error => -
@error.message
- } -
-
- - - @form.error("contig").map { error => -
@error.message
- } -
-
- -
-
- - - @form.error("position").map { error => -
@error.message
- } -
-
- - -
Reference genome allele (ancestral)
- @form.error("referenceAllele").map { error => -
@error.message
- } -
-
- - -
Mutated allele (derived)
- @form.error("alternateAllele").map { error => -
@error.message
- } -
-
- -
- - - @form.error("variantType").map { error => -
@error.message
- } -
- -
-
Optional Identifiers
- -
-
- - -
Becomes canonical name if provided
-
-
- - -
-
- -
- - - Cancel - -
- } -
-
-
-
-
-} diff --git a/app/views/curator/variants/detailPanel.scala.html b/app/views/curator/variants/detailPanel.scala.html deleted file mode 100644 index 8a9a543f..00000000 --- a/app/views/curator/variants/detailPanel.scala.html +++ /dev/null @@ -1,125 +0,0 @@ -@import models.domain.genomics.{NamingStatus, VariantV2} -@import models.domain.haplogroups.Haplogroup -@import models.domain.curator.AuditLogEntry -@import play.api.libs.json.JsObject -@import utils.{CuratorViewUtils, VariantViewUtils} -@(variant: VariantV2, haplogroups: Seq[Haplogroup], history: Seq[AuditLogEntry])(implicit request: RequestHeader, messages: Messages) - -
-
-
@variant.displayName
- @variant.mutationType.displayName -
-
-
-
Canonical Name
-
@variant.canonicalName.getOrElse("-")
- -
rsIds
-
- @if(variant.rsIds.nonEmpty) { - @for(rsId <- variant.rsIds) { - @rsId - } - } else { - } -
- - @defining(VariantViewUtils.primaryAlleles(variant)) { alleles => -
Ancestral
-
@alleles._1
- -
Derived
-
@alleles._2
- } - -
Type
-
@variant.mutationType.displayName
- -
Naming Status
-
- - @variant.namingStatus.displayName - -
-
- - @if(variant.commonNames.nonEmpty) { -
-
Alternative Names
-
- @for(name <- variant.commonNames) { - @name - } -
- } - -
- -
Reference Builds
- @views.html.fragments.variant.referenceBuilds(variant) - -
- -
Used By Haplogroups
- @if(haplogroups.isEmpty) { -

This variant is not associated with any haplogroups.

- } else { -
- @for(hg <- haplogroups.take(10)) { - - @hg.name - - } - @if(haplogroups.size > 10) { - +@(haplogroups.size - 10) more - } -
- } - - @variant.notes.map { notes => -
-
Notes
-

@notes

- } - -
- -
- - Edit - - -
- - @if(history.nonEmpty) { -
-
Recent History
-
    - @for(entry <- history.take(5)) { -
  • - @entry.action - @entry.createdAt.toLocalDate - @entry.comment.map { c =>
    @c } -
  • - } -
- @if(history.size > 5) { - - View all history... - - } - } -
-
diff --git a/app/views/curator/variants/editForm.scala.html b/app/views/curator/variants/editForm.scala.html deleted file mode 100644 index 4e21d2c8..00000000 --- a/app/views/curator/variants/editForm.scala.html +++ /dev/null @@ -1,129 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import controllers.VariantFormData -@(id: Int, form: Form[VariantFormData], contigDisplay: String)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Edit Variant") { -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Variants", controllers.routes.CuratorController.listVariants(None, 1, 20)), - ("Edit", controllers.routes.CuratorController.editVariantForm(id)) - )) - -
-
-
Edit Variant
-
-
- @views.html.fragments.flashMessages(request.flash, Some(form)) - - @helper.form(controllers.routes.CuratorController.updateVariant(id)) { - @helper.CSRF.formField - -
- Genomic coordinates are immutable after creation. Only metadata fields can be edited. -
- -
-
- - - - -
-
- - - -
-
- -
-
- - - -
-
- - - -
-
- -
-
Editable Fields
- -
- - - @form.error("variantType").map { error => -
@error.message
- } -
- -
-
- - -
Becomes canonical name if provided
-
-
- - -
-
- -
- - - Cancel - -
- } -
-
-
-
-
-} diff --git a/app/views/curator/variants/list.scala.html b/app/views/curator/variants/list.scala.html deleted file mode 100644 index 17941ff1..00000000 --- a/app/views/curator/variants/list.scala.html +++ /dev/null @@ -1,81 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(query: Option[String], pageSize: Int)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main("Curator - Variants") { - - -
-
-
- @views.html.fragments.breadcrumbs(Seq( - ("Curator", controllers.routes.CuratorController.dashboard), - ("Variants", controllers.routes.CuratorController.listVariants(None, 1, 20)) - )) -
-
- - @views.html.fragments.flashMessages(request.flash) - -
-
-
-
-
Variants
- - Create - -
-
-
-
- @views.html.fragments.searchInput( - id = "variant-search", - name = "query", - value = query, - placeholder = "Search by rsId or common name...", - hxGet = controllers.routes.CuratorController.variantsFragment(None, 1, pageSize), - hxTarget = "#variants-table" - ) -
-
- -
-
-
- Loading... -
-

Loading variants...

-
-
-
-
-
- -
-
-
-
- -

Select a variant to view details

-
-
-
-
-
-
- - -} diff --git a/app/views/curator/variants/listFragment.scala.html b/app/views/curator/variants/listFragment.scala.html deleted file mode 100644 index 09dab8d3..00000000 --- a/app/views/curator/variants/listFragment.scala.html +++ /dev/null @@ -1,93 +0,0 @@ -@import models.domain.genomics.VariantV2 -@import utils.VariantViewUtils -@(variants: Seq[VariantV2], query: Option[String], currentPage: Int, totalPages: Int, pageSize: Int, totalCount: Int)(implicit request: RequestHeader, messages: Messages) - -@defining(java.text.NumberFormat.getIntegerInstance()) { nf => -
- - @if(query.exists(_.trim.nonEmpty)) { - Found @nf.format(totalCount) variants matching "@query.get" - } else { - Showing @nf.format(totalCount) total variants - } - - @if(totalPages > 1) { - Page @currentPage of @nf.format(totalPages) - } -
-} - -@if(variants.isEmpty) { -
- @if(query.exists(_.trim.nonEmpty)) { - No variants found matching "@query.get". - } else { - Enter a search term to find variants. - } -
-} else { -
- - - - - - - - - - - - @for(variant <- variants) { - @defining(VariantViewUtils.primaryAlleles(variant)) { alleles => - @defining(VariantViewUtils.refGenomes(variant)) { refs => - - - - - - - - } - } - } - -
Name / rsIdAnc/DerTypeBuilds
- @variant.displayName - @variant.rsIds.headOption.filter(_ != variant.canonicalName.getOrElse("")).map { rsId => -
@rsId - } -
- @alleles._1 - @if(alleles._2 != "?") { - @alleles._2 - } - - @variant.mutationType - - @for(refGenome <- refs) { - - @refGenome - - } - - - - -
-
- - @views.html.fragments.pagination( - currentPage, - totalPages, - p => controllers.routes.CuratorController.variantsFragment(query, p, pageSize), - "#variants-table", - "#variant-search" - ) -} \ No newline at end of file diff --git a/app/views/errors/forbidden.scala.html b/app/views/errors/forbidden.scala.html deleted file mode 100644 index 60547911..00000000 --- a/app/views/errors/forbidden.scala.html +++ /dev/null @@ -1,17 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(message: String)(implicit request: RequestHeader, messagesProvider: Messages, webJarsUtil: WebJarsUtil) - -@main(messagesProvider("error.forbidden.title")) { -
-
-
-

403

-

@messagesProvider("error.forbidden.heading")

-

@message

- - @messagesProvider("nav.home") - -
-
-
-} diff --git a/app/views/errors/notFound.scala.html b/app/views/errors/notFound.scala.html deleted file mode 100644 index 5c4ac98b..00000000 --- a/app/views/errors/notFound.scala.html +++ /dev/null @@ -1,17 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(message: String)(implicit request: RequestHeader, messagesProvider: Messages, webJarsUtil: WebJarsUtil) - -@main(messagesProvider("error.notFound.title")) { -
-
-
-

404

-

@messagesProvider("error.notFound.heading")

-

@message

- - @messagesProvider("nav.home") - -
-
-
-} diff --git a/app/views/faq.scala.html b/app/views/faq.scala.html deleted file mode 100644 index 4b25c34c..00000000 --- a/app/views/faq.scala.html +++ /dev/null @@ -1,15 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages("nav.faq")) { -
-

@messages("legal.faq.title")

- @{ - messages.lang.code match { - case "es" => views.html.content.en.faqText() - case "fr" => views.html.content.en.faqText() - case _ => views.html.content.en.faqText() - } - } -
-} diff --git a/app/views/fragments/badges/changeSetStatus.scala.html b/app/views/fragments/badges/changeSetStatus.scala.html deleted file mode 100644 index 573a26ab..00000000 --- a/app/views/fragments/badges/changeSetStatus.scala.html +++ /dev/null @@ -1,19 +0,0 @@ -@import models.domain.haplogroups.ChangeSetStatus -@(status: ChangeSetStatus) -@status match { - case ChangeSetStatus.Draft => { - Draft - } - case ChangeSetStatus.ReadyForReview => { - Ready for Review - } - case ChangeSetStatus.UnderReview => { - Under Review - } - case ChangeSetStatus.Applied => { - Applied - } - case ChangeSetStatus.Discarded => { - Discarded - } -} diff --git a/app/views/fragments/badges/changeStatus.scala.html b/app/views/fragments/badges/changeStatus.scala.html deleted file mode 100644 index 6baf97e5..00000000 --- a/app/views/fragments/badges/changeStatus.scala.html +++ /dev/null @@ -1,16 +0,0 @@ -@import models.domain.haplogroups.ChangeStatus -@(status: ChangeStatus) -@status match { - case ChangeStatus.Pending => { - Pending - } - case ChangeStatus.Applied => { - Applied - } - case ChangeStatus.Reverted => { - Reverted - } - case ChangeStatus.Skipped => { - Skipped - } -} diff --git a/app/views/fragments/badges/haplogroupType.scala.html b/app/views/fragments/badges/haplogroupType.scala.html deleted file mode 100644 index 3fd99db9..00000000 --- a/app/views/fragments/badges/haplogroupType.scala.html +++ /dev/null @@ -1,2 +0,0 @@ -@(htStr: String) -@htStr diff --git a/app/views/fragments/badges/pendingCount.scala.html b/app/views/fragments/badges/pendingCount.scala.html deleted file mode 100644 index ebd28ecb..00000000 --- a/app/views/fragments/badges/pendingCount.scala.html +++ /dev/null @@ -1,4 +0,0 @@ -@(count: Int) -@if(count > 0) { - @count pending -} diff --git a/app/views/fragments/badges/provenance.scala.html b/app/views/fragments/badges/provenance.scala.html deleted file mode 100644 index 88374ca2..00000000 --- a/app/views/fragments/badges/provenance.scala.html +++ 
/dev/null @@ -1,8 +0,0 @@ -@(credit: String, additionalCount: Int = 0, tooltip: Option[String] = None) - title="@t" }> - @credit - @if(additionalCount > 0) { - +@additionalCount - } - diff --git a/app/views/fragments/badges/treeChangeType.scala.html b/app/views/fragments/badges/treeChangeType.scala.html deleted file mode 100644 index d7ad23aa..00000000 --- a/app/views/fragments/badges/treeChangeType.scala.html +++ /dev/null @@ -1,22 +0,0 @@ -@import models.domain.haplogroups.TreeChangeType -@(changeType: TreeChangeType) -@changeType match { - case TreeChangeType.Create => { - CREATE - } - case TreeChangeType.Update => { - UPDATE - } - case TreeChangeType.Reparent => { - REPARENT - } - case TreeChangeType.AddVariant => { - +VARIANT - } - case TreeChangeType.RemoveVariant => { - -VARIANT - } - case TreeChangeType.Delete => { - DELETE - } -} diff --git a/app/views/fragments/breadcrumbs.scala.html b/app/views/fragments/breadcrumbs.scala.html deleted file mode 100644 index 036fe12e..00000000 --- a/app/views/fragments/breadcrumbs.scala.html +++ /dev/null @@ -1,13 +0,0 @@ -@(crumbs: Seq[(String, play.api.mvc.Call)]) - - diff --git a/app/views/fragments/coverageBenchmarks.scala.html b/app/views/fragments/coverageBenchmarks.scala.html deleted file mode 100644 index 263a82fa..00000000 --- a/app/views/fragments/coverageBenchmarks.scala.html +++ /dev/null @@ -1,70 +0,0 @@ -@import models.domain.genomics.{CoverageBenchmark, SequencingLab} - -@(benchmarks: Seq[CoverageBenchmark], lab: Option[SequencingLab])(implicit messages: Messages) -@* Helper function to calculate 95% confidence interval *@ -@calculateCI(mean: Option[Double], stddev: Option[Double], n: Int) = @{ - (mean, stddev) match { - case (Some(m), Some(sd)) if n > 1 => - val margin = 1.96 * sd / math.sqrt(n) - val lower = m - margin - val upper = m + margin - f"$m%.1f ($lower%.1f-$upper%.1f)" - case (Some(m), _) => - f"$m%.1f" - case _ => - "-" - } -} - -
- - - - - - - - - - - - - - - - - - - @for(benchmark <- benchmarks) { - - - - - - - - - - - - - - - } - -
@messages("benchmark.header.testType")@messages("benchmark.header.contig")@messages("benchmark.header.meanReadLen")@messages("benchmark.header.readLenRange")@messages("benchmark.header.meanInsertSize")@messages("benchmark.header.insertSizeRange")@messages("benchmark.header.meanDepth")@messages("benchmark.header.noCoverage")@messages("benchmark.header.lowQuality")@messages("benchmark.header.callable")@messages("benchmark.header.meanMappingQuality")@messages("benchmark.header.samples")
@benchmark.testType@benchmark.contig@benchmark.meanReadLen.map(v => f"$v%.1f").getOrElse("-") - @benchmark.minReadLen.map(_.toString).getOrElse("-") - - @benchmark.maxReadLen.map(_.toString).getOrElse("-") - @benchmark.meanInsertLen.map(v => f"$v%.1f").getOrElse("-") - @benchmark.minInsertLen.map(_.toString).getOrElse("-") - - @benchmark.maxInsertLen.map(_.toString).getOrElse("-") - @calculateCI(benchmark.meanDepthAvg, benchmark.meanDepthStddev, benchmark.numSamples)@calculateCI(benchmark.basesNoCoverageAvg, benchmark.basesNoCoverageStddev, benchmark.numSamples)@calculateCI(benchmark.basesLowQualMappingAvg, benchmark.basesLowQualMappingStddev, benchmark.numSamples)@calculateCI(benchmark.basesCallableAvg, benchmark.basesCallableStddev, benchmark.numSamples)@benchmark.meanMappingQuality.map(v => f"$v%.1f").getOrElse("-") - @benchmark.numSamples -
-
- -@if(benchmarks.isEmpty) { - -} diff --git a/app/views/fragments/error.scala.html b/app/views/fragments/error.scala.html deleted file mode 100644 index cf938205..00000000 --- a/app/views/fragments/error.scala.html +++ /dev/null @@ -1,6 +0,0 @@ -@(message: String) - - \ No newline at end of file diff --git a/app/views/fragments/errorPanel.scala.html b/app/views/fragments/errorPanel.scala.html deleted file mode 100644 index d7405899..00000000 --- a/app/views/fragments/errorPanel.scala.html +++ /dev/null @@ -1,5 +0,0 @@ -@(message: String) - -
- @message -
diff --git a/app/views/fragments/flashMessages.scala.html b/app/views/fragments/flashMessages.scala.html deleted file mode 100644 index 057ae4ab..00000000 --- a/app/views/fragments/flashMessages.scala.html +++ /dev/null @@ -1,29 +0,0 @@ -@(flash: Flash, form: Option[Form[?]] = None)(implicit messages: Messages) - -@flash.get("success").map { msg => - -} - -@flash.get("error").map { msg => - -} - -@form.map { f => - @if(f.hasGlobalErrors) { - - } -} diff --git a/app/views/fragments/haplogroup.scala.html b/app/views/fragments/haplogroup.scala.html deleted file mode 100644 index 9dfbbe02..00000000 --- a/app/views/fragments/haplogroup.scala.html +++ /dev/null @@ -1,301 +0,0 @@ -@import controllers.routes.TreeController -@import models.HaplogroupType -@import models.HaplogroupType.{MT, Y} -@import models.api.TreeDTO -@import models.view.TreeViewModel - -@(tree: TreeDTO, hapType: HaplogroupType, renderedTreeData: Option[TreeViewModel], currentUrl: String, showBranchAgeEstimates: Boolean = false)(implicit messages: Messages) - -@fullPageUrl(haplogroup: Option[String]) = @{ - hapType match { - case Y => TreeController.ytree(haplogroup) - case MT => TreeController.mtree(haplogroup) - } -} - -@fragmentUrl(haplogroup: Option[String]) = @{ - hapType match { - case Y => TreeController.yTreeFragment(haplogroup) - case MT => TreeController.mTreeFragment(haplogroup) - } -} - - - -
- - @messages("tree.legend.established") - @messages("tree.legend.updated") - - @if(renderedTreeData.isDefined) { - - } -
- -
- @* Loading Overlay *@ -
-
- Loading... -
-
- -
- @renderedTreeData.map { rtd => - - - @for(link <- rtd.allLinks) { - - } - - @for(node <- rtd.allNodes) { - - - @* Node Rectangle *@ - - - - @* Header bar *@ - - - - @* Name row with icon *@ - - @if(node.isBackbone){✓ }@if(node.isRecentlyUpdated){★ }@node.name - - - @* Variants row *@ - - @node.variantsCount.map(c => s"$c variants ▸").getOrElse("—") - - - @if(showBranchAgeEstimates) { - @* Formed row *@ - - Formed: @node.formedFormatted.getOrElse("—") - - @* TMRCA row *@ - - TMRCA: @node.tmrcaFormatted.getOrElse("—") - - } - - } - - }.getOrElse { -
@messages("tree.noData")
- } -
- -
- -
- - - - \ No newline at end of file diff --git a/app/views/fragments/htmlPagination.scala.html b/app/views/fragments/htmlPagination.scala.html deleted file mode 100644 index 9917b616..00000000 --- a/app/views/fragments/htmlPagination.scala.html +++ /dev/null @@ -1,32 +0,0 @@ -@(currentPage: Int, totalPages: Int, linkGenerator: (Int, Int) => play.api.mvc.Call, pageSize: Int)(implicit messages: Messages) - -@if(totalPages > 1) { - -} diff --git a/app/views/fragments/pagination.scala.html b/app/views/fragments/pagination.scala.html deleted file mode 100644 index 4fe7eb1c..00000000 --- a/app/views/fragments/pagination.scala.html +++ /dev/null @@ -1,34 +0,0 @@ -@(currentPage: Int, totalPages: Int, pageCall: Int => play.api.mvc.Call, target: String, include: String)(implicit messages: Messages) - -@if(totalPages > 1) { - -} diff --git a/app/views/fragments/searchInput.scala.html b/app/views/fragments/searchInput.scala.html deleted file mode 100644 index a074653e..00000000 --- a/app/views/fragments/searchInput.scala.html +++ /dev/null @@ -1,39 +0,0 @@ -@( - id: String, - name: String, - value: Option[String], - placeholder: String, - hxGet: play.api.mvc.Call, - hxTarget: String, - hxIndicator: String = "", - hxTrigger: String = "input changed delay:300ms, search" -)(implicit messages: Messages) - -
- - - - @messages("generic.loading") - - - - @if(value.exists(_.nonEmpty)) { - - } -
diff --git a/app/views/fragments/snpDetailSidebar.scala.html b/app/views/fragments/snpDetailSidebar.scala.html deleted file mode 100644 index cc9f9f20..00000000 --- a/app/views/fragments/snpDetailSidebar.scala.html +++ /dev/null @@ -1,284 +0,0 @@ -@import controllers.routes.TreeController -@import models.api.VariantDTO -@import models.domain.haplogroups.HaplogroupProvenance - -@(haplogroupName: String, snps: Seq[VariantDTO], provenance: Option[HaplogroupProvenance] = None)(implicit messages: Messages) - - - - - -@formatAliasType(aliasType: String) = @{ - aliasType match { - case "common_name" => "SNP Names" - case "rs_id" => "dbSNP IDs" - case "isogg" => "ISOGG" - case "yfull" => "YFull" - case "ftdna" => "FTDNA" - case other => other.replace("_", " ").capitalize - } -} - -@formatSourceName(source: String) = @{ - source.toLowerCase match { - case "isogg" | "backbone" => "ISOGG" - case "yfull" => "YFull" - case "ftdna" => "FTDNA" - case "ytree" => "ytree" - case other => other.replace("_", " ").split(" ").map(_.capitalize).mkString(" ") - } -} diff --git a/app/views/fragments/successPanel.scala.html b/app/views/fragments/successPanel.scala.html deleted file mode 100644 index b4e29217..00000000 --- a/app/views/fragments/successPanel.scala.html +++ /dev/null @@ -1,5 +0,0 @@ -@(message: String) - -
- @message -
diff --git a/app/views/fragments/treeView.scala.html b/app/views/fragments/treeView.scala.html deleted file mode 100644 index de1930b9..00000000 --- a/app/views/fragments/treeView.scala.html +++ /dev/null @@ -1,43 +0,0 @@ -@(titleKey: String, - fragmentCall: Option[String] => play.api.mvc.Call, - fullPageCall: Option[String] => play.api.mvc.Call, - rootHaplogroup: Option[String], - searchPlaceholder: String, - searchInputTitle: String, - showVerticalTree: Boolean = false)(implicit webJarsUtil: org.webjars.play.WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages(titleKey)) { -
-

@messages(titleKey)

- -
-
- - - -
- -
- - -
-
- - - -
-
@messages("tree.loading")
-
-
-} diff --git a/app/views/fragments/variant/referenceBuilds.scala.html b/app/views/fragments/variant/referenceBuilds.scala.html deleted file mode 100644 index bc7143ff..00000000 --- a/app/views/fragments/variant/referenceBuilds.scala.html +++ /dev/null @@ -1,62 +0,0 @@ -@import models.domain.genomics.VariantV2 -@import utils.VariantViewUtils -@(variant: VariantV2)(implicit messages: Messages) - -@defining(VariantViewUtils.refGenomes(variant)) { refs => - @if(refs.size > 1) { -
- - - - - - @if(variant.isStr) { - - } else { - - } - - - - @for(refGenome <- refs) { - @defining(VariantViewUtils.formatAllelesTuple(variant, refGenome)) { alleles => - - - - - - } - } - -
@messages("variants.detail.build")@messages("variants.detail.position")@messages("variants.detail.motifRepeats")@messages("variants.detail.alleles")
- - @VariantViewUtils.shortRefGenome(refGenome) - - @VariantViewUtils.formatPosition(variant, refGenome) - @alleles._1 - @if(alleles._2 != "?") { - - @alleles._2 - } -
-
- } else if(refs.nonEmpty) { - @defining(refs.head) { refGenome => - @defining(VariantViewUtils.formatAllelesTuple(variant, refGenome)) { alleles => -
- @VariantViewUtils.shortRefGenome(refGenome) - @VariantViewUtils.formatPosition(variant, refGenome) - - @alleles._1 - @if(alleles._2 != "?") { - - @alleles._2 - } - -
- } - } - } else { -

@messages("variants.detail.noCoordinates")

- } -} diff --git a/app/views/fragments/verticalTree.scala.html b/app/views/fragments/verticalTree.scala.html deleted file mode 100644 index 0a04b0a2..00000000 --- a/app/views/fragments/verticalTree.scala.html +++ /dev/null @@ -1,269 +0,0 @@ -@import models.api.{TreeDTO} -@import models.HaplogroupType -@import models.HaplogroupType.{MT, Y} -@import models.view.TreeViewModel -@import controllers.routes.TreeController - -@(tree: TreeDTO, hapType: HaplogroupType, renderedTreeData: Option[TreeViewModel], currentUrl: String)(implicit messages: Messages) - -@fullPageUrl(haplogroup: Option[String]) = @{ - hapType match { - case Y => TreeController.ytree(haplogroup) - case MT => TreeController.mtree(haplogroup) - } -} - -@fragmentUrl(haplogroup: Option[String]) = @{ - hapType match { - case Y => TreeController.yTreeFragment(haplogroup) - case MT => TreeController.mTreeFragment(haplogroup) - } -} - - - -
- - @messages("tree.legend.established") - @messages("tree.legend.updated") - -
- -
- @* Loading Overlay *@ -
-
- Loading... -
-
- -
- @renderedTreeData.map { rtd => - - @for(link <- rtd.allLinks) { - - } - - @for(node <- rtd.allNodes) { - - - @* Node Box *@ - @* Note: In Vertical layout, node.x is horizontal, node.y is vertical *@ - - - - @* Header bar *@ - - - - @* Node Name *@ - - @if(node.isBackbone){✓ }@if(node.isRecentlyUpdated){★ }@node.name - - - @* Variant Count Badge *@ - - - - @node.variantsCount.getOrElse(0) variants - - - @* View Variants Link *@ - @if(node.variantsCount.getOrElse(0) > 0) { - - View Details ▸ - - } - - } - - }.getOrElse { -
@messages("tree.noData")
- } -
- -
-
- - - - \ No newline at end of file diff --git a/app/views/index.scala.html b/app/views/index.scala.html deleted file mode 100644 index a7c55429..00000000 --- a/app/views/index.scala.html +++ /dev/null @@ -1,17 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages("home.title")) { - @messages( -
-

@messages("home.welcome")

- @{ - messages.lang.code match { - case "es" => views.html.content.en.homeText() - case "fr" => views.html.content.en.homeText() - case _ => views.html.content.en.homeText() - } - } -
-} diff --git a/app/views/main.scala.html b/app/views/main.scala.html deleted file mode 100644 index e625f037..00000000 --- a/app/views/main.scala.html +++ /dev/null @@ -1,30 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(title: String)(content: Html)(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - - - - - - - - @* Here's where we render the page title `String`. *@ - @title - @webJarsUtil.locate("bootstrap", "css/bootstrap.min.css").css() - - @webJarsUtil.locate("popper.js", "umd/popper.min.js").script() - @webJarsUtil.locate("bootstrap", "js/bootstrap.min.js").script() - @webJarsUtil.locate("htmx.org", "dist/htmx.min.js").script() - - - - @_navbar() - @* And here's where we render the `Html` object containing - * the page content. *@ - @content - - @_footer() - - @* Cookie consent banner for GDPR compliance *@ - @partials.cookieConsentBanner() - - diff --git a/app/views/mtree.scala.html b/app/views/mtree.scala.html deleted file mode 100644 index 96ebc1c6..00000000 --- a/app/views/mtree.scala.html +++ /dev/null @@ -1,13 +0,0 @@ -@(rootHaplogroup: Option[String], showVerticalTree: Boolean = false)(implicit webJarsUtil: org.webjars.play.WebJarsUtil, messages: Messages, request: RequestHeader) - -@import controllers.routes.TreeController - -@fragments.treeView( - titleKey = "nav.mtree", - fragmentCall = TreeController.mTreeFragment, - fullPageCall = TreeController.mtree, - rootHaplogroup = rootHaplogroup, - searchPlaceholder = "e.g. H, U5b...", - searchInputTitle = "Enter a haplogroup name (e.g. 
H, U5)", - showVerticalTree = showVerticalTree -) \ No newline at end of file diff --git a/app/views/partials/cookieConsentBanner.scala.html b/app/views/partials/cookieConsentBanner.scala.html deleted file mode 100644 index 1d256bb2..00000000 --- a/app/views/partials/cookieConsentBanner.scala.html +++ /dev/null @@ -1,141 +0,0 @@ -@()(implicit messages: Messages) - - - - - - diff --git a/app/views/partials/languageSwitcher.scala.html b/app/views/partials/languageSwitcher.scala.html deleted file mode 100644 index ae886ae9..00000000 --- a/app/views/partials/languageSwitcher.scala.html +++ /dev/null @@ -1,12 +0,0 @@ -@()(implicit messages: Messages, request: RequestHeader) - - diff --git a/app/views/partials/messageBadge.scala.html b/app/views/partials/messageBadge.scala.html deleted file mode 100644 index 58ea9b9d..00000000 --- a/app/views/partials/messageBadge.scala.html +++ /dev/null @@ -1,4 +0,0 @@ -@(count: Int) -@if(count > 0) { -@if(count > 99) { 99+ } else { @count } -} diff --git a/app/views/privacyPolicy.scala.html b/app/views/privacyPolicy.scala.html deleted file mode 100644 index 428e0062..00000000 --- a/app/views/privacyPolicy.scala.html +++ /dev/null @@ -1,15 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages("nav.privacy")) { -
-

@messages("legal.privacy.title")

- @{ - messages.lang.code match { - case "es" => views.html.content.en.privacyPolicyText() - case "fr" => views.html.content.en.privacyPolicyText() - case _ => views.html.content.en.privacyPolicyText() - } - } -
-} diff --git a/app/views/publicationCandidates/list.scala.html b/app/views/publicationCandidates/list.scala.html deleted file mode 100644 index 0acd3bdf..00000000 --- a/app/views/publicationCandidates/list.scala.html +++ /dev/null @@ -1,190 +0,0 @@ -@import models.domain.publications.PublicationCandidate -@import java.time.format.DateTimeFormatter -@import play.api.mvc.Request -@import play.twirl.api.Html -@import org.webjars.play.WebJarsUtil - -@(candidates: Seq[PublicationCandidate], currentPage: Int, pageSize: Int, totalCandidates: Int, currentStatus: String, statusCounts: Map[String, Int])(implicit request: Request[AnyContent], messages: Messages, webJarsUtil: WebJarsUtil) - -@main(messages("publicationCandidates.title")) { -
-

@messages("publicationCandidates.heading")

- - @views.html.fragments.flashMessages(request.flash) - - @* Status filter tabs *@ - - - @if(candidates.isEmpty) { - - } else { -
- @helper.CSRF.formField - - - - - @* Bulk action bar *@ - @if(currentStatus == "pending" || currentStatus == "deferred") { -
- - 0 selected -
- - - -
-
- } - -
- - - - @if(currentStatus == "pending" || currentStatus == "deferred") { - - } - - - - - - - - - @for(candidate <- candidates) { - - @if(currentStatus == "pending" || currentStatus == "deferred") { - - } - - - - - - - } - -
@messages("publicationCandidates.table.title")@messages("publicationCandidates.table.journal")@messages("publicationCandidates.table.date")@messages("publicationCandidates.table.relevance")@messages("publicationCandidates.table.actions")
- @candidate.title - @candidate.doi.map { doi => -
DOI: @doi - } -
@candidate.journalName.getOrElse("-")@candidate.publicationDate.map(_.format(DateTimeFormatter.ISO_LOCAL_DATE)).getOrElse("-") - @candidate.relevanceScore.map { s => - @f"$s%.2f" - }.getOrElse("-") - - @if(currentStatus == "pending" || currentStatus == "deferred") { -
- - @helper.CSRF.formField - - - @if(currentStatus == "pending") { -
- @helper.CSRF.formField - -
- } -
- @helper.CSRF.formField - - -
-
- } else { - - @candidate.reviewedAt.map(_.format(DateTimeFormatter.ofPattern("yyyy-MM-dd"))).getOrElse("") - @candidate.rejectionReason.map { r =>
@r } -
- } -
-
- - - @* Pagination *@ - @views.html.fragments.htmlPagination( - currentPage = currentPage, - totalPages = (totalCandidates + pageSize - 1) / pageSize, - pageSize = pageSize, - linkGenerator = (p, ps) => controllers.routes.PublicationCandidateController.listCandidates(p, ps, currentStatus) - ) - } -
- - -} diff --git a/app/views/publicationDetails.scala.html b/app/views/publicationDetails.scala.html deleted file mode 100644 index 658c1068..00000000 --- a/app/views/publicationDetails.scala.html +++ /dev/null @@ -1,308 +0,0 @@ -@import models.domain.publications.StudySource - -@(publicationWithDetails: models.api.PublicationWithEnaStudiesAndSampleCount)(implicit messages: Messages) - -
-
-
-

@publicationWithDetails.publication.title

- - @publicationWithDetails.publication.authors.map { authors => -

- @messages("publication.authors") - @authors - @if(authors.contains(" et al.")) { - - @messages("publication.authors.etAl") - - } -

- }.getOrElse(

- @messages("publication.authors") - @messages("generic.notAvailable") -

) - - @if(publicationWithDetails.publication.journal.isDefined || publicationWithDetails.publication.publicationDate.isDefined) { -

- @publicationWithDetails.publication.journal.map { journal => - @messages("publication.journal") @journal - } - @publicationWithDetails.publication.publicationDate.map { date => - - @messages("publication.published") - } -

- } -
- - @publicationWithDetails.publication.abstractSummary.map { summary => -
-

@messages("publication.abstract")

-

@summary

-
- } - - @if(publicationWithDetails.publication.openAccessStatus.isDefined || - publicationWithDetails.publication.citedByCount.isDefined || - publicationWithDetails.publication.citationNormalizedPercentile.isDefined || - publicationWithDetails.publication.primaryTopic.isDefined || - publicationWithDetails.publication.publicationType.isDefined || - publicationWithDetails.publication.publisher.isDefined) { -
-

@messages("publication.additionalDetails")

-
- @if(publicationWithDetails.publication.primaryTopic.isDefined || publicationWithDetails.publication.publicationType.isDefined) { -
@messages("publication.classification")
-
- @publicationWithDetails.publication.primaryTopic.map { topic => - @topic - } - @publicationWithDetails.publication.publicationType.map { pubType => - @pubType.capitalize - } -
- } - - @if(publicationWithDetails.publication.publisher.isDefined || publicationWithDetails.publication.openAccessStatus.isDefined) { -
@messages("publication.access")
-
- @publicationWithDetails.publication.publisher.map { publisher => - @publisher - } - @publicationWithDetails.publication.openAccessStatus.map { status => - @status.toLowerCase match { - case "diamond" => { - - Diamond - - } - case "gold" => { - - Gold - - } - case "green" => { - - Green - - } - case "hybrid" => { - - Hybrid - - } - case "bronze" => { - - Bronze - - } - case "closed" => { - - Closed - - } - case _ => { - @status.capitalize - } - } - } -
- } - - @if(publicationWithDetails.publication.citedByCount.isDefined || publicationWithDetails.publication.citationNormalizedPercentile.isDefined) { -
@messages("publication.impact")
-
- @publicationWithDetails.publication.citedByCount.map { count => - - - @messages("publication.cited", count) - - } - @publicationWithDetails.publication.citationNormalizedPercentile.map { percentile => - - - @messages("publication.percentile", "%,.1f".format(percentile * 100)) - - } -
- } -
-
- } - - @if(publicationWithDetails.bioStudies.nonEmpty) { -
-

@messages("publication.genomicStudies")

- @defining(publicationWithDetails.bioStudies.groupBy(_.source)) { studiesBySource => - @for((source, studies) <- studiesBySource) { -

@{ - source match { - case StudySource.ENA => messages("publication.source.ena") - case StudySource.NCBI_BIOPROJECT => messages("publication.source.ncbi") - case _ => source.toString - } - }

-
    - @for(study <- studies) { -
  • - @{ - source match { - case StudySource.ENA => { - - {study.accession} - - } - case StudySource.NCBI_BIOPROJECT => { - - {study.accession} - - } - case _ => { - {study.accession} - } - } - } - - @study.title - @if(study.centerName != "N/A") { - - @study.centerName - } -
  • - } -
- } - } -
- } - - - @if(publicationWithDetails.sampleCount > 0) { -
- - } - - - - -
diff --git a/app/views/publicationList.scala.html b/app/views/publicationList.scala.html deleted file mode 100644 index 1056528b..00000000 --- a/app/views/publicationList.scala.html +++ /dev/null @@ -1,57 +0,0 @@ -@(paginatedPublications: models.api.PaginatedResult[models.api.PublicationWithEnaStudiesAndSampleCount], searchQuery: Option[String])(implicit messages: Messages) - -@if(paginatedPublications.items.isEmpty) { - -} else { -
- @for(details <- paginatedPublications.items) { - @publicationDetails(details) -
- } -
- -
-
- - -
-
- @messages("publication.list.total", paginatedPublications.totalItems) - @views.html.fragments.htmlPagination( - currentPage = paginatedPublications.currentPage, - totalPages = paginatedPublications.totalPages, - pageSize = paginatedPublications.pageSize, - linkGenerator = (p, ps) => controllers.routes.PublicationController.getAllPublicationsWithDetailsHtml(Some(p), Some(ps), searchQuery) - ) -
-
-} - - \ No newline at end of file diff --git a/app/views/publications/submitPaper.scala.html b/app/views/publications/submitPaper.scala.html deleted file mode 100644 index 4fbc9e4b..00000000 --- a/app/views/publications/submitPaper.scala.html +++ /dev/null @@ -1,46 +0,0 @@ -@import helper.* -@import org.webjars.play.WebJarsUtil - -@(paperForm: Form[models.forms.PaperSubmission])(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main(messages("publication.submit.title")) { -
-

@messages("publication.submit.title")

- - @views.html.fragments.flashMessages(request.flash) - - @form(routes.PublicationController.submitPaper()) { - @CSRF.formField - -
- @inputText( - paperForm("doi"), - Symbol("class") -> "form-control", - Symbol("placeholder") -> messages("publication.submit.doi.placeholder"), - Symbol("_label") -> messages("publication.submit.doi.label"), - Symbol("_help") -> messages("publication.submit.doi.help") - ) -
- -
- @inputText( - paperForm("enaAccession"), - Symbol("class") -> "form-control", - Symbol("placeholder") -> messages("publication.submit.ena.placeholder"), - Symbol("_label") -> messages("publication.submit.ena.label"), - Symbol("_help") -> messages("publication.submit.ena.help") - ) -
- -
- - - -
- - - } -
-} diff --git a/app/views/references.scala.html b/app/views/references.scala.html deleted file mode 100644 index 5bbb5d43..00000000 --- a/app/views/references.scala.html +++ /dev/null @@ -1,71 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages("nav.references")) { - - -
- - -
-
- @views.html.fragments.searchInput( - id = "publication-search", - name = "query", - value = None, - placeholder = messages("references.search.placeholder"), - hxGet = controllers.routes.PublicationController.getAllPublicationsWithDetailsHtml(Some(1), None, None), - hxTarget = "#publications-container", - hxIndicator = "#search-box", - hxTrigger = "input changed delay:300ms, search" - ) - - @messages("references.search.help") -
-
- -
-
-
- @messages("generic.loading") -
-
-
-
-} \ No newline at end of file diff --git a/app/views/reputation.scala.html b/app/views/reputation.scala.html deleted file mode 100644 index d9643b88..00000000 --- a/app/views/reputation.scala.html +++ /dev/null @@ -1,20 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages("reputation.title")) { -
-

@messages("reputation.title")

- -
-
- @{ - messages.lang.code match { - case "es" => views.html.content.en.reputationText() - case "fr" => views.html.content.en.reputationText() - case _ => views.html.content.en.reputationText() - } - } -
-
-
-} diff --git a/app/views/support/admin/messageDetail.scala.html b/app/views/support/admin/messageDetail.scala.html deleted file mode 100644 index b9d6dc6d..00000000 --- a/app/views/support/admin/messageDetail.scala.html +++ /dev/null @@ -1,164 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import models.domain.support.{ContactMessage, MessageReply, MessageStatus} -@import models.domain.user.User -@import controllers.ReplyFormData -@import utils.CuratorViewUtils -@(message: ContactMessage, replies: Seq[MessageReply], sender: Option[User], replyForm: Form[ReplyFormData])(implicit request: RequestHeader, messagesProvider: Messages, webJarsUtil: WebJarsUtil) - -@main(messagesProvider("support.admin.messageDetail.title")) { -
- - - @views.html.fragments.flashMessages(request.flash) - -
-
-
-
-
@message.subject
- - @messagesProvider(s"support.status.${message.status.value}") - -
-
-
- - @messagesProvider("support.admin.receivedAt"): @message.createdAt - -
-
-
@message.message
-
-
-
- - @if(replies.nonEmpty) { -
@messagesProvider("support.admin.replies")
- @for(reply <- replies) { -
-
-
- - @messagesProvider("support.admin.repliedBy"): Admin - - @reply.createdAt -
-
@reply.replyText
- @if(reply.emailSent) { -
- - @messagesProvider("support.admin.emailSent") @reply.emailSentAt.map(_.toLocalDate).getOrElse("") - -
- } -
-
- } - } - -
-
-
@messagesProvider("support.admin.sendReply")
-
-
- @helper.form(action = controllers.routes.SupportAdminController.submitReply(message.id.get)) { - @helper.CSRF.formField - -
- - @replyForm("replyText").errors.map { error => -
@messagesProvider(error.message)
- } -
- - @if(message.senderEmail.isDefined) { -
- - -
- } else { - -
- @messagesProvider("support.admin.noEmailForAuth") -
- } - - - } -
-
-
- -
-
-
-
@messagesProvider("support.admin.senderInfo")
-
-
-
- @sender match { - case Some(user) => { -
@messagesProvider("support.admin.type")
-
@messagesProvider("support.admin.authenticated")
-
@messagesProvider("support.admin.handle")
-
@user.handle.getOrElse("N/A")
-
@messagesProvider("support.admin.userId")
-
@user.id.get
- } - case None => { -
@messagesProvider("support.admin.type")
-
@messagesProvider("support.admin.anonymous")
-
@messagesProvider("support.admin.name")
-
@message.senderName.getOrElse("N/A")
-
@messagesProvider("support.admin.email")
-
@message.senderEmail.getOrElse("N/A")
- } - } -
-
-
- -
-
-
@messagesProvider("support.admin.actions")
-
-
-
- @if(message.status != MessageStatus.Closed) { - - @messagesProvider("support.admin.markClosed") - - } - @if(message.status == MessageStatus.Closed) { - - @messagesProvider("support.admin.reopen") - - } -
-
-
-
-
-
-} - -@statusBadgeClass(status: MessageStatus) = @{ - status match { - case MessageStatus.New => "bg-primary" - case MessageStatus.Read => "bg-info" - case MessageStatus.Replied => "bg-success" - case MessageStatus.Closed => "bg-secondary" - } -} diff --git a/app/views/support/admin/messageList.scala.html b/app/views/support/admin/messageList.scala.html deleted file mode 100644 index 00b9cd48..00000000 --- a/app/views/support/admin/messageList.scala.html +++ /dev/null @@ -1,81 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import models.domain.support.{ContactMessage, MessageStatus} -@import utils.CuratorViewUtils -@(messages: Seq[ContactMessage], statusFilter: Option[MessageStatus], currentPage: Int, totalPages: Int, pageSize: Int)(implicit request: RequestHeader, messagesProvider: Messages, webJarsUtil: WebJarsUtil) - -@main(messagesProvider("support.admin.title")) { -
-

@messagesProvider("support.admin.heading")

- - @views.html.fragments.flashMessages(request.flash) - -
-
-
-
- -
-
- -
-
-
-
- - @if(messages.isEmpty) { -
@messagesProvider("support.admin.noMessages")
- } else { -
- - - - - - - - - - - - @for(msg <- messages) { - - - - - - - - } - -
@messagesProvider("support.admin.table.status")@messagesProvider("support.admin.table.from")@messagesProvider("support.admin.table.subject")@messagesProvider("support.admin.table.date")
- - @messagesProvider(s"support.status.${msg.status.value}") - - - @if(msg.userId.isDefined) { - @messagesProvider("support.admin.authenticated") - } - @msg.senderName.orElse(msg.userId.map(_ => messagesProvider("support.admin.registeredUser"))).getOrElse("Unknown") - @msg.subject@msg.createdAt.toLocalDate - - @messagesProvider("support.admin.view") - -
-
- - @views.html.fragments.htmlPagination( - currentPage = currentPage, - totalPages = totalPages, - pageSize = pageSize, - linkGenerator = (p, ps) => controllers.routes.SupportAdminController.listMessages(statusFilter.map(_.value), p, ps) - ) - } -
-} diff --git a/app/views/support/myMessages.scala.html b/app/views/support/myMessages.scala.html deleted file mode 100644 index 81627586..00000000 --- a/app/views/support/myMessages.scala.html +++ /dev/null @@ -1,56 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import models.domain.support.{ContactMessage, MessageStatus} -@import utils.CuratorViewUtils -@(messages: Seq[ContactMessage])(implicit request: RequestHeader, messagesProvider: Messages, webJarsUtil: WebJarsUtil) - -@main(messagesProvider("support.myMessages.title")) { -
-
-
-
-

@messagesProvider("support.myMessages.heading")

- - @messagesProvider("support.newMessage") - -
- - @views.html.fragments.flashMessages(request.flash) - - @if(messages.isEmpty) { -
-
- @messagesProvider("support.myMessages.empty") -
- } else { -
- @for(msg <- messages) { -
-
-
@msg.subject
- @msg.createdAt.toLocalDate -
-

@msg.message

-
- - - @messagesProvider(s"support.status.${msg.status.value}") - - -
-
- } -
- } -
-
-
-} - -@statusBadgeClass(status: MessageStatus) = @{ - status match { - case MessageStatus.New => "bg-primary" - case MessageStatus.Read => "bg-info" - case MessageStatus.Replied => "bg-success" - case MessageStatus.Closed => "bg-secondary" - } -} diff --git a/app/views/terms.scala.html b/app/views/terms.scala.html deleted file mode 100644 index 0b261cab..00000000 --- a/app/views/terms.scala.html +++ /dev/null @@ -1,15 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@()(implicit webJarsUtil: WebJarsUtil, messages: Messages, request: RequestHeader) - -@main(messages("nav.terms")) { -
-

@messages("legal.terms.title")

- @{ - messages.lang.code match { - case "es" => views.html.content.en.termsText() - case "fr" => views.html.content.en.termsText() - case _ => views.html.content.en.termsText() - } - } -
-} diff --git a/app/views/user/profile.scala.html b/app/views/user/profile.scala.html deleted file mode 100644 index c3c2571e..00000000 --- a/app/views/user/profile.scala.html +++ /dev/null @@ -1,54 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@import models.domain.user.User -@(profileForm: Form[controllers.ProfileFormData], user: User)(implicit request: Request[AnyContent], messages: Messages, webJarsUtil: WebJarsUtil) - -@main(messages("profile.title")) { -
-
-
-
-
-

@messages("profile.heading")

-
-
- @views.html.fragments.flashMessages(request.flash) - -
-
@messages("profile.accountDetails")
-
-
@messages("profile.handle")
-
@@@user.handle.getOrElse("-")
- -
@messages("profile.did")
-
@user.did
-
-
- -
- -
@messages("profile.editProfile")
- @helper.form(action = routes.ProfileController.update) { - @helper.CSRF.formField - -
- - -
@messages("profile.displayNameHelp")
- @profileForm("displayName").errors.map { error => -
@messages(error.message)
- } -
- -
- - @messages("generic.cancel") -
- } -
-
-
-
-
-} diff --git a/app/views/variants/browser.scala.html b/app/views/variants/browser.scala.html deleted file mode 100644 index b0dd9d45..00000000 --- a/app/views/variants/browser.scala.html +++ /dev/null @@ -1,68 +0,0 @@ -@import org.webjars.play.WebJarsUtil -@(query: Option[String], page: Int, pageSize: Int)(implicit request: RequestHeader, messages: Messages, webJarsUtil: WebJarsUtil) - -@main(messages("variants.browser.title")) { - - -
-
-
-

@messages("variants.browser.heading")

-

@messages("variants.browser.description")

-
-
- -
-
-
-
-
@messages("variants.browser.variants")
-
-
-
-
- @views.html.fragments.searchInput( - id = "variant-search", - name = "query", - value = query, - placeholder = messages("variants.browser.searchPlaceholder"), - hxGet = controllers.routes.VariantBrowserController.listFragment(None, 1, pageSize), - hxTarget = "#variants-table" - ) - @messages("variants.browser.searchHelp") -
-
- -
-
-
- Loading... -
-

Loading variants...

-
-
-
-
-
- -
-
-
-
- -

@messages("variants.browser.selectVariant")

-
-
-
-
-
-
-} diff --git a/app/views/variants/detailPanel.scala.html b/app/views/variants/detailPanel.scala.html deleted file mode 100644 index 915847ba..00000000 --- a/app/views/variants/detailPanel.scala.html +++ /dev/null @@ -1,98 +0,0 @@ -@import models.domain.genomics.{NamingStatus, VariantV2} -@import models.domain.haplogroups.Haplogroup -@import play.api.libs.json.JsObject -@import utils.VariantViewUtils -@(variant: VariantV2, haplogroups: Seq[Haplogroup])(implicit request: RequestHeader, messages: Messages) - -
-
-
@variant.displayName
- @variant.mutationType.displayName -
-
-
-
@messages("variants.detail.rsId")
-
- @variant.rsIds.headOption.map { rs => - @rs - }.getOrElse("-") -
- -
@messages("variants.detail.commonName")
-
@variant.canonicalName.getOrElse("-")
- - @defining(VariantViewUtils.primaryAlleles(variant)) { alleles => -
@messages("variants.detail.ancestral")
-
@alleles._1
- -
@messages("variants.detail.derived")
-
@alleles._2
- } - -
@messages("variants.detail.type")
-
@variant.mutationType.displayName
- -
@messages("variants.detail.status")
-
- - @variant.namingStatus.displayName - -
-
- - @if(variant.commonNames.nonEmpty || variant.rsIds.size > 1) { -
-
@messages("variants.detail.altNames")
-
- @if(variant.commonNames.nonEmpty) { -
- @messages("variants.detail.aliasType.snpNames"): - @for(name <- variant.commonNames) { - @name - } -
- } - @if(variant.rsIds.nonEmpty) { -
- @messages("variants.detail.aliasType.dbsnp"): - @for(rsId <- variant.rsIds) { - @rsId - } -
- } -
- } - -
- -
@messages("variants.detail.refBuilds")
- @views.html.fragments.variant.referenceBuilds(variant) - -
- -
@messages("variants.detail.usedBy")
- @if(haplogroups.isEmpty) { -

@messages("variants.detail.noHaplogroups")

- } else { -
- @for(hg <- haplogroups.take(10)) { - @defining(if(hg.haplogroupType.toString == "Y") controllers.routes.TreeController.ytree(Some(hg.name)).url else controllers.routes.TreeController.mtree(Some(hg.name)).url) { treeUrl => - - @hg.name - - } - } - @if(haplogroups.size > 10) { - +@(haplogroups.size - 10) @messages("variants.detail.more") - } -
- } - - @variant.notes.map { notes => -
-
@messages("variants.detail.notes")
-

@notes

- } -
-
\ No newline at end of file diff --git a/app/views/variants/listFragment.scala.html b/app/views/variants/listFragment.scala.html deleted file mode 100644 index ae8ff090..00000000 --- a/app/views/variants/listFragment.scala.html +++ /dev/null @@ -1,85 +0,0 @@ -@import models.domain.genomics.VariantV2 -@import utils.VariantViewUtils -@(variants: Seq[VariantV2], query: Option[String], currentPage: Int, totalPages: Int, pageSize: Int, totalCount: Int)(implicit request: RequestHeader, messages: Messages) - -@defining(java.text.NumberFormat.getIntegerInstance()) { nf => -
- - @if(query.exists(_.trim.nonEmpty)) { - @messages("variants.browser.foundMatching", nf.format(totalCount), query.get) - } else { - @messages("variants.browser.showingTotal", nf.format(totalCount)) - } - - @if(totalPages > 1) { - @messages("variants.browser.pageOf", currentPage, nf.format(totalPages)) - } -
-} - -@if(variants.isEmpty) { -
- @if(query.exists(_.trim.nonEmpty)) { - @messages("variants.browser.noResults", query.get) - } else { - @messages("variants.browser.enterSearch") - } -
-} else { -
- - - - - - - - - - - @for(variant <- variants) { - @defining(VariantViewUtils.primaryAlleles(variant)) { alleles => - @defining(VariantViewUtils.refGenomes(variant)) { refs => - - - - - - - } - } - } - -
@messages("variants.browser.col.name")@messages("variants.browser.col.alleles")@messages("variants.browser.col.type")@messages("variants.browser.col.builds")
- @variant.displayName - @variant.rsIds.headOption.filter(_ != variant.canonicalName.getOrElse("")).map { rsId => -
@rsId - } -
- @alleles._1 - @if(alleles._2 != "?") { - @alleles._2 - } - - @variant.mutationType.displayName - - @for(refGenome <- refs) { - - @VariantViewUtils.shortRefGenome(refGenome) - - } -
-
- - @views.html.fragments.pagination( - currentPage, - totalPages, - p => controllers.routes.VariantBrowserController.listFragment(query, p, pageSize), - "#variants-table", - "#variant-search" - ) -} \ No newline at end of file diff --git a/app/views/ytree.scala.html b/app/views/ytree.scala.html deleted file mode 100644 index 4dde8472..00000000 --- a/app/views/ytree.scala.html +++ /dev/null @@ -1,13 +0,0 @@ -@(rootHaplogroup: Option[String], showVerticalTree: Boolean = false)(implicit webJarsUtil: org.webjars.play.WebJarsUtil, messages: Messages, request: RequestHeader) - -@import controllers.routes.TreeController - -@fragments.treeView( - titleKey = "nav.ytree", - fragmentCall = TreeController.yTreeFragment, - fullPageCall = TreeController.ytree, - rootHaplogroup = rootHaplogroup, - searchPlaceholder = "e.g. R-M269, I-M253...", - searchInputTitle = "Enter a haplogroup name (e.g. R-M269)", - showVerticalTree = showVerticalTree -) \ No newline at end of file diff --git a/build.sbt b/build.sbt deleted file mode 100644 index 649eb376..00000000 --- a/build.sbt +++ /dev/null @@ -1,67 +0,0 @@ -name := """decodingus""" - -version := "1.0-SNAPSHOT" - -lazy val root = (project in file(".")).enablePlugins(PlayScala) - -scalaVersion := "3.3.6" - -val SLICK_VERSION = "6.2.0" -val SLICK_PG_VERSION = "0.23.1" -val TAPIR_VERSION = "1.11.50" -val AWS_VERSION = "2.40.3" - -// WARNING: Updating beyond 1.1.2 will result in startup errors, since quartz schedular needs this version -val APACHE_PEKKO_VERSION = "1.1.5" - -scalacOptions ++= Seq("-Xmax-inlines", "128") - -libraryDependencies ++= Seq( - guice, - caffeine, - "org.scala-lang.modules" %% "scala-xml" % "2.4.0", - "org.playframework" %% "play-slick" % SLICK_VERSION, - "org.playframework" %% "play-slick-evolutions" % SLICK_VERSION, - "org.postgresql" % "postgresql" % "42.7.8", - "com.github.tminglei" %% "slick-pg" % SLICK_PG_VERSION, - "com.github.tminglei" %% "slick-pg_jts" % SLICK_PG_VERSION, - "com.github.tminglei" %% 
"slick-pg_play-json" % SLICK_PG_VERSION, - "org.webjars" %% "webjars-play" % "3.0.9", - "org.webjars" % "bootstrap" % "5.3.8", - "org.webjars" % "popper.js" % "2.11.7", - "org.webjars.npm" % "htmx.org" % "2.0.8", - "org.scalatestplus.play" %% "scalatestplus-play" % "7.0.2" % Test, - "com.h2database" % "h2" % "2.4.240" % Test, - "org.codehaus.janino" % "janino" % "3.1.12", - "com.nappin" %% "play-recaptcha" % "3.0", - - // Core Tapir libraries - "com.softwaremill.sttp.tapir" %% "tapir-core" % TAPIR_VERSION, - "com.softwaremill.sttp.tapir" %% "tapir-json-play" % TAPIR_VERSION, - - // Play server interpreter - "com.softwaremill.sttp.tapir" %% "tapir-play-server" % TAPIR_VERSION, - - // OpenAPI / Swagger UI generation - "com.softwaremill.sttp.tapir" %% "tapir-swagger-ui-bundle" % TAPIR_VERSION, - - "io.github.samueleresca" %% "pekko-quartz-scheduler" % "1.3.0-pekko-1.1.x", - - "org.apache.pekko" %% "pekko-protobuf-v3" % APACHE_PEKKO_VERSION, - "org.apache.pekko" %% "pekko-serialization-jackson" % APACHE_PEKKO_VERSION, - "org.apache.pekko" %% "pekko-stream" % APACHE_PEKKO_VERSION, - "org.apache.pekko" %% "pekko-actor-typed" % APACHE_PEKKO_VERSION, - "org.apache.pekko" %% "pekko-slf4j" % APACHE_PEKKO_VERSION, - - "software.amazon.awssdk" % "secretsmanager" % AWS_VERSION, - "software.amazon.awssdk" % "ses" % AWS_VERSION, - "org.hashids" % "hashids" % "1.0.3", - "org.mindrot" % "jbcrypt" % "0.4", // BCrypt for password hashing - "com.github.samtools" % "htsjdk" % "4.3.0", - "org.scalatestplus" %% "mockito-5-10" % "3.2.18.0" % Test -) - -// Code Coverage Configuration -coverageMinimumStmtTotal := 5 -coverageFailOnMinimum := true -coverageHighlighting := true diff --git a/conf/application.conf b/conf/application.conf deleted file mode 100644 index 16f05ea6..00000000 --- a/conf/application.conf +++ /dev/null @@ -1,261 +0,0 @@ -# https://www.playframework.com/documentation/latest/Configuration - -play.http.secret.key="changeme" -play.http.secret.key=${?APPLICATION_SECRET} - 
-play.i18n { - langs = [ "en", "fr", "es" ] -} - -# Session cookie security -play.http.session.httpOnly = true -play.http.session.secure = true -play.http.session.secure = ${?SESSION_SECURE} -play.http.session.sameSite = "lax" - -# Increase max request body size for tree merge API (default is 100KB) -play.http.parser.maxMemoryBuffer = 10MB - -# Disable the startup banner -play.application.showBanner=false - -play.modules.enabled += "modules.BaseModule" -play.modules.enabled += "modules.ServicesModule" - -play.modules.enabled += "modules.RecaptchaModule" -play.modules.enabled += "modules.StartupModule" -play.modules.enabled += "modules.ApplicationModule" -play.modules.enabled += "modules.ApiSecurityModule" -play.modules.enabled += "modules.PDSRegistrationModule" - -play.cache.caffeine { - # Default cache configuration - default = { - initial-capacity = 100 - maximum-size = 10000 - expire-after-write = 24h - } - # Specific cache configurations if needed - sitemap = ${play.cache.caffeine.default} -} - -# Tell Play to use Caffeine instead of the default EhCache -play.cache.createBoundCaches = false -play.modules.enabled += "play.api.cache.caffeine.CaffeineCacheModule" - -recaptcha { - enable = false - enable = ${?ENABLE_RECAPTCHA} - - # Your existing recaptcha configuration here - privateKey = "your-secret-key" - privateKey = ${?RECAPTCHA_SECRET_KEY} - publicKey = "your-site-key" - publicKey = ${?RECAPTCHA_SITE_KEY} -} - -# Apply evolutions automatically, disable in the production version -play.evolutions.autocommit=true - -slick.dbs.default { - profile="slick.jdbc.PostgresProfile$" - db { - numThreads = 32 - queueSize = 5000 - - driver="org.postgresql.Driver" - url="jdbc:postgresql://localhost:5432/decodingus_db" - # Local Development Credentials - username="decoding_us_user" - # Overridden in production on startup - password="user_password" - } -} - -slick.dbs.metadata { - profile="slick.jdbc.PostgresProfile$" - db { - numThreads = 32 - queueSize = 5000 - - 
driver="org.postgresql.Driver" - url="jdbc:postgresql://localhost:5432/decodingus_metadata" # Temporarily pointing to the same DB - username="decodingus_user" - password="decodingus_password" - } -} - -contact { - recipient.email = "your-email@domain.com" - recipient.email = ${?CONTACT_RECIPIENT_EMAIL} # Can be overridden by environment variable -} - -# AT Protocol / PDS configuration -atproto { - client.timeout = 5000 - client.timeout = ${?ATPROTO_CLIENT_TIMEOUT} - plc.directory = "https://plc.directory" - plc.directory = ${?ATPROTO_PLC_DIRECTORY} -} - -# PDS Edge node authentication -pds.auth { - timestamp.window.seconds = 300 - timestamp.window.seconds = ${?PDS_AUTH_TIMESTAMP_WINDOW} -} - -pekko { - loglevel = "DEBUG" - stdout-loglevel = "DEBUG" - jvm-exit-on-fatal-error = off - - quartz.schedules { - # Publication Updater: Run every two weeks - PublicationUpdater { - # CRON expression for every two weeks at midnight UTC - # This will trigger at 00:00:00 UTC on the 1st and 15th of every month. - expression = "0 0 0 1,15 * ?" - timezone = "UTC" - description = "Refresh publication data from OpenAlex every two weeks" - } - - PublicationDiscovery { - # Run weekly on Sunday at 2 AM UTC - expression = "0 0 2 ? * SUN" - timezone = "UTC" - description = "Discover new publications via OpenAlex" - } - - YBrowseVariantUpdate { - # Run weekly on Monday at 3 AM UTC - expression = "0 0 3 ? * MON" - timezone = "UTC" - description = "Download and ingest Y-DNA SNP data from YBrowse" - } - - VariantExport { - # Run daily at 4 AM UTC (after YBrowse update on Mondays) - expression = "0 0 4 * * ?" - timezone = "UTC" - description = "Generate full variant export file for Edge App" - } - - MatchDiscovery { - # Run daily at 3 AM UTC - expression = "0 0 3 * * ?" 
- timezone = "UTC" - description = "Compute population overlap scores and generate match suggestions" - } - } -} - -decodingus.matching { - discovery { - shared-match-threshold = 2 - population-overlap-threshold = 0.6 - suggestion-expiry-days = 90 - max-suggestions-per-user = 100 - } - - requests { - default-expiry-days = 30 - max-pending-requests = 50 - consent-expiry-days = 365 - } - - relay { - session-timeout-minutes = 10 - max-concurrent-sessions = 100 - stale-cleanup-interval-seconds = 60 - } -} - -openalex.mailToEmail="jkane@tds.net" - -aws { - region = "us-east-1" - secrets { - apiKey { - name = "your-secret-name" - } - userEncryptionKey { - name = "your-user-encryption-key-secret-name" - } - } -} - -biosample.hash.salt = "your-biosample-salt" -biosample.hash.salt = ${?BIOSAMPLE_HASH_SALT} - -# Feature flags - enable/disable features in development -features { - tree { - # Show branch age estimates (Formed/TMRCA) on tree nodes - # Disabled until age data is populated - showBranchAgeEstimates = false - showBranchAgeEstimates = ${?FEATURE_SHOW_BRANCH_AGE_ESTIMATES} - } -} - -genomics { - references { - # Canonical names for supported linear reference builds (must match database reference_genome values) - # Using short names without patch versions: GRCh37, GRCh38, hs1 - # hs1 is UCSC's naming convention for T2T-CHM13v2.0 - supported = ["GRCh37", "GRCh38", "hs1"] - - # Aliases to map common/legacy names to canonical names - aliases { - "hg19" = "GRCh37" - "hg38" = "GRCh38" - # Legacy patch versions (for backwards compatibility with old data) - "GRCh37.p13" = "GRCh37" - "GRCh38.p14" = "GRCh38" - # T2T-CHM13 aliases (UCSC uses hs1) - "chm13" = "hs1" - "chm13v2.0" = "hs1" - "T2T-CHM13" = "hs1" - "T2T-CHM13v2.0" = "hs1" - } - - # Paths to reference genome FASTA files - fasta_paths { - "GRCh37" = "/home/jkane/Genomics/Reference/b37/human_g1k_v37.fasta.gz" - "GRCh38" = "/home/jkane/Genomics/Reference/b38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna" - "hs1" = 
"/home/jkane/Genomics/Reference/chm13v2.0/chm13v2.0.fa.gz" - } - } - - liftover { - # Chain files for coordinate conversion - # Format: "Source->Target" = "path/to/chain/file" - chains { - "GRCh38->GRCh37" = "/mnt/md0/genomics/chains/hg38ToHg19.over.chain.gz" - "GRCh38->hs1" = "/mnt/md0/genomics/chains/hg38ToHs1.over.chain.gz" - "hs1->GRCh38" = "/mnt/md0/genomics/chains/hs1ToHg38.over.chain.gz" - "hs1->GRCh37" = "/mnt/md0/genomics/chains/hs1ToHg19.over.chain.gz" - } - } - - ybrowse { - # URL to download the YBrowse Y-DNA SNP GFF3 file - gff_url = "https://ybrowse.org/gbrowse2/gff/snps_hg38.gff3" - gff_url = ${?YBROWSE_GFF_URL} - - # Local storage path for downloaded GFF file - gff_storage_path = "/mnt/md0/decodingus/ybrowse/snps_hg38.gff3" - gff_storage_path = ${?YBROWSE_GFF_STORAGE_PATH} - } - - hipstr { - # URL to download the HipSTR STR reference BED file (GRCh38) - url = "https://github.com/HipSTR-Tool/HipSTR-references/raw/master/human/GRCh38.hipstr_reference.bed.gz" - url = ${?HIPSTR_URL} - - # Local storage path for downloaded BED file - storage_path = "/mnt/md0/decodingus/hipstr/GRCh38.hipstr_reference.bed.gz" - storage_path = ${?HIPSTR_STORAGE_PATH} - } -} - - diff --git a/conf/application.test.conf b/conf/application.test.conf deleted file mode 100644 index 792ab6f5..00000000 --- a/conf/application.test.conf +++ /dev/null @@ -1,35 +0,0 @@ -include "application.conf" - -# Override database to use H2 in-memory for tests -slick.dbs.default { - profile = "slick.jdbc.H2Profile$" - db { - driver = "org.h2.Driver" - url = "jdbc:h2:mem:test;MODE=PostgreSQL;DATABASE_TO_UPPER=FALSE;DB_CLOSE_DELAY=-1" - username = "sa" - password = "" - numThreads = 2 - queueSize = 100 - } -} - -slick.dbs.metadata { - profile = "slick.jdbc.H2Profile$" - db { - driver = "org.h2.Driver" - url = "jdbc:h2:mem:test_metadata;MODE=PostgreSQL;DATABASE_TO_UPPER=FALSE;DB_CLOSE_DELAY=-1" - username = "sa" - password = "" - numThreads = 2 - queueSize = 100 - } -} - -# Disable evolutions for 
tests -play.evolutions.enabled = false - -# Disable startup services that hit the database -play.modules.disabled += "modules.StartupModule" - -# Disable scheduled jobs -pekko.quartz.schedules = {} diff --git a/conf/evolutions/default/1.sql b/conf/evolutions/default/1.sql deleted file mode 100644 index af893fbf..00000000 --- a/conf/evolutions/default/1.sql +++ /dev/null @@ -1,305 +0,0 @@ -# --- !Ups -CREATE EXTENSION IF NOT EXISTS postgis; - -CREATE TABLE specimen_donor -( - id SERIAL PRIMARY KEY, - donor_identifier VARCHAR(255) NOT NULL, - origin_biobank VARCHAR(255) NOT NULL -); - -CREATE TABLE biosample -( - id SERIAL PRIMARY KEY, - sample_accession VARCHAR(255) UNIQUE NOT NULL, - description TEXT NOT NULL, - alias VARCHAR(255), - center_name VARCHAR(255) NOT NULL, - sex VARCHAR(15) CHECK (sex IN ('male', 'female', 'intersex')), - geocoord GEOMETRY(Point, 4326), - specimen_donor_id INT REFERENCES specimen_donor (id) ON DELETE CASCADE, - sample_guid UUID NOT NULL -); - -CREATE TABLE citizen_biosample -( - id SERIAL PRIMARY KEY, - citizen_biosample_did VARCHAR(255) UNIQUE, - source_platform VARCHAR(255), - collection_date DATE, - sex VARCHAR(15) CHECK (sex IN ('male', 'female', 'intersex')), - geocoord GEOMETRY(Point, 4326), - description TEXT, - sample_guid UUID NOT NULL -); - -CREATE TABLE pgp_biosample -( - pgp_biosample_id SERIAL PRIMARY KEY, - pgp_participant_id VARCHAR(255) NOT NULL, - ena_biosample_accession VARCHAR(255) UNIQUE, - sex VARCHAR(15) CHECK (sex IN ('male', 'female', 'intersex')), - sample_guid UUID NOT NULL -); - -CREATE TABLE haplogroup -( - haplogroup_id SERIAL PRIMARY KEY, - name VARCHAR(255) NOT NULL, - lineage VARCHAR(255), - description TEXT, - haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')), - revision_id INTEGER NOT NULL, - source VARCHAR(255) NOT NULL, - confidence_level VARCHAR(255) NOT NULL, - valid_from TIMESTAMP NOT NULL, - valid_until TIMESTAMP, - unique (name) -); - -CREATE TABLE 
haplogroup_relationship -( - haplogroup_relationship_id SERIAL PRIMARY KEY, - child_haplogroup_id INTEGER NOT NULL, - parent_haplogroup_id INTEGER NOT NULL, - revision_id INTEGER NOT NULL, - valid_from TIMESTAMP NOT NULL, - valid_until TIMESTAMP, - source VARCHAR(255) NOT NULL, - FOREIGN KEY (child_haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE, - FOREIGN KEY (parent_haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE, - UNIQUE (child_haplogroup_id, revision_id) -); - -CREATE TABLE genbank_contig -( - genbank_contig_id SERIAL PRIMARY KEY, - accession VARCHAR(255) NOT NULL, - common_name VARCHAR(255), - reference_genome VARCHAR(255), - seq_length INT NOT NULL, - UNIQUE (accession) -); - -CREATE TABLE variant -( - variant_id SERIAL PRIMARY KEY, - genbank_contig_id INT NOT NULL, - position INTEGER NOT NULL, - reference_allele VARCHAR(255) NOT NULL, - alternate_allele VARCHAR(255) NOT NULL, - variant_type VARCHAR(5) NOT NULL CHECK (variant_type IN ('SNP', 'INDEL')), - rs_id VARCHAR(255), - common_name VARCHAR(255), - FOREIGN KEY (genbank_contig_id) REFERENCES genbank_contig (genbank_contig_id) ON DELETE CASCADE, - UNIQUE (genbank_contig_id, position, reference_allele, alternate_allele) -); - -CREATE TABLE haplogroup_variant -( - haplogroup_variant_id SERIAL PRIMARY KEY, - haplogroup_id INT NOT NULL, - variant_id INT NOT NULL, - FOREIGN KEY (haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE, - FOREIGN KEY (variant_id) REFERENCES variant (variant_id) ON DELETE CASCADE, - UNIQUE (haplogroup_id, variant_id) -); - -CREATE TABLE biosample_haplogroup -( - sample_guid UUID NOT NULL, - y_haplogroup_id INT, - mt_haplogroup_id INT, - FOREIGN KEY (y_haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE, - FOREIGN KEY (mt_haplogroup_id) REFERENCES haplogroup (haplogroup_id) ON DELETE CASCADE, - PRIMARY KEY (sample_guid) -); - -CREATE TABLE analysis_method -( - analysis_method_id SERIAL PRIMARY KEY, - 
method_name VARCHAR(255) NOT NULL UNIQUE -); - -CREATE TABLE population -( - population_id SERIAL PRIMARY KEY, - population_name VARCHAR(255) NOT NULL UNIQUE - -- parent_population_id BIGINT REFERENCES population(population_id) -- Uncomment if needed -); - -CREATE TABLE ancestry_analysis -( - ancestry_analysis_id SERIAL PRIMARY KEY, - sample_guid UUID NOT NULL, - analysis_method_id INT NOT NULL, - population_id INT NOT NULL, - probability DECIMAL(5, 4), - FOREIGN KEY (analysis_method_id) REFERENCES analysis_method (analysis_method_id) ON DELETE CASCADE, - FOREIGN KEY (population_id) REFERENCES population (population_id) ON DELETE CASCADE, - UNIQUE (sample_guid, analysis_method_id, population_id) -); - -CREATE TABLE sequence_library -( - id SERIAL PRIMARY KEY, - sample_guid UUID NOT NULL, - lab VARCHAR(255) NOT NULL, - test_type VARCHAR(255) NOT NULL, - run_date TIMESTAMP NOT NULL, - instrument VARCHAR(255) NOT NULL, - reads BIGINT NOT NULL, - read_length INTEGER NOT NULL, - paired_end BOOLEAN NOT NULL, - insert_size INTEGER, - created_at TIMESTAMP NOT NULL, - updated_at TIMESTAMP -); - -CREATE TABLE sequence_file -( - id SERIAL PRIMARY KEY, - library_id INT NOT NULL, - file_name VARCHAR(255) NOT NULL, - file_size_bytes BIGINT NOT NULL, - file_md5 VARCHAR(255) NOT NULL, - file_format VARCHAR(255) NOT NULL, - aligner VARCHAR(255) NOT NULL, - target_reference VARCHAR(255) NOT NULL, - created_at TIMESTAMP NOT NULL, - updated_at TIMESTAMP, - FOREIGN KEY (library_id) REFERENCES sequence_library (id) ON DELETE CASCADE -); - -CREATE TABLE sequence_http_location -( - id SERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL, - file_url TEXT NOT NULL, - file_index_url TEXT, - FOREIGN KEY (sequence_file_id) REFERENCES sequence_file (id) ON DELETE CASCADE -); - -CREATE TABLE sequence_atp_location -( - id SERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL, - repo_did VARCHAR(255) NOT NULL, - record_cid VARCHAR(255) NOT NULL, - record_path TEXT NOT NULL, - index_did VARCHAR(255), 
- index_cid VARCHAR(255), - FOREIGN KEY (sequence_file_id) REFERENCES sequence_file (id) ON DELETE CASCADE -); - -CREATE TABLE quality_metrics -( - id SERIAL PRIMARY KEY, - contig_id INT NOT NULL, - start_pos INT NOT NULL, - end_pos INT NOT NULL, - num_reads INT NOT NULL, - ref_n INT NOT NULL, - no_cov INT NOT NULL, - low_cov INT NOT NULL, - excessive_cov INT NOT NULL, - poor_mq INT NOT NULL, - callable INT NOT NULL, - cov_percent DOUBLE PRECISION NOT NULL, - mean_depth DOUBLE PRECISION NOT NULL, - mean_mq DOUBLE PRECISION NOT NULL, - sequence_file_id BIGINT NOT NULL, - FOREIGN KEY (sequence_file_id) REFERENCES sequence_file (id) ON DELETE CASCADE, - FOREIGN KEY (contig_id) REFERENCES genbank_contig (genbank_contig_id) ON DELETE CASCADE -); - -CREATE TABLE reported_variant -( - id BIGSERIAL PRIMARY KEY, - sample_guid UUID NOT NULL, - contig_id INT NOT NULL, - position INT NOT NULL, - reference_allele VARCHAR(255) NOT NULL, - alternate_allele VARCHAR(255) NOT NULL, - variant_type VARCHAR(5) NOT NULL CHECK (variant_type IN ('SNP', 'INDEL')), - reported_date TIMESTAMP NOT NULL, - provenance VARCHAR(255) NOT NULL, - confidence_score DOUBLE PRECISION NOT NULL, - notes TEXT, - status VARCHAR(255) NOT NULL, - FOREIGN KEY (contig_id) REFERENCES genbank_contig (genbank_contig_id) ON DELETE CASCADE -); - -CREATE TABLE reported_negative_variant -( - id BIGSERIAL PRIMARY KEY, - sample_guid UUID NOT NULL, - variant_id INT NOT NULL, - reported_date TIMESTAMP, - notes TEXT, - status VARCHAR(255) NOT NULL, - FOREIGN KEY (variant_id) REFERENCES variant (variant_id) ON DELETE CASCADE -); - -CREATE TABLE publication -( - id SERIAL PRIMARY KEY, - pubmed_id VARCHAR(20) UNIQUE, - doi VARCHAR(255) UNIQUE, - title TEXT NOT NULL, - journal VARCHAR(255), - publication_date DATE, - url VARCHAR(2048) -); - -CREATE TABLE ena_study -( - id SERIAL PRIMARY KEY, - accession VARCHAR(50) UNIQUE NOT NULL, - title VARCHAR(255) NOT NULL, - center_name VARCHAR(255) NOT NULL, - study_name VARCHAR(255) 
NOT NULL, - details TEXT -); - -CREATE TABLE publication_ena_study -( - publication_id INT, - ena_study_id INT, - FOREIGN KEY (publication_id) REFERENCES publication (id), - FOREIGN KEY (ena_study_id) REFERENCES ena_study (id), - PRIMARY KEY (publication_id, ena_study_id) -); - -CREATE TABLE publication_biosample -( - publication_id INT REFERENCES publication (id) ON DELETE CASCADE, - biosample_id INT REFERENCES biosample (id) ON DELETE CASCADE, - PRIMARY KEY (publication_id, biosample_id) -); - -# --- !Downs -DROP TABLE publication_biosample; -DROP TABLE publication_ena_study; -DROP TABLE ena_study; -DROP TABLE publication; -DROP TABLE reported_negative_variant; -DROP TABLE reported_variant; -DROP TABLE quality_metrics; -DROP TABLE sequence_atp_location; -DROP TABLE sequence_http_location; -DROP TABLE sequence_file; -DROP TABLE sequence_library; -DROP TABLE ancestry_analysis; -DROP TABLE population; -DROP TABLE analysis_method; -DROP TABLE biosample_haplogroup; -DROP TABLE haplogroup_variant; -DROP TABLE variant; -DROP TABLE genbank_contig; -DROP TABLE haplogroup_relationship; -DROP TABLE haplogroup; -DROP TABLE pgp_biosample; -DROP TABLE citizen_biosample; -DROP TABLE biosample; -DROP TABLE specimen_donor; diff --git a/conf/evolutions/default/10.sql b/conf/evolutions/default/10.sql deleted file mode 100644 index cae6ebef..00000000 --- a/conf/evolutions/default/10.sql +++ /dev/null @@ -1,33 +0,0 @@ -# --- !Ups - --- Add new columns to the public.publication table from OpenAlex integration -ALTER TABLE public.publication - ADD COLUMN open_alex_id VARCHAR(255), - ADD COLUMN citation_normalized_percentile REAL, - ADD COLUMN cited_by_count INTEGER, - ADD COLUMN open_access_status VARCHAR(50), - ADD COLUMN open_access_url VARCHAR(2048), - ADD COLUMN primary_topic VARCHAR(255), - ADD COLUMN publication_type VARCHAR(50), - ADD COLUMN publisher VARCHAR(255); - --- Add unique constraint for open_alex_id if it's guaranteed to be unique --- ALTER TABLE public.publication ADD 
CONSTRAINT publication_open_alex_id_uk UNIQUE (open_alex_id); --- Note: It's often safer to add unique constraints after populating existing NULLs --- if you have existing rows where open_alex_id would be NULL, as NULL is not unique. --- If you insert new data, you might add this in a later evolution or use a unique index --- that allows NULLs (e.g., CREATE UNIQUE INDEX ON table (col) WHERE col IS NOT NULL; in Postgres). --- For now, if your model allows Option[String], the database column should allow NULL. - -# --- !Downs - --- Revert changes: Drop the columns added in this evolution -ALTER TABLE public.publication - DROP COLUMN publisher, - DROP COLUMN publication_type, - DROP COLUMN primary_topic, - DROP COLUMN open_access_url, - DROP COLUMN open_access_status, - DROP COLUMN cited_by_count, - DROP COLUMN citation_normalized_percentile, - DROP COLUMN open_alex_id; \ No newline at end of file diff --git a/conf/evolutions/default/11.sql b/conf/evolutions/default/11.sql deleted file mode 100644 index cb904040..00000000 --- a/conf/evolutions/default/11.sql +++ /dev/null @@ -1,15 +0,0 @@ --- !Ups --- Add a lock column to prevent batch updates from the source from removing manual corrections -ALTER TABLE biosample ADD COLUMN locked boolean; - --- Then update existing records --- Lock samples that have either sex or geocoord manually set -UPDATE biosample -SET locked = false; - --- Finally make the column non-null with default -ALTER TABLE biosample ALTER COLUMN locked SET NOT NULL; -ALTER TABLE biosample ALTER COLUMN locked SET DEFAULT false; - --- !Downs -ALTER TABLE biosample DROP COLUMN locked; \ No newline at end of file diff --git a/conf/evolutions/default/12.sql b/conf/evolutions/default/12.sql deleted file mode 100644 index e4c61254..00000000 --- a/conf/evolutions/default/12.sql +++ /dev/null @@ -1,59 +0,0 @@ --- !Ups - -ALTER TABLE ena_study RENAME TO genomic_studies; - -ALTER TABLE genomic_studies - ADD COLUMN source VARCHAR(10), - ADD COLUMN submission_date 
DATE, - ADD COLUMN last_update DATE, - ADD COLUMN bio_project_id VARCHAR(50), - ADD COLUMN molecule VARCHAR(50), - ADD COLUMN topology VARCHAR(50), - ADD COLUMN taxonomy_id INTEGER, - ADD COLUMN version VARCHAR(10); - --- Update existing source values -UPDATE genomic_studies -SET source = 'ENA'; - --- Now add NOT NULL constraint after the update -ALTER TABLE genomic_studies - ALTER COLUMN source SET NOT NULL; - --- Add enum constraint to ensure only valid values -ALTER TABLE genomic_studies - ADD CONSTRAINT valid_source CHECK (source IN ('ENA', 'NCBI_BIOPROJECT', 'NCBI_GENBANK')); - --- Rename the column -ALTER TABLE publication_ena_study - RENAME COLUMN ena_study_id TO genomic_study_id; - --- Rename the foreign key constraint (if it exists) -ALTER TABLE publication_ena_study - RENAME CONSTRAINT publication_ena_study_ena_study_id_fkey - TO publication_ena_study_genomic_study_id_fkey; - - --- !Downs - --- Revert the foreign key constraint rename -ALTER TABLE publication_ena_study - RENAME CONSTRAINT publication_ena_study_genomic_study_id_fkey - TO publication_ena_study_ena_study_id_fkey; - --- Revert the column rename -ALTER TABLE publication_ena_study - RENAME COLUMN genomic_study_id TO ena_study_id; - -ALTER TABLE genomic_studies - DROP COLUMN source, - DROP COLUMN submission_date, - DROP COLUMN last_update, - DROP COLUMN bio_project_id, - DROP COLUMN molecule, - DROP COLUMN topology, - DROP COLUMN taxonomy_id, - DROP COLUMN version; - -ALTER TABLE genomic_studies RENAME TO ena_study; - diff --git a/conf/evolutions/default/13.sql b/conf/evolutions/default/13.sql deleted file mode 100644 index d7b2b002..00000000 --- a/conf/evolutions/default/13.sql +++ /dev/null @@ -1,5 +0,0 @@ --- !Ups -ALTER TABLE genomic_studies ALTER COLUMN source TYPE varchar(20); - --- !Downs -ALTER TABLE genomic_studies ALTER COLUMN source TYPE varchar(10); \ No newline at end of file diff --git a/conf/evolutions/default/14.sql b/conf/evolutions/default/14.sql deleted file mode 100644 index 
719ffbfb..00000000 --- a/conf/evolutions/default/14.sql +++ /dev/null @@ -1,16 +0,0 @@ -# --- !Ups - -CREATE TABLE biosample_original_haplogroup -( - id SERIAL PRIMARY KEY, - biosample_id INT REFERENCES biosample (id) ON DELETE CASCADE, - publication_id INT REFERENCES publication (id) ON DELETE CASCADE, - original_y_haplogroup VARCHAR(255), - original_mt_haplogroup VARCHAR(255), - notes TEXT, - UNIQUE (biosample_id, publication_id) -); - --- !Downs - -DROP TABLE biosample_original_haplogroup; \ No newline at end of file diff --git a/conf/evolutions/default/15.sql b/conf/evolutions/default/15.sql deleted file mode 100644 index 25ecf66f..00000000 --- a/conf/evolutions/default/15.sql +++ /dev/null @@ -1,32 +0,0 @@ -# --- !Ups - --- Create the new checksums table -CREATE TABLE sequence_file_checksum ( - id SERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL, - checksum VARCHAR(255) NOT NULL, - algorithm VARCHAR(50) NOT NULL, - verified_at TIMESTAMP NOT NULL, - FOREIGN KEY (sequence_file_id) REFERENCES sequence_file (id) ON DELETE CASCADE, - UNIQUE (sequence_file_id, algorithm) -); - --- Migrate existing MD5 checksums -INSERT INTO sequence_file_checksum (sequence_file_id, checksum, algorithm, verified_at) -SELECT id, file_md5, 'MD5', created_at -FROM sequence_file; - -ALTER TABLE sequence_file DROP COLUMN file_md5; - -# --- !Downs - --- If we dropped file_md5, restore it first -ALTER TABLE sequence_file ADD COLUMN file_md5 VARCHAR(255); - --- Restore MD5 checksums if we dropped the column -UPDATE sequence_file sf -SET file_md5 = sfc.checksum -FROM sequence_file_checksum sfc -WHERE sf.id = sfc.sequence_file_id AND sfc.algorithm = 'MD5'; - -DROP TABLE sequence_file_checksum; \ No newline at end of file diff --git a/conf/evolutions/default/16.sql b/conf/evolutions/default/16.sql deleted file mode 100644 index 98ac5213..00000000 --- a/conf/evolutions/default/16.sql +++ /dev/null @@ -1,50 +0,0 @@ -# --- !Ups --- First add the column with a temporary NULL constraint -ALTER 
TABLE public.biosample - ADD COLUMN sample_type varchar(10); - --- Update all existing rows to 'Standard' -UPDATE public.biosample -SET sample_type = 'Standard'; - --- Now make the column NOT NULL and add the constraint -ALTER TABLE public.biosample - ALTER COLUMN sample_type SET NOT NULL, - ADD CONSTRAINT biosample_type_check CHECK ( - sample_type IN ('Standard', 'PGP', 'Citizen', 'Ancient') - ); - --- Add the rest of the columns -ALTER TABLE public.biosample - ADD COLUMN pgp_participant_id varchar(50), - ADD COLUMN citizen_biosample_did varchar(255), - ADD COLUMN source_platform varchar(100), - ADD COLUMN date_range_start integer, - ADD COLUMN date_range_end integer; - --- Add constraints for PGP and Citizen samples -ALTER TABLE public.biosample - ADD CONSTRAINT pgp_participant_id_required - CHECK ( - (sample_type != 'PGP') OR - (sample_type = 'PGP' AND pgp_participant_id IS NOT NULL) - ); - -ALTER TABLE public.biosample - ADD CONSTRAINT citizen_did_required - CHECK ( - (sample_type != 'Citizen') OR - (sample_type = 'Citizen' AND citizen_biosample_did IS NOT NULL) - ); - -# --- !Downs -ALTER TABLE public.biosample - DROP CONSTRAINT IF EXISTS citizen_did_required, - DROP CONSTRAINT IF EXISTS pgp_participant_id_required, - DROP CONSTRAINT IF EXISTS biosample_type_check, - DROP COLUMN IF EXISTS date_range_end, - DROP COLUMN IF EXISTS date_range_start, - DROP COLUMN IF EXISTS source_platform, - DROP COLUMN IF EXISTS citizen_biosample_did, - DROP COLUMN IF EXISTS pgp_participant_id, - DROP COLUMN IF EXISTS sample_type; \ No newline at end of file diff --git a/conf/evolutions/default/17.sql b/conf/evolutions/default/17.sql deleted file mode 100644 index 4cd71fb0..00000000 --- a/conf/evolutions/default/17.sql +++ /dev/null @@ -1,6 +0,0 @@ -# --- !Ups ---- Establish a sequence for citizen biosamples for acession generation -CREATE SEQUENCE IF NOT EXISTS citizen_biosample_seq START 1; - -# --- !Downs -DROP SEQUENCE IF EXISTS citizen_biosample_seq; \ No newline at end of 
file diff --git a/conf/evolutions/default/18.sql b/conf/evolutions/default/18.sql deleted file mode 100644 index 146c94ed..00000000 --- a/conf/evolutions/default/18.sql +++ /dev/null @@ -1,104 +0,0 @@ --- !Ups - --- Create enum types -CREATE TYPE biological_sex AS ENUM ('male', 'female', 'intersex'); -CREATE TYPE biosample_type AS ENUM ('Standard', 'PGP', 'Citizen', 'Ancient'); - --- Add new columns to specimen_donor with temporary nullability -ALTER TABLE specimen_donor - ADD COLUMN sex biological_sex, - ADD COLUMN geocoord geometry(Point, 4326), - ADD COLUMN date_range_start integer, - ADD COLUMN date_range_end integer, - ADD COLUMN donor_type biosample_type, - ADD COLUMN pgp_participant_id varchar(50), - ADD COLUMN citizen_biosample_did varchar(255); - --- Migrate data from biosample to specimen_donor -UPDATE specimen_donor sd -SET sex = CASE - WHEN b.sex = 'male' THEN 'male'::biological_sex - WHEN b.sex = 'female' THEN 'female'::biological_sex - WHEN b.sex = 'intersex' THEN 'intersex'::biological_sex - ELSE NULL - END, - geocoord = b.geocoord, - date_range_start = b.date_range_start, - date_range_end = b.date_range_end, - donor_type = b.sample_type::biosample_type, - pgp_participant_id = b.pgp_participant_id, - citizen_biosample_did = b.citizen_biosample_did -FROM biosample b -WHERE b.specimen_donor_id = sd.id; - --- Set default value and not null constraint for donor_type after data migration -ALTER TABLE specimen_donor - ALTER COLUMN donor_type SET NOT NULL, - ALTER COLUMN donor_type SET DEFAULT 'Standard', - ADD CONSTRAINT pgp_participant_id_required - CHECK (donor_type != 'PGP' OR (donor_type = 'PGP' AND pgp_participant_id IS NOT NULL)), - ADD CONSTRAINT citizen_did_required - CHECK (donor_type != 'Citizen' OR (donor_type = 'Citizen' AND citizen_biosample_did IS NOT NULL)); - --- Remove migrated columns from biosample -ALTER TABLE biosample - DROP COLUMN sex, - DROP COLUMN geocoord, - DROP COLUMN sample_type, - DROP COLUMN pgp_participant_id, - DROP COLUMN 
citizen_biosample_did, - DROP COLUMN date_range_start, - DROP COLUMN date_range_end, - DROP CONSTRAINT IF EXISTS pgp_participant_id_required, - DROP CONSTRAINT IF EXISTS citizen_did_required, - DROP CONSTRAINT IF EXISTS biosample_sex_check, - DROP CONSTRAINT IF EXISTS biosample_type_check; - --- !Downs - --- Add columns back to biosample -ALTER TABLE biosample - ADD COLUMN sex varchar(15), - ADD COLUMN geocoord geometry(Point, 4326), - ADD COLUMN sample_type varchar(10), - ADD COLUMN pgp_participant_id varchar(50), - ADD COLUMN citizen_biosample_did varchar(255), - ADD COLUMN date_range_start integer, - ADD COLUMN date_range_end integer; - --- Migrate data back from specimen_donor to biosample -UPDATE biosample b -SET sex = sd.sex::text, - geocoord = sd.geocoord, - sample_type = sd.donor_type::text, - pgp_participant_id = sd.pgp_participant_id, - citizen_biosample_did = sd.citizen_biosample_did, - date_range_start = sd.date_range_start, - date_range_end = sd.date_range_end -FROM specimen_donor sd -WHERE b.specimen_donor_id = sd.id; - --- Add constraints back to biosample -ALTER TABLE biosample - ADD CONSTRAINT biosample_sex_check - CHECK (sex IN ('male', 'female', 'intersex')), - ADD CONSTRAINT biosample_type_check - CHECK (sample_type IN ('Standard', 'PGP', 'Citizen', 'Ancient')), - ADD CONSTRAINT pgp_participant_id_required - CHECK (sample_type != 'PGP' OR (sample_type = 'PGP' AND pgp_participant_id IS NOT NULL)), - ADD CONSTRAINT citizen_did_required - CHECK (sample_type != 'Citizen' OR (sample_type = 'Citizen' AND citizen_biosample_did IS NOT NULL)); - --- Remove added columns from specimen_donor -ALTER TABLE specimen_donor - DROP COLUMN sex, - DROP COLUMN geocoord, - DROP COLUMN date_range_start, - DROP COLUMN date_range_end, - DROP COLUMN donor_type, - DROP COLUMN pgp_participant_id, - DROP COLUMN citizen_biosample_did; - --- Drop the enum types (need to drop them last since columns depend on them) -DROP TYPE biological_sex; -DROP TYPE biosample_type; \ No 
newline at end of file diff --git a/conf/evolutions/default/19.sql b/conf/evolutions/default/19.sql deleted file mode 100644 index edf1b484..00000000 --- a/conf/evolutions/default/19.sql +++ /dev/null @@ -1,95 +0,0 @@ --- !Ups - --- First, drop existing tables in correct order -DROP TABLE IF EXISTS public.pangenome_alignment_coverage CASCADE; -DROP TABLE IF EXISTS public.pangenome_alignment_metadata CASCADE; -DROP TABLE IF EXISTS public.pangenome_variant_link CASCADE; -DROP TABLE IF EXISTS public.pangenome_edge CASCADE; -DROP TABLE IF EXISTS public.pangenome_path CASCADE; -DROP TABLE IF EXISTS public.pangenome_node CASCADE; -DROP TABLE IF EXISTS public.canonical_pangenome_variant CASCADE; -DROP TABLE IF EXISTS public.pangenome_graph CASCADE; - --- Create new simplified tables -CREATE TABLE public.pangenome_graph ( - id BIGSERIAL PRIMARY KEY, - graph_name VARCHAR(255) NOT NULL, - source_gfa_file VARCHAR(255), - description TEXT, - creation_date TIMESTAMP DEFAULT now() NOT NULL -); - -CREATE TABLE public.pangenome_path ( - id BIGSERIAL PRIMARY KEY, - graph_id BIGINT NOT NULL REFERENCES public.pangenome_graph(id), - path_name VARCHAR(255) NOT NULL, - is_reference BOOLEAN DEFAULT FALSE, - length_bp BIGINT, - description TEXT -); - -CREATE TABLE public.pangenome_node ( - id BIGSERIAL PRIMARY KEY, - graph_id BIGINT NOT NULL REFERENCES public.pangenome_graph(id), - node_name VARCHAR(255) NOT NULL, - sequence_length BIGINT -); - -CREATE TABLE public.canonical_pangenome_variant ( - id BIGSERIAL PRIMARY KEY, - pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph(id), - variant_type VARCHAR(50) NOT NULL, - variant_nodes INTEGER[] NOT NULL, - variant_edges INTEGER[] DEFAULT '{}'::INTEGER[] NOT NULL, - reference_path_id INTEGER REFERENCES public.pangenome_path(id), - reference_start_position INTEGER, - reference_end_position INTEGER, - reference_allele_sequence TEXT, - alternate_allele_sequence TEXT, - canonical_hash VARCHAR(255) NOT NULL UNIQUE, - description 
TEXT, - creation_date TIMESTAMP DEFAULT now() NOT NULL -); - -CREATE TABLE public.pangenome_alignment_metadata ( - id BIGSERIAL PRIMARY KEY, - sequence_file_id BIGINT NOT NULL REFERENCES public.sequence_file(id) ON DELETE CASCADE, - pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph(id), - metric_level VARCHAR(50) NOT NULL CHECK (metric_level IN ('GRAPH_OVERALL', 'PATH', 'NODE', 'REGION')), - pangenome_path_id INTEGER REFERENCES public.pangenome_path(id), - pangenome_node_id INTEGER REFERENCES public.pangenome_node(id), - region_start_node_id INTEGER REFERENCES public.pangenome_node(id), - region_end_node_id INTEGER REFERENCES public.pangenome_node(id), - region_name VARCHAR(255), - region_length_bp BIGINT, - metrics_date TIMESTAMP NOT NULL DEFAULT NOW(), - analysis_tool VARCHAR(255) NOT NULL, - analysis_tool_version VARCHAR(50), - notes TEXT, - metadata JSONB -); - -CREATE TABLE public.pangenome_alignment_coverage ( - alignment_metadata_id BIGINT PRIMARY KEY REFERENCES public.pangenome_alignment_metadata(id) ON DELETE CASCADE, - mean_depth DOUBLE PRECISION, - median_depth DOUBLE PRECISION, - percent_coverage_at_1x DOUBLE PRECISION, - percent_coverage_at_5x DOUBLE PRECISION, - percent_coverage_at_10x DOUBLE PRECISION, - percent_coverage_at_20x DOUBLE PRECISION, - percent_coverage_at_30x DOUBLE PRECISION, - bases_no_coverage BIGINT, - bases_low_quality_mapping BIGINT, - bases_callable BIGINT, - mean_mapping_quality DOUBLE PRECISION -); - --- !Downs - --- Re-create original tables in reverse order -DROP TABLE IF EXISTS public.pangenome_alignment_coverage CASCADE; -DROP TABLE IF EXISTS public.pangenome_alignment_metadata CASCADE; -DROP TABLE IF EXISTS public.canonical_pangenome_variant CASCADE; -DROP TABLE IF EXISTS public.pangenome_node CASCADE; -DROP TABLE IF EXISTS public.pangenome_path CASCADE; -DROP TABLE IF EXISTS public.pangenome_graph CASCADE; \ No newline at end of file diff --git a/conf/evolutions/default/2.sql 
b/conf/evolutions/default/2.sql deleted file mode 100644 index 242dfbb1..00000000 --- a/conf/evolutions/default/2.sql +++ /dev/null @@ -1,968 +0,0 @@ -# --- !Ups ---- Load GRCh37, GRCh38 and chm13v2.0 GenBank contigs -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (370, 'CM000663.2', 'chr1', 'GRCh38.p14', 248956422); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (371, 'CM000664.2', 'chr2', 'GRCh38.p14', 242193529); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (372, 'CM000665.2', 'chr3', 'GRCh38.p14', 198295559); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (373, 'CM000666.2', 'chr4', 'GRCh38.p14', 190214555); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (374, 'CM000667.2', 'chr5', 'GRCh38.p14', 181538259); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (375, 'CM000668.2', 'chr6', 'GRCh38.p14', 170805979); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (376, 'CM000669.2', 'chr7', 'GRCh38.p14', 159345973); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (377, 'CM000670.2', 'chr8', 'GRCh38.p14', 145138636); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (378, 'CM000671.2', 'chr9', 'GRCh38.p14', 138394717); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (379, 'CM000672.2', 'chr10', 'GRCh38.p14', 133797422); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (380, 'CM000673.2', 'chr11', 'GRCh38.p14', 
135086622); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (381, 'CM000674.2', 'chr12', 'GRCh38.p14', 133275309); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (382, 'CM000675.2', 'chr13', 'GRCh38.p14', 114364328); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (383, 'CM000676.2', 'chr14', 'GRCh38.p14', 107043718); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (384, 'CM000677.2', 'chr15', 'GRCh38.p14', 101991189); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (385, 'CM000678.2', 'chr16', 'GRCh38.p14', 90338345); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (386, 'CM000679.2', 'chr17', 'GRCh38.p14', 83257441); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (387, 'CM000680.2', 'chr18', 'GRCh38.p14', 80373285); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (388, 'CM000681.2', 'chr19', 'GRCh38.p14', 58617616); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (389, 'CM000682.2', 'chr20', 'GRCh38.p14', 64444167); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (390, 'CM000683.2', 'chr21', 'GRCh38.p14', 46709983); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (391, 'CM000684.2', 'chr22', 'GRCh38.p14', 50818468); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (392, 'CM000685.2', 'chrX', 'GRCh38.p14', 156040895); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (393, 'CM000686.2', 'chrY', 'GRCh38.p14', 57227415); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (394, 'KI270706.1', 'chr1_KI270706v1_random', 'GRCh38.p14', 175055); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (395, 'KI270707.1', 'chr1_KI270707v1_random', 'GRCh38.p14', 32032); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (396, 'KI270708.1', 'chr1_KI270708v1_random', 'GRCh38.p14', 127682); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (397, 'KI270709.1', 'chr1_KI270709v1_random', 'GRCh38.p14', 66860); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (398, 'KI270710.1', 'chr1_KI270710v1_random', 'GRCh38.p14', 40176); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (399, 'KI270711.1', 'chr1_KI270711v1_random', 'GRCh38.p14', 42210); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (400, 'KI270712.1', 'chr1_KI270712v1_random', 'GRCh38.p14', 176043); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (401, 'KI270713.1', 'chr1_KI270713v1_random', 'GRCh38.p14', 40745); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (402, 'KI270714.1', 'chr1_KI270714v1_random', 'GRCh38.p14', 41717); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (403, 'KI270715.1', 'chr2_KI270715v1_random', 'GRCh38.p14', 161471); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, 
seq_length) VALUES (404, 'KI270716.1', 'chr2_KI270716v1_random', 'GRCh38.p14', 153799); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (406, 'GL000008.2', 'chr4_GL000008v2_random', 'GRCh38.p14', 209709); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (408, 'KI270717.1', 'chr9_KI270717v1_random', 'GRCh38.p14', 40062); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (409, 'KI270718.1', 'chr9_KI270718v1_random', 'GRCh38.p14', 38054); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (410, 'KI270719.1', 'chr9_KI270719v1_random', 'GRCh38.p14', 176845); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (411, 'KI270720.1', 'chr9_KI270720v1_random', 'GRCh38.p14', 39050); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (412, 'GL000009.2', 'chr14_GL000009v2_random', 'GRCh38.p14', 201709); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (415, 'KI270722.1', 'chr14_KI270722v1_random', 'GRCh38.p14', 194050); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (416, 'KI270723.1', 'chr14_KI270723v1_random', 'GRCh38.p14', 38115); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (417, 'KI270724.1', 'chr14_KI270724v1_random', 'GRCh38.p14', 39555); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (418, 'KI270725.1', 'chr14_KI270725v1_random', 'GRCh38.p14', 172810); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (419, 'KI270726.1', 
'chr14_KI270726v1_random', 'GRCh38.p14', 43739); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (420, 'KI270727.1', 'chr15_KI270727v1_random', 'GRCh38.p14', 448248); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (421, 'KI270728.1', 'chr16_KI270728v1_random', 'GRCh38.p14', 1872759); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (422, 'GL000205.2', 'chr17_GL000205v2_random', 'GRCh38.p14', 185591); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (423, 'KI270729.1', 'chr17_KI270729v1_random', 'GRCh38.p14', 280839); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (424, 'KI270730.1', 'chr17_KI270730v1_random', 'GRCh38.p14', 112551); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (425, 'KI270731.1', 'chr22_KI270731v1_random', 'GRCh38.p14', 150754); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (426, 'KI270732.1', 'chr22_KI270732v1_random', 'GRCh38.p14', 41543); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (427, 'KI270733.1', 'chr22_KI270733v1_random', 'GRCh38.p14', 179772); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (428, 'KI270735.1', 'chr22_KI270735v1_random', 'GRCh38.p14', 42811); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (429, 'KI270736.1', 'chr22_KI270736v1_random', 'GRCh38.p14', 181920); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (430, 'KI270737.1', 'chr22_KI270737v1_random', 
'GRCh38.p14', 103838); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (431, 'KI270738.1', 'chr22_KI270738v1_random', 'GRCh38.p14', 99375); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (432, 'KI270739.1', 'chr22_KI270739v1_random', 'GRCh38.p14', 73985); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (433, 'KI270740.1', 'chrY_KI270740v1_random', 'GRCh38.p14', 37240); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (437, 'GL000216.2', 'chrUn_GL000216v2', 'GRCh38.p14', 176608); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (443, 'KI270302.1', 'chrUn_KI270302v1', 'GRCh38.p14', 2274); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (444, 'KI270303.1', 'chrUn_KI270303v1', 'GRCh38.p14', 1942); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (445, 'KI270304.1', 'chrUn_KI270304v1', 'GRCh38.p14', 2165); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (446, 'KI270305.1', 'chrUn_KI270305v1', 'GRCh38.p14', 1472); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (447, 'KI270310.1', 'chrUn_KI270310v1', 'GRCh38.p14', 1201); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (448, 'KI270311.1', 'chrUn_KI270311v1', 'GRCh38.p14', 12399); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (449, 'KI270312.1', 'chrUn_KI270312v1', 'GRCh38.p14', 998); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (450, 'KI270315.1', 'chrUn_KI270315v1', 'GRCh38.p14', 2276); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (451, 'KI270316.1', 'chrUn_KI270316v1', 'GRCh38.p14', 1444); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (452, 'KI270317.1', 'chrUn_KI270317v1', 'GRCh38.p14', 37690); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (453, 'KI270320.1', 'chrUn_KI270320v1', 'GRCh38.p14', 4416); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (454, 'KI270322.1', 'chrUn_KI270322v1', 'GRCh38.p14', 21476); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (455, 'KI270329.1', 'chrUn_KI270329v1', 'GRCh38.p14', 1040); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (456, 'KI270330.1', 'chrUn_KI270330v1', 'GRCh38.p14', 1652); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (457, 'KI270333.1', 'chrUn_KI270333v1', 'GRCh38.p14', 2699); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (458, 'KI270334.1', 'chrUn_KI270334v1', 'GRCh38.p14', 1368); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (459, 'KI270335.1', 'chrUn_KI270335v1', 'GRCh38.p14', 1048); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (460, 'KI270336.1', 'chrUn_KI270336v1', 'GRCh38.p14', 1026); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (461, 'KI270337.1', 'chrUn_KI270337v1', 'GRCh38.p14', 1121); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (462, 'KI270338.1', 'chrUn_KI270338v1', 'GRCh38.p14', 1428); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (463, 'KI270340.1', 'chrUn_KI270340v1', 'GRCh38.p14', 1428); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (464, 'KI270362.1', 'chrUn_KI270362v1', 'GRCh38.p14', 3530); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (465, 'KI270363.1', 'chrUn_KI270363v1', 'GRCh38.p14', 1803); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (466, 'KI270364.1', 'chrUn_KI270364v1', 'GRCh38.p14', 2855); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (467, 'KI270366.1', 'chrUn_KI270366v1', 'GRCh38.p14', 8320); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (468, 'KI270371.1', 'chrUn_KI270371v1', 'GRCh38.p14', 2805); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (469, 'KI270372.1', 'chrUn_KI270372v1', 'GRCh38.p14', 1650); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (470, 'KI270373.1', 'chrUn_KI270373v1', 'GRCh38.p14', 1451); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (471, 'KI270374.1', 'chrUn_KI270374v1', 'GRCh38.p14', 2656); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (472, 'KI270375.1', 'chrUn_KI270375v1', 'GRCh38.p14', 2378); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (473, 'KI270376.1', 'chrUn_KI270376v1', 
'GRCh38.p14', 1136); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (474, 'KI270378.1', 'chrUn_KI270378v1', 'GRCh38.p14', 1048); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (475, 'KI270379.1', 'chrUn_KI270379v1', 'GRCh38.p14', 1045); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (476, 'KI270381.1', 'chrUn_KI270381v1', 'GRCh38.p14', 1930); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (477, 'KI270382.1', 'chrUn_KI270382v1', 'GRCh38.p14', 4215); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (478, 'KI270383.1', 'chrUn_KI270383v1', 'GRCh38.p14', 1750); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (479, 'KI270384.1', 'chrUn_KI270384v1', 'GRCh38.p14', 1658); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (480, 'KI270385.1', 'chrUn_KI270385v1', 'GRCh38.p14', 990); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (481, 'KI270386.1', 'chrUn_KI270386v1', 'GRCh38.p14', 1788); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (482, 'KI270387.1', 'chrUn_KI270387v1', 'GRCh38.p14', 1537); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (483, 'KI270388.1', 'chrUn_KI270388v1', 'GRCh38.p14', 1216); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (484, 'KI270389.1', 'chrUn_KI270389v1', 'GRCh38.p14', 1298); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES 
(485, 'KI270390.1', 'chrUn_KI270390v1', 'GRCh38.p14', 2387); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (486, 'KI270391.1', 'chrUn_KI270391v1', 'GRCh38.p14', 1484); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (487, 'KI270392.1', 'chrUn_KI270392v1', 'GRCh38.p14', 971); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (488, 'KI270393.1', 'chrUn_KI270393v1', 'GRCh38.p14', 1308); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (489, 'KI270394.1', 'chrUn_KI270394v1', 'GRCh38.p14', 970); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (490, 'KI270395.1', 'chrUn_KI270395v1', 'GRCh38.p14', 1143); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (491, 'KI270396.1', 'chrUn_KI270396v1', 'GRCh38.p14', 1880); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (492, 'KI270411.1', 'chrUn_KI270411v1', 'GRCh38.p14', 2646); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (493, 'KI270412.1', 'chrUn_KI270412v1', 'GRCh38.p14', 1179); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (494, 'KI270414.1', 'chrUn_KI270414v1', 'GRCh38.p14', 2489); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (495, 'KI270417.1', 'chrUn_KI270417v1', 'GRCh38.p14', 2043); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (496, 'KI270418.1', 'chrUn_KI270418v1', 'GRCh38.p14', 2145); -INSERT INTO genbank_contig (genbank_contig_id, accession, 
common_name, reference_genome, seq_length) VALUES (497, 'KI270419.1', 'chrUn_KI270419v1', 'GRCh38.p14', 1029); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (498, 'KI270420.1', 'chrUn_KI270420v1', 'GRCh38.p14', 2321); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (499, 'KI270422.1', 'chrUn_KI270422v1', 'GRCh38.p14', 1445); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (500, 'KI270423.1', 'chrUn_KI270423v1', 'GRCh38.p14', 981); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (501, 'KI270424.1', 'chrUn_KI270424v1', 'GRCh38.p14', 2140); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (502, 'KI270425.1', 'chrUn_KI270425v1', 'GRCh38.p14', 1884); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (503, 'KI270429.1', 'chrUn_KI270429v1', 'GRCh38.p14', 1361); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (504, 'KI270435.1', 'chrUn_KI270435v1', 'GRCh38.p14', 92983); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (505, 'KI270438.1', 'chrUn_KI270438v1', 'GRCh38.p14', 112505); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (506, 'KI270442.1', 'chrUn_KI270442v1', 'GRCh38.p14', 392061); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (507, 'KI270448.1', 'chrUn_KI270448v1', 'GRCh38.p14', 7992); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (508, 'KI270465.1', 'chrUn_KI270465v1', 'GRCh38.p14', 1774); -INSERT INTO 
genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (509, 'KI270466.1', 'chrUn_KI270466v1', 'GRCh38.p14', 1233); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (510, 'KI270467.1', 'chrUn_KI270467v1', 'GRCh38.p14', 3920); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (511, 'KI270468.1', 'chrUn_KI270468v1', 'GRCh38.p14', 4055); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (512, 'KI270507.1', 'chrUn_KI270507v1', 'GRCh38.p14', 5353); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (513, 'KI270508.1', 'chrUn_KI270508v1', 'GRCh38.p14', 1951); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (514, 'KI270509.1', 'chrUn_KI270509v1', 'GRCh38.p14', 2318); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (515, 'KI270510.1', 'chrUn_KI270510v1', 'GRCh38.p14', 2415); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (516, 'KI270511.1', 'chrUn_KI270511v1', 'GRCh38.p14', 8127); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (517, 'KI270512.1', 'chrUn_KI270512v1', 'GRCh38.p14', 22689); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (518, 'KI270515.1', 'chrUn_KI270515v1', 'GRCh38.p14', 6361); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (519, 'KI270516.1', 'chrUn_KI270516v1', 'GRCh38.p14', 1300); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (520, 'KI270517.1', 
'chrUn_KI270517v1', 'GRCh38.p14', 3253); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (521, 'KI270518.1', 'chrUn_KI270518v1', 'GRCh38.p14', 2186); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (522, 'KI270519.1', 'chrUn_KI270519v1', 'GRCh38.p14', 138126); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (523, 'KI270521.1', 'chrUn_KI270521v1', 'GRCh38.p14', 7642); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (524, 'KI270522.1', 'chrUn_KI270522v1', 'GRCh38.p14', 5674); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (525, 'KI270528.1', 'chrUn_KI270528v1', 'GRCh38.p14', 2983); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (526, 'KI270529.1', 'chrUn_KI270529v1', 'GRCh38.p14', 1899); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (527, 'KI270530.1', 'chrUn_KI270530v1', 'GRCh38.p14', 2168); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (528, 'KI270538.1', 'chrUn_KI270538v1', 'GRCh38.p14', 91309); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (529, 'KI270539.1', 'chrUn_KI270539v1', 'GRCh38.p14', 993); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (530, 'KI270544.1', 'chrUn_KI270544v1', 'GRCh38.p14', 1202); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (531, 'KI270548.1', 'chrUn_KI270548v1', 'GRCh38.p14', 1599); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (532, 'KI270579.1', 'chrUn_KI270579v1', 'GRCh38.p14', 31033); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (533, 'KI270580.1', 'chrUn_KI270580v1', 'GRCh38.p14', 1553); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (534, 'KI270581.1', 'chrUn_KI270581v1', 'GRCh38.p14', 7046); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (535, 'KI270582.1', 'chrUn_KI270582v1', 'GRCh38.p14', 6504); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (536, 'KI270583.1', 'chrUn_KI270583v1', 'GRCh38.p14', 1400); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (537, 'KI270584.1', 'chrUn_KI270584v1', 'GRCh38.p14', 4513); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (538, 'KI270587.1', 'chrUn_KI270587v1', 'GRCh38.p14', 2969); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (539, 'KI270588.1', 'chrUn_KI270588v1', 'GRCh38.p14', 6158); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (540, 'KI270589.1', 'chrUn_KI270589v1', 'GRCh38.p14', 44474); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (541, 'KI270590.1', 'chrUn_KI270590v1', 'GRCh38.p14', 4685); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (542, 'KI270591.1', 'chrUn_KI270591v1', 'GRCh38.p14', 5796); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (543, 'KI270593.1', 'chrUn_KI270593v1', 'GRCh38.p14', 3041); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (544, 'KI270741.1', 'chrUn_KI270741v1', 'GRCh38.p14', 157432); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (545, 'KI270742.1', 'chrUn_KI270742v1', 'GRCh38.p14', 186739); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (546, 'KI270743.1', 'chrUn_KI270743v1', 'GRCh38.p14', 210658); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (547, 'KI270744.1', 'chrUn_KI270744v1', 'GRCh38.p14', 168472); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (548, 'KI270745.1', 'chrUn_KI270745v1', 'GRCh38.p14', 41891); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (549, 'KI270746.1', 'chrUn_KI270746v1', 'GRCh38.p14', 66486); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (550, 'KI270747.1', 'chrUn_KI270747v1', 'GRCh38.p14', 198735); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (551, 'KI270748.1', 'chrUn_KI270748v1', 'GRCh38.p14', 93321); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (552, 'KI270749.1', 'chrUn_KI270749v1', 'GRCh38.p14', 158759); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (553, 'KI270750.1', 'chrUn_KI270750v1', 'GRCh38.p14', 148850); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (554, 'KI270751.1', 'chrUn_KI270751v1', 'GRCh38.p14', 150742); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (555, 'KI270753.1', 
'chrUn_KI270753v1', 'GRCh38.p14', 62944); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (556, 'KI270754.1', 'chrUn_KI270754v1', 'GRCh38.p14', 40191); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (557, 'KI270755.1', 'chrUn_KI270755v1', 'GRCh38.p14', 36723); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (558, 'KI270756.1', 'chrUn_KI270756v1', 'GRCh38.p14', 79590); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (559, 'KI270757.1', 'chrUn_KI270757v1', 'GRCh38.p14', 71251); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (560, 'KN196472.1', 'chr1_KN196472v1_fix', 'GRCh38.p14', 186494); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (561, 'KN196473.1', 'chr1_KN196473v1_fix', 'GRCh38.p14', 166200); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (562, 'KN196474.1', 'chr1_KN196474v1_fix', 'GRCh38.p14', 122022); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (563, 'KN538360.1', 'chr1_KN538360v1_fix', 'GRCh38.p14', 460100); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (564, 'KN538361.1', 'chr1_KN538361v1_fix', 'GRCh38.p14', 305542); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (565, 'KQ031383.1', 'chr1_KQ031383v1_fix', 'GRCh38.p14', 467143); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (566, 'KZ208906.1', 'chr1_KZ208906v1_fix', 'GRCh38.p14', 330031); -INSERT INTO genbank_contig (genbank_contig_id, 
accession, common_name, reference_genome, seq_length) VALUES (567, 'KZ559100.1', 'chr1_KZ559100v1_fix', 'GRCh38.p14', 44955); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (568, 'MU273333.1', 'chr1_MU273333v1_fix', 'GRCh38.p14', 1572686); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (569, 'MU273334.1', 'chr1_MU273334v1_fix', 'GRCh38.p14', 210426); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (570, 'MU273335.1', 'chr1_MU273335v1_fix', 'GRCh38.p14', 211934); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (571, 'MU273336.1', 'chr1_MU273336v1_fix', 'GRCh38.p14', 250447); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (572, 'KQ458382.1', 'chr1_KQ458382v1_alt', 'GRCh38.p14', 141019); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (573, 'KQ458383.1', 'chr1_KQ458383v1_alt', 'GRCh38.p14', 349938); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (574, 'KQ458384.1', 'chr1_KQ458384v1_alt', 'GRCh38.p14', 212205); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (575, 'KQ983255.1', 'chr1_KQ983255v1_alt', 'GRCh38.p14', 278659); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (576, 'KV880763.1', 'chr1_KV880763v1_alt', 'GRCh38.p14', 551020); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (577, 'KZ208904.1', 'chr1_KZ208904v1_alt', 'GRCh38.p14', 166136); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (578, 
'KZ208905.1', 'chr1_KZ208905v1_alt', 'GRCh38.p14', 140355); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (579, 'MU273330.1', 'chr1_MU273330v1_alt', 'GRCh38.p14', 516764); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (580, 'MU273331.1', 'chr1_MU273331v1_alt', 'GRCh38.p14', 847441); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (581, 'MU273332.1', 'chr1_MU273332v1_alt', 'GRCh38.p14', 335159); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (582, 'KN538362.1', 'chr2_KN538362v1_fix', 'GRCh38.p14', 208149); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (583, 'KN538363.1', 'chr2_KN538363v1_fix', 'GRCh38.p14', 365499); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (584, 'KQ031384.1', 'chr2_KQ031384v1_fix', 'GRCh38.p14', 481245); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (585, 'ML143341.1', 'chr2_ML143341v1_fix', 'GRCh38.p14', 145975); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (586, 'ML143342.1', 'chr2_ML143342v1_fix', 'GRCh38.p14', 84043); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (587, 'MU273341.1', 'chr2_MU273341v1_fix', 'GRCh38.p14', 120381); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (588, 'MU273342.1', 'chr2_MU273342v1_fix', 'GRCh38.p14', 955087); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (589, 'MU273343.1', 'chr2_MU273343v1_fix', 'GRCh38.p14', 489404); -INSERT INTO 
genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (590, 'MU273344.1', 'chr2_MU273344v1_fix', 'GRCh38.p14', 244725); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (591, 'MU273345.1', 'chr2_MU273345v1_fix', 'GRCh38.p14', 174385); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (592, 'KQ983256.1', 'chr2_KQ983256v1_alt', 'GRCh38.p14', 535088); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (593, 'KZ208907.1', 'chr2_KZ208907v1_alt', 'GRCh38.p14', 181658); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (594, 'KZ208908.1', 'chr2_KZ208908v1_alt', 'GRCh38.p14', 140361); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (595, 'MU273337.1', 'chr2_MU273337v1_alt', 'GRCh38.p14', 431782); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (596, 'MU273338.1', 'chr2_MU273338v1_alt', 'GRCh38.p14', 535251); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (597, 'MU273339.1', 'chr2_MU273339v1_alt', 'GRCh38.p14', 500581); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (598, 'MU273340.1', 'chr2_MU273340v1_alt', 'GRCh38.p14', 284971); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (599, 'KN196475.1', 'chr3_KN196475v1_fix', 'GRCh38.p14', 451168); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (600, 'KN196476.1', 'chr3_KN196476v1_fix', 'GRCh38.p14', 305979); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (601, 'KN538364.1', 'chr3_KN538364v1_fix', 'GRCh38.p14', 415308); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (602, 'KQ031385.1', 'chr3_KQ031385v1_fix', 'GRCh38.p14', 373699); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (603, 'KQ031386.1', 'chr3_KQ031386v1_fix', 'GRCh38.p14', 165718); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (604, 'KV766192.1', 'chr3_KV766192v1_fix', 'GRCh38.p14', 411654); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (605, 'KZ559104.1', 'chr3_KZ559104v1_fix', 'GRCh38.p14', 105527); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (606, 'MU273346.1', 'chr3_MU273346v1_fix', 'GRCh38.p14', 469342); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (607, 'MU273347.1', 'chr3_MU273347v1_fix', 'GRCh38.p14', 301310); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (608, 'MU273348.1', 'chr3_MU273348v1_fix', 'GRCh38.p14', 475876); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (609, 'KZ208909.1', 'chr3_KZ208909v1_alt', 'GRCh38.p14', 175849); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (610, 'KZ559101.1', 'chr3_KZ559101v1_alt', 'GRCh38.p14', 164041); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (611, 'KZ559102.1', 'chr3_KZ559102v1_alt', 'GRCh38.p14', 197752); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (612, 'KZ559103.1', 
'chr3_KZ559103v1_alt', 'GRCh38.p14', 302885); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (613, 'KZ559105.1', 'chr3_KZ559105v1_alt', 'GRCh38.p14', 195063); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (614, 'ML143343.1', 'chr3_ML143343v1_alt', 'GRCh38.p14', 215443); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (615, 'KQ983257.1', 'chr4_KQ983257v1_fix', 'GRCh38.p14', 230434); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (616, 'ML143344.1', 'chr4_ML143344v1_fix', 'GRCh38.p14', 235734); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (617, 'ML143345.1', 'chr4_ML143345v1_fix', 'GRCh38.p14', 341066); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (618, 'ML143346.1', 'chr4_ML143346v1_fix', 'GRCh38.p14', 53476); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (619, 'ML143347.1', 'chr4_ML143347v1_fix', 'GRCh38.p14', 176674); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (620, 'ML143348.1', 'chr4_ML143348v1_fix', 'GRCh38.p14', 125549); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (621, 'ML143349.1', 'chr4_ML143349v1_fix', 'GRCh38.p14', 276109); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (622, 'MU273350.1', 'chr4_MU273350v1_fix', 'GRCh38.p14', 113364); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (623, 'MU273351.1', 'chr4_MU273351v1_fix', 'GRCh38.p14', 205691); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (624, 'KQ090013.1', 'chr4_KQ090013v1_alt', 'GRCh38.p14', 90922); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (625, 'KQ090014.1', 'chr4_KQ090014v1_alt', 'GRCh38.p14', 163749); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (626, 'KQ090015.1', 'chr4_KQ090015v1_alt', 'GRCh38.p14', 236512); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (627, 'KQ983258.1', 'chr4_KQ983258v1_alt', 'GRCh38.p14', 205407); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (628, 'KV766193.1', 'chr4_KV766193v1_alt', 'GRCh38.p14', 420675); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (629, 'MU273349.1', 'chr4_MU273349v1_alt', 'GRCh38.p14', 308682); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (630, 'KV575244.1', 'chr5_KV575244v1_fix', 'GRCh38.p14', 673059); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (631, 'ML143350.1', 'chr5_ML143350v1_fix', 'GRCh38.p14', 89956); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (632, 'MU273352.1', 'chr5_MU273352v1_fix', 'GRCh38.p14', 34400); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (633, 'MU273353.1', 'chr5_MU273353v1_fix', 'GRCh38.p14', 208405); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (634, 'MU273354.1', 'chr5_MU273354v1_fix', 'GRCh38.p14', 2101585); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) 
VALUES (635, 'MU273355.1', 'chr5_MU273355v1_fix', 'GRCh38.p14', 508332); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (636, 'KN196477.1', 'chr5_KN196477v1_alt', 'GRCh38.p14', 139087); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (637, 'KV575243.1', 'chr5_KV575243v1_alt', 'GRCh38.p14', 362221); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (638, 'KZ208910.1', 'chr5_KZ208910v1_alt', 'GRCh38.p14', 135987); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (639, 'MU273356.1', 'chr5_MU273356v1_alt', 'GRCh38.p14', 302485); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (640, 'KN196478.1', 'chr6_KN196478v1_fix', 'GRCh38.p14', 268330); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (641, 'KQ031387.1', 'chr6_KQ031387v1_fix', 'GRCh38.p14', 320750); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (642, 'KQ090016.1', 'chr6_KQ090016v1_fix', 'GRCh38.p14', 245716); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (643, 'KV766194.1', 'chr6_KV766194v1_fix', 'GRCh38.p14', 139427); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (644, 'KZ208911.1', 'chr6_KZ208911v1_fix', 'GRCh38.p14', 242796); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (645, 'ML143351.1', 'chr6_ML143351v1_fix', 'GRCh38.p14', 73265); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (646, 'KQ090017.1', 'chr6_KQ090017v1_alt', 'GRCh38.p14', 82315); 
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (647, 'MU273357.1', 'chr6_MU273357v1_alt', 'GRCh38.p14', 383128); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (648, 'KQ031388.1', 'chr7_KQ031388v1_fix', 'GRCh38.p14', 179932); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (649, 'KV880764.1', 'chr7_KV880764v1_fix', 'GRCh38.p14', 142129); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1135, 'CP068277.2', 'chr1', 'T2T-CHM13v2.0', 248387328); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1136, 'CP068276.2', 'chr2', 'T2T-CHM13v2.0', 242696752); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1137, 'CP068275.2', 'chr3', 'T2T-CHM13v2.0', 201105948); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1138, 'CP068274.2', 'chr4', 'T2T-CHM13v2.0', 193574945); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1139, 'CP068273.2', 'chr5', 'T2T-CHM13v2.0', 182045439); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1140, 'CP068272.2', 'chr6', 'T2T-CHM13v2.0', 172126628); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1141, 'CP068271.2', 'chr7', 'T2T-CHM13v2.0', 160567428); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1142, 'CP068270.2', 'chr8', 'T2T-CHM13v2.0', 146259331); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1143, 'CP068269.2', 'chr9', 
'T2T-CHM13v2.0', 150617247); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1144, 'CP068268.2', 'chr10', 'T2T-CHM13v2.0', 134758134); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1145, 'CP068267.2', 'chr11', 'T2T-CHM13v2.0', 135127769); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1146, 'CP068266.2', 'chr12', 'T2T-CHM13v2.0', 133324548); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1147, 'CP068265.2', 'chr13', 'T2T-CHM13v2.0', 113566686); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1148, 'CP068264.2', 'chr14', 'T2T-CHM13v2.0', 101161492); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1149, 'CP068263.2', 'chr15', 'T2T-CHM13v2.0', 99753195); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (222, 'JH159147.1', null, 'GRCh37.p13', 70345); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (223, 'JH159148.1', null, 'GRCh37.p13', 88070); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (224, 'GL383567.1', null, 'GRCh37.p13', 289831); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (225, 'GL383568.1', null, 'GRCh37.p13', 104552); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (226, 'GL383569.1', null, 'GRCh37.p13', 167950); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (227, 'GL383570.1', null, 'GRCh37.p13', 164789); -INSERT INTO 
genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (228, 'GL383571.1', null, 'GRCh37.p13', 198278); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (229, 'GL383572.1', null, 'GRCh37.p13', 159547); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (230, 'GL582977.2', null, 'GRCh37.p13', 580393); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (231, 'JH159149.1', null, 'GRCh37.p13', 245473); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (232, 'KB021647.1', null, 'GRCh37.p13', 1058686); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (233, 'KE332505.1', null, 'GRCh37.p13', 579598); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (234, 'GL383573.1', null, 'GRCh37.p13', 385657); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (235, 'GL383574.1', null, 'GRCh37.p13', 155864); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (236, 'GL383575.2', null, 'GRCh37.p13', 170222); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (237, 'GL383576.1', null, 'GRCh37.p13', 188024); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (238, 'GL949746.1', null, 'GRCh37.p13', 987716); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (239, 'GL949747.1', null, 'GRCh37.p13', 729519); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (240, 
'GL949748.1', null, 'GRCh37.p13', 1064303); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (241, 'GL949749.1', null, 'GRCh37.p13', 1091840); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (242, 'GL949750.1', null, 'GRCh37.p13', 1066389); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (243, 'GL949751.1', null, 'GRCh37.p13', 1002682); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (244, 'GL949752.1', null, 'GRCh37.p13', 987100); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (245, 'GL949753.1', null, 'GRCh37.p13', 796478); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (246, 'GL582979.2', null, 'GRCh37.p13', 179899); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (247, 'JH720448.1', null, 'GRCh37.p13', 70483); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (248, 'KB663608.1', null, 'GRCh37.p13', 283551); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (249, 'GL383577.1', null, 'GRCh37.p13', 128385); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (650, 'KV880765.1', 'chr7_KV880765v1_fix', 'GRCh38.p14', 468267); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (651, 'KZ208912.1', 'chr7_KZ208912v1_fix', 'GRCh38.p14', 589656); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (652, 'ML143352.1', 'chr7_ML143352v1_fix', 'GRCh38.p14', 254759); 
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (653, 'KZ208913.1', 'chr7_KZ208913v1_alt', 'GRCh38.p14', 680662); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (654, 'KZ559106.1', 'chr7_KZ559106v1_alt', 'GRCh38.p14', 172555); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1150, 'CP068262.2', 'chr16', 'T2T-CHM13v2.0', 96330374); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1151, 'CP068261.2', 'chr17', 'T2T-CHM13v2.0', 84276897); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1152, 'CP068260.2', 'chr18', 'T2T-CHM13v2.0', 80542538); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1153, 'CP068259.2', 'chr19', 'T2T-CHM13v2.0', 61707364); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1154, 'CP068258.2', 'chr20', 'T2T-CHM13v2.0', 66210255); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1155, 'CP068257.2', 'chr21', 'T2T-CHM13v2.0', 45090682); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1156, 'CP068256.2', 'chr22', 'T2T-CHM13v2.0', 51324926); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1157, 'CP068255.2', 'chrX', 'T2T-CHM13v2.0', 154259566); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1158, 'CP086569.2', 'chrY', 'T2T-CHM13v2.0', 62460029); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1159, 'CP068254.1', 'chrM', 
'T2T-CHM13v2.0', 16569); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1, 'CM000663.1', 'chr1', 'GRCh37.p13', 249250621); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (2, 'CM000664.1', 'chr2', 'GRCh37.p13', 243199373); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (3, 'CM000665.1', 'chr3', 'GRCh37.p13', 198022430); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (4, 'CM000666.1', 'chr4', 'GRCh37.p13', 191154276); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (5, 'CM000667.1', 'chr5', 'GRCh37.p13', 180915260); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (6, 'CM000668.1', 'chr6', 'GRCh37.p13', 171115067); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (7, 'CM000669.1', 'chr7', 'GRCh37.p13', 159138663); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (8, 'CM000670.1', 'chr8', 'GRCh37.p13', 146364022); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (9, 'CM000671.1', 'chr9', 'GRCh37.p13', 141213431); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (10, 'CM000672.1', 'chr10', 'GRCh37.p13', 135534747); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (11, 'CM000673.1', 'chr11', 'GRCh37.p13', 135006516); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (12, 'CM000674.1', 'chr12', 'GRCh37.p13', 133851895); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (13, 'CM000675.1', 'chr13', 'GRCh37.p13', 115169878); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (14, 'CM000676.1', 'chr14', 'GRCh37.p13', 107349540); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (15, 'CM000677.1', 'chr15', 'GRCh37.p13', 102531392); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (16, 'CM000678.1', 'chr16', 'GRCh37.p13', 90354753); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (17, 'CM000679.1', 'chr17', 'GRCh37.p13', 81195210); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (18, 'CM000680.1', 'chr18', 'GRCh37.p13', 78077248); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (19, 'CM000681.1', 'chr19', 'GRCh37.p13', 59128983); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (20, 'CM000682.1', 'chr20', 'GRCh37.p13', 63025520); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (21, 'CM000683.1', 'chr21', 'GRCh37.p13', 48129895); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (22, 'CM000684.1', 'chr22', 'GRCh37.p13', 51304566); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (23, 'CM000685.1', 'chrX', 'GRCh37.p13', 155270560); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (24, 'CM000686.1', 'chrY', 'GRCh37.p13', 59373566); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (25, 'GL000191.1', 'chr1_gl000191_random', 'GRCh37.p13', 106433); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (26, 'GL000192.1', 'chr1_gl000192_random', 'GRCh37.p13', 547496); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (27, 'GL000193.1', 'chr4_gl000193_random', 'GRCh37.p13', 189789); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (28, 'GL000194.1', 'chr4_gl000194_random', 'GRCh37.p13', 191469); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (29, 'GL000195.1', 'chr7_gl000195_random', 'GRCh37.p13', 182896); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (30, 'GL000196.1', 'chr8_gl000196_random', 'GRCh37.p13', 38914); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (31, 'GL000197.1', 'chr8_gl000197_random', 'GRCh37.p13', 37175); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (32, 'GL000198.1', 'chr9_gl000198_random', 'GRCh37.p13', 90085); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (33, 'GL000199.1', 'chr9_gl000199_random', 'GRCh37.p13', 169874); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (34, 'GL000200.1', 'chr9_gl000200_random', 'GRCh37.p13', 187035); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (35, 'GL000201.1', 'chr9_gl000201_random', 'GRCh37.p13', 36148); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (36, 'GL000202.1', 
'chr11_gl000202_random', 'GRCh37.p13', 40103); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (286, 'JH806603.1', null, 'GRCh37.p13', 182949); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (37, 'GL000203.1', 'chr17_gl000203_random', 'GRCh37.p13', 37498); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (38, 'GL000204.1', 'chr17_gl000204_random', 'GRCh37.p13', 81310); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (39, 'GL000205.1', 'chr17_gl000205_random', 'GRCh37.p13', 174588); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (40, 'GL000206.1', 'chr17_gl000206_random', 'GRCh37.p13', 41001); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (41, 'GL000207.1', 'chr18_gl000207_random', 'GRCh37.p13', 4262); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (42, 'GL000208.1', 'chr19_gl000208_random', 'GRCh37.p13', 92689); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (43, 'GL000209.1', 'chr19_gl000209_random', 'GRCh37.p13', 159169); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (44, 'GL000210.1', 'chr21_gl000210_random', 'GRCh37.p13', 27682); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (287, 'KB021648.1', null, 'GRCh37.p13', 469972); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (45, 'GL000211.1', 'chrUn_gl000211', 'GRCh37.p13', 166566); -INSERT INTO genbank_contig (genbank_contig_id, accession, 
common_name, reference_genome, seq_length) VALUES (46, 'GL000212.1', 'chrUn_gl000212', 'GRCh37.p13', 186858); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (47, 'GL000213.1', 'chrUn_gl000213', 'GRCh37.p13', 164239); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (48, 'GL000214.1', 'chrUn_gl000214', 'GRCh37.p13', 137718); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (49, 'GL000215.1', 'chrUn_gl000215', 'GRCh37.p13', 172545); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (50, 'GL000216.1', 'chrUn_gl000216', 'GRCh37.p13', 172294); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (51, 'GL000217.1', 'chrUn_gl000217', 'GRCh37.p13', 172149); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (52, 'GL000218.1', 'chrUn_gl000218', 'GRCh37.p13', 161147); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (53, 'GL000219.1', 'chrUn_gl000219', 'GRCh37.p13', 179198); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (54, 'GL000220.1', 'chrUn_gl000220', 'GRCh37.p13', 161802); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (55, 'GL000221.1', 'chrUn_gl000221', 'GRCh37.p13', 155397); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (56, 'GL000222.1', 'chrUn_gl000222', 'GRCh37.p13', 186861); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (57, 'GL000223.1', 'chrUn_gl000223', 'GRCh37.p13', 180455); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (58, 'GL000224.1', 'chrUn_gl000224', 'GRCh37.p13', 179693); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (59, 'GL000225.1', 'chrUn_gl000225', 'GRCh37.p13', 211173); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (60, 'GL000226.1', 'chrUn_gl000226', 'GRCh37.p13', 15008); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (61, 'GL000227.1', 'chrUn_gl000227', 'GRCh37.p13', 128374); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (62, 'GL000228.1', 'chrUn_gl000228', 'GRCh37.p13', 129120); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (63, 'GL000229.1', 'chrUn_gl000229', 'GRCh37.p13', 19913); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (64, 'GL000230.1', 'chrUn_gl000230', 'GRCh37.p13', 43691); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (65, 'GL000231.1', 'chrUn_gl000231', 'GRCh37.p13', 27386); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (66, 'GL000232.1', 'chrUn_gl000232', 'GRCh37.p13', 40652); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (67, 'GL000233.1', 'chrUn_gl000233', 'GRCh37.p13', 45941); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (68, 'GL000234.1', 'chrUn_gl000234', 'GRCh37.p13', 40531); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (69, 'GL000235.1', 'chrUn_gl000235', 'GRCh37.p13', 34474); -INSERT 
INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (70, 'GL000236.1', 'chrUn_gl000236', 'GRCh37.p13', 41934); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (71, 'GL000237.1', 'chrUn_gl000237', 'GRCh37.p13', 45867); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (72, 'GL000238.1', 'chrUn_gl000238', 'GRCh37.p13', 39939); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (73, 'GL000239.1', 'chrUn_gl000239', 'GRCh37.p13', 33824); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (74, 'GL000240.1', 'chrUn_gl000240', 'GRCh37.p13', 41933); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (75, 'GL000241.1', 'chrUn_gl000241', 'GRCh37.p13', 42152); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (76, 'GL000242.1', 'chrUn_gl000242', 'GRCh37.p13', 43523); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (77, 'GL000243.1', 'chrUn_gl000243', 'GRCh37.p13', 43341); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (78, 'GL000244.1', 'chrUn_gl000244', 'GRCh37.p13', 39929); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (79, 'GL000245.1', 'chrUn_gl000245', 'GRCh37.p13', 36651); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (80, 'GL000246.1', 'chrUn_gl000246', 'GRCh37.p13', 38154); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (81, 'GL000247.1', 'chrUn_gl000247', 'GRCh37.p13', 
36422); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (82, 'GL000248.1', 'chrUn_gl000248', 'GRCh37.p13', 39786); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (83, 'GL000249.1', 'chrUn_gl000249', 'GRCh37.p13', 38502); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (84, 'GL383516.1', null, 'GRCh37.p13', 49316); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (85, 'GL383517.1', null, 'GRCh37.p13', 49352); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (86, 'GL949741.1', null, 'GRCh37.p13', 151551); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (87, 'JH636052.4', null, 'GRCh37.p13', 7283150); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (88, 'JH636053.3', null, 'GRCh37.p13', 1676126); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (89, 'JH636054.1', null, 'GRCh37.p13', 758378); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (90, 'JH806573.1', null, 'GRCh37.p13', 24680); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (91, 'JH806574.2', null, 'GRCh37.p13', 22982); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (92, 'JH806575.1', null, 'GRCh37.p13', 47409); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (93, 'GL383518.1', null, 'GRCh37.p13', 182439); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (94, 'GL383519.1', null, 'GRCh37.p13', 110268); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (95, 'GL383520.1', null, 'GRCh37.p13', 366579); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (96, 'GL877870.2', null, 'GRCh37.p13', 66021); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (97, 'GL877871.1', null, 'GRCh37.p13', 389939); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (98, 'KB663603.1', null, 'GRCh37.p13', 599580); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (99, 'GL383521.1', null, 'GRCh37.p13', 143390); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (100, 'GL383522.1', null, 'GRCh37.p13', 123821); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (101, 'GL582966.2', null, 'GRCh37.p13', 96131); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (102, 'GL383523.1', null, 'GRCh37.p13', 171362); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (103, 'GL383524.1', null, 'GRCh37.p13', 78793); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (104, 'GL383525.1', null, 'GRCh37.p13', 65063); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (105, 'JH159131.1', null, 'GRCh37.p13', 393769); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (106, 'JH159132.1', null, 'GRCh37.p13', 100694); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (107, 'KE332495.1', null, 'GRCh37.p13', 263861); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (108, 'GL383526.1', null, 'GRCh37.p13', 180671); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (109, 'JH636055.1', null, 'GRCh37.p13', 173151); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (110, 'GL582967.1', null, 'GRCh37.p13', 248177); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (111, 'GL877872.1', null, 'GRCh37.p13', 297485); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (112, 'KE332496.1', null, 'GRCh37.p13', 503215); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (113, 'GL383527.1', null, 'GRCh37.p13', 164536); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (114, 'GL383528.1', null, 'GRCh37.p13', 376187); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (115, 'GL383529.1', null, 'GRCh37.p13', 121345); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (116, 'JH159133.1', null, 'GRCh37.p13', 266316); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (117, 'KE332497.1', null, 'GRCh37.p13', 543325); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (118, 'GL339449.2', null, 'GRCh37.p13', 1612928); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (119, 'GL383530.1', 
null, 'GRCh37.p13', 101241); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (120, 'GL383531.1', null, 'GRCh37.p13', 173459); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (121, 'GL383532.1', null, 'GRCh37.p13', 82728); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (122, 'GL949742.1', null, 'GRCh37.p13', 226852); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (123, 'JH636056.1', null, 'GRCh37.p13', 262912); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (124, 'JH636057.1', null, 'GRCh37.p13', 200195); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (125, 'JH806576.1', null, 'GRCh37.p13', 273386); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (126, 'KB663604.1', null, 'GRCh37.p13', 478993); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (127, 'KE332498.1', null, 'GRCh37.p13', 149443); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (128, 'GL383533.1', null, 'GRCh37.p13', 124736); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (129, 'KB021644.1', null, 'GRCh37.p13', 187824); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (130, 'GL582968.1', null, 'GRCh37.p13', 356330); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (131, 'GL582969.1', null, 'GRCh37.p13', 251823); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (132, 'GL582970.1', null, 'GRCh37.p13', 354970); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (133, 'GL582971.1', null, 'GRCh37.p13', 1284284); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (134, 'GL582972.1', null, 'GRCh37.p13', 327774); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (135, 'JH159134.2', null, 'GRCh37.p13', 3821770); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (136, 'JH636058.1', null, 'GRCh37.p13', 716227); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (137, 'KE332499.1', null, 'GRCh37.p13', 274521); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (138, 'GL383534.2', null, 'GRCh37.p13', 119183); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (139, 'GL383535.1', null, 'GRCh37.p13', 429806); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (140, 'GL383536.1', null, 'GRCh37.p13', 203777); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (141, 'GL949743.1', null, 'GRCh37.p13', 608579); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (142, 'JH159135.2', null, 'GRCh37.p13', 102251); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (143, 'KE332500.1', null, 'GRCh37.p13', 228602); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (144, 'GL339450.1', null, 'GRCh37.p13', 330164); -INSERT INTO 
genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (145, 'GL383537.1', null, 'GRCh37.p13', 62435); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (146, 'GL383538.1', null, 'GRCh37.p13', 49281); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (147, 'JH636059.1', null, 'GRCh37.p13', 295379); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (148, 'JH806577.1', null, 'GRCh37.p13', 22394); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (149, 'JH806578.1', null, 'GRCh37.p13', 169437); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (150, 'JH806579.1', null, 'GRCh37.p13', 211307); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (151, 'KB663605.1', null, 'GRCh37.p13', 155926); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (152, 'GL383539.1', null, 'GRCh37.p13', 162988); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (153, 'GL383540.1', null, 'GRCh37.p13', 71551); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (154, 'GL383541.1', null, 'GRCh37.p13', 171286); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (155, 'GL383542.1', null, 'GRCh37.p13', 60032); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (156, 'GL383543.1', null, 'GRCh37.p13', 392792); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (157, 
'GL383544.1', null, 'GRCh37.p13', 128378); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (158, 'GL877873.1', null, 'GRCh37.p13', 168465); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (159, 'JH591181.2', null, 'GRCh37.p13', 2281126); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (160, 'JH591182.1', null, 'GRCh37.p13', 196262); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (161, 'JH591183.1', null, 'GRCh37.p13', 177920); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (162, 'JH636060.1', null, 'GRCh37.p13', 437946); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (163, 'JH806580.1', null, 'GRCh37.p13', 93149); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (164, 'KB663606.1', null, 'GRCh37.p13', 305900); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (165, 'KE332501.1', null, 'GRCh37.p13', 1020827); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (166, 'GL383545.1', null, 'GRCh37.p13', 179254); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (167, 'GL383546.1', null, 'GRCh37.p13', 309802); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (168, 'GL582973.1', null, 'GRCh37.p13', 321004); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (169, 'GL949744.1', null, 'GRCh37.p13', 276448); -INSERT INTO genbank_contig (genbank_contig_id, accession, 
common_name, reference_genome, seq_length) VALUES (170, 'JH159138.1', null, 'GRCh37.p13', 108875); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (171, 'JH159139.1', null, 'GRCh37.p13', 120441); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (172, 'JH159140.1', null, 'GRCh37.p13', 546435); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (173, 'JH159141.2', null, 'GRCh37.p13', 240775); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (174, 'JH159142.2', null, 'GRCh37.p13', 326647); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (175, 'JH159143.1', null, 'GRCh37.p13', 191402); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (176, 'JH591184.1', null, 'GRCh37.p13', 462282); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (177, 'JH591185.1', null, 'GRCh37.p13', 167437); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (178, 'JH720443.2', null, 'GRCh37.p13', 408430); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (179, 'JH806581.1', null, 'GRCh37.p13', 872115); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (180, 'GL383547.1', null, 'GRCh37.p13', 154407); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (181, 'JH159136.1', null, 'GRCh37.p13', 200998); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (182, 'JH159137.1', null, 'GRCh37.p13', 191409); 
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (183, 'GL383548.1', null, 'GRCh37.p13', 165247); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (184, 'GL582974.1', null, 'GRCh37.p13', 163298); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (185, 'JH720444.2', null, 'GRCh37.p13', 273128); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (186, 'KB663607.2', null, 'GRCh37.p13', 334922); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (187, 'GL383549.1', null, 'GRCh37.p13', 120804); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (188, 'GL383550.1', null, 'GRCh37.p13', 169178); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (189, 'GL383551.1', null, 'GRCh37.p13', 184319); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (190, 'GL383552.1', null, 'GRCh37.p13', 138655); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (191, 'GL383553.2', null, 'GRCh37.p13', 152874); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (192, 'GL877875.1', null, 'GRCh37.p13', 167313); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (193, 'GL877876.1', null, 'GRCh37.p13', 408271); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (194, 'GL949745.1', null, 'GRCh37.p13', 372609); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) 
VALUES (195, 'GL582975.1', null, 'GRCh37.p13', 34662); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (196, 'KB021645.1', null, 'GRCh37.p13', 1523386); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (197, 'JH720445.1', null, 'GRCh37.p13', 170033); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (198, 'GL383554.1', null, 'GRCh37.p13', 296527); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (199, 'GL383555.1', null, 'GRCh37.p13', 388773); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (200, 'JH720446.1', null, 'GRCh37.p13', 97345); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (201, 'GL383556.1', null, 'GRCh37.p13', 192462); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (202, 'GL383557.1', null, 'GRCh37.p13', 89672); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (203, 'GL383558.1', null, 'GRCh37.p13', 457041); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (204, 'GL383559.2', null, 'GRCh37.p13', 338640); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (205, 'GL383560.1', null, 'GRCh37.p13', 534288); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (206, 'GL383561.2', null, 'GRCh37.p13', 644425); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (207, 'GL383562.1', null, 'GRCh37.p13', 45551); -INSERT INTO genbank_contig (genbank_contig_id, 
accession, common_name, reference_genome, seq_length) VALUES (208, 'GL582976.1', null, 'GRCh37.p13', 412535); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (209, 'JH159144.1', null, 'GRCh37.p13', 388340); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (210, 'JH159145.1', null, 'GRCh37.p13', 194862); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (211, 'JH591186.1', null, 'GRCh37.p13', 376223); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (212, 'JH636061.1', null, 'GRCh37.p13', 186059); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (213, 'JH720447.1', null, 'GRCh37.p13', 454385); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (214, 'JH806582.2', null, 'GRCh37.p13', 342635); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (215, 'KB021646.2', null, 'GRCh37.p13', 211416); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (216, 'KE332502.1', null, 'GRCh37.p13', 341712); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (217, 'GL383563.2', null, 'GRCh37.p13', 270261); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (218, 'GL383564.1', null, 'GRCh37.p13', 133151); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (219, 'GL383565.1', null, 'GRCh37.p13', 223995); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (220, 'GL383566.1', null, 'GRCh37.p13', 
90219); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (221, 'JH159146.1', null, 'GRCh37.p13', 278131); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (250, 'KE332506.1', null, 'GRCh37.p13', 307252); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (251, 'GL383578.1', null, 'GRCh37.p13', 63917); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (252, 'GL383579.1', null, 'GRCh37.p13', 201198); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (253, 'GL383580.1', null, 'GRCh37.p13', 74652); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (254, 'GL383581.1', null, 'GRCh37.p13', 116690); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (255, 'JH720449.1', null, 'GRCh37.p13', 212298); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (256, 'JH806583.1', null, 'GRCh37.p13', 167183); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (257, 'JH806584.1', null, 'GRCh37.p13', 70876); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (258, 'JH806585.1', null, 'GRCh37.p13', 73505); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (259, 'JH806586.1', null, 'GRCh37.p13', 43543); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (260, 'GL383582.2', null, 'GRCh37.p13', 162811); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, 
seq_length) VALUES (261, 'GL383583.1', null, 'GRCh37.p13', 96924); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (262, 'KB663609.1', null, 'GRCh37.p13', 74013); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (263, 'GL877877.2', null, 'GRCh37.p13', 284527); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (264, 'JH159150.3', null, 'GRCh37.p13', 3110903); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (265, 'JH720451.1', null, 'GRCh37.p13', 898979); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (266, 'JH720452.1', null, 'GRCh37.p13', 522319); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (267, 'JH720453.1', null, 'GRCh37.p13', 1461188); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (268, 'JH720454.3', null, 'GRCh37.p13', 752267); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (269, 'JH720455.1', null, 'GRCh37.p13', 65034); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (270, 'JH806587.1', null, 'GRCh37.p13', 4110759); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (271, 'JH806588.1', null, 'GRCh37.p13', 862483); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (272, 'JH806589.1', null, 'GRCh37.p13', 270630); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (273, 'JH806590.2', null, 'GRCh37.p13', 2418393); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (274, 'JH806591.1', null, 'GRCh37.p13', 882083); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (275, 'JH806592.1', null, 'GRCh37.p13', 835911); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (276, 'JH806593.1', null, 'GRCh37.p13', 389631); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (277, 'JH806594.1', null, 'GRCh37.p13', 390496); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (278, 'JH806595.1', null, 'GRCh37.p13', 444074); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (279, 'JH806596.1', null, 'GRCh37.p13', 413927); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (280, 'JH806597.1', null, 'GRCh37.p13', 1045622); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (281, 'JH806598.1', null, 'GRCh37.p13', 899320); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (282, 'JH806599.1', null, 'GRCh37.p13', 1214327); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (283, 'JH806600.2', null, 'GRCh37.p13', 6530008); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (284, 'JH806601.1', null, 'GRCh37.p13', 1389764); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (285, 'JH806602.1', null, 'GRCh37.p13', 713266); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (288, 
'GL000250.1', 'chr6_apd_hap1', 'GRCh37.p13', 4622290); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (289, 'GL000251.1', 'chr6_cox_hap2', 'GRCh37.p13', 4795371); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (290, 'GL000252.1', 'chr6_dbb_hap3', 'GRCh37.p13', 4610396); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (291, 'GL000253.1', 'chr6_mann_hap4', 'GRCh37.p13', 4683263); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (292, 'GL000254.1', 'chr6_mcf_hap5', 'GRCh37.p13', 4833398); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (293, 'GL000255.1', 'chr6_qbl_hap6', 'GRCh37.p13', 4611984); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (294, 'GL000256.1', 'chr6_ssto_hap7', 'GRCh37.p13', 4928567); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (295, 'GL000257.1', 'chr4_ctg9_hap1', 'GRCh37.p13', 590426); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (296, 'GL000258.1', 'chr17_ctg5_hap1', 'GRCh37.p13', 1680828); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (297, 'J01415.2', 'chrM', 'GRCh37.p13', 16569); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (655, 'MU273358.1', 'chr7_MU273358v1_alt', 'GRCh38.p14', 464417); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (656, 'KV880766.1', 'chr8_KV880766v1_fix', 'GRCh38.p14', 156998); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (657, 'KV880767.1', 'chr8_KV880767v1_fix', 'GRCh38.p14', 265876); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (658, 'KZ208914.1', 'chr8_KZ208914v1_fix', 'GRCh38.p14', 165120); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (659, 'KZ208915.1', 'chr8_KZ208915v1_fix', 'GRCh38.p14', 6367528); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (660, 'MU273359.1', 'chr8_MU273359v1_fix', 'GRCh38.p14', 150302); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (661, 'MU273360.1', 'chr8_MU273360v1_fix', 'GRCh38.p14', 39290); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (662, 'MU273361.1', 'chr8_MU273361v1_fix', 'GRCh38.p14', 106905); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (663, 'MU273362.1', 'chr8_MU273362v1_fix', 'GRCh38.p14', 429744); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (664, 'MU273363.1', 'chr8_MU273363v1_fix', 'GRCh38.p14', 207371); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (665, 'KZ559107.1', 'chr8_KZ559107v1_alt', 'GRCh38.p14', 103072); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (666, 'KN196479.1', 'chr9_KN196479v1_fix', 'GRCh38.p14', 330164); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (667, 'ML143353.1', 'chr9_ML143353v1_fix', 'GRCh38.p14', 25408); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (668, 'MU273364.1', 
'chr9_MU273364v1_fix', 'GRCh38.p14', 340717); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (669, 'MU273365.1', 'chr9_MU273365v1_fix', 'GRCh38.p14', 482250); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (670, 'MU273366.1', 'chr9_MU273366v1_fix', 'GRCh38.p14', 569668); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (671, 'KQ090018.1', 'chr9_KQ090018v1_alt', 'GRCh38.p14', 163882); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (672, 'KQ090019.1', 'chr9_KQ090019v1_alt', 'GRCh38.p14', 134099); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (673, 'KN196480.1', 'chr10_KN196480v1_fix', 'GRCh38.p14', 277797); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (674, 'KN538365.1', 'chr10_KN538365v1_fix', 'GRCh38.p14', 14347); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (675, 'KN538366.1', 'chr10_KN538366v1_fix', 'GRCh38.p14', 85284); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (676, 'KN538367.1', 'chr10_KN538367v1_fix', 'GRCh38.p14', 420164); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (677, 'KQ090021.1', 'chr10_KQ090021v1_fix', 'GRCh38.p14', 264545); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (678, 'ML143354.1', 'chr10_ML143354v1_fix', 'GRCh38.p14', 454963); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (679, 'ML143355.1', 'chr10_ML143355v1_fix', 'GRCh38.p14', 292944); -INSERT INTO 
genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (680, 'MU273367.1', 'chr10_MU273367v1_fix', 'GRCh38.p14', 196262); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (681, 'KQ090020.1', 'chr10_KQ090020v1_alt', 'GRCh38.p14', 185507); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (682, 'KN196481.1', 'chr11_KN196481v1_fix', 'GRCh38.p14', 108875); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (683, 'KQ090022.1', 'chr11_KQ090022v1_fix', 'GRCh38.p14', 181958); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (684, 'KQ759759.2', 'chr11_KQ759759v2_fix', 'GRCh38.p14', 204999); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (685, 'KV766195.1', 'chr11_KV766195v1_fix', 'GRCh38.p14', 140877); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (686, 'KZ559108.1', 'chr11_KZ559108v1_fix', 'GRCh38.p14', 305244); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (687, 'KZ559109.1', 'chr11_KZ559109v1_fix', 'GRCh38.p14', 279644); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (688, 'ML143356.1', 'chr11_ML143356v1_fix', 'GRCh38.p14', 45257); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (689, 'ML143357.1', 'chr11_ML143357v1_fix', 'GRCh38.p14', 165419); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (690, 'ML143358.1', 'chr11_ML143358v1_fix', 'GRCh38.p14', 270122); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (691, 'ML143359.1', 'chr11_ML143359v1_fix', 'GRCh38.p14', 217075); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (692, 'ML143360.1', 'chr11_ML143360v1_fix', 'GRCh38.p14', 170928); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (693, 'MU273369.1', 'chr11_MU273369v1_fix', 'GRCh38.p14', 434831); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (694, 'MU273370.1', 'chr11_MU273370v1_fix', 'GRCh38.p14', 344606); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (695, 'MU273371.1', 'chr11_MU273371v1_fix', 'GRCh38.p14', 122722); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (696, 'KN538368.1', 'chr11_KN538368v1_alt', 'GRCh38.p14', 203552); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (697, 'KZ559110.1', 'chr11_KZ559110v1_alt', 'GRCh38.p14', 301637); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (698, 'KZ559111.1', 'chr11_KZ559111v1_alt', 'GRCh38.p14', 181167); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (699, 'MU273368.1', 'chr11_MU273368v1_alt', 'GRCh38.p14', 261194); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (700, 'KN196482.1', 'chr12_KN196482v1_fix', 'GRCh38.p14', 211377); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (701, 'KN538369.1', 'chr12_KN538369v1_fix', 'GRCh38.p14', 541038); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (702, 'KN538370.1', 
'chr12_KN538370v1_fix', 'GRCh38.p14', 86533); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (703, 'KQ759760.1', 'chr12_KQ759760v1_fix', 'GRCh38.p14', 315610); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (704, 'KZ208916.1', 'chr12_KZ208916v1_fix', 'GRCh38.p14', 1046838); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (705, 'KZ208917.1', 'chr12_KZ208917v1_fix', 'GRCh38.p14', 64689); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (706, 'ML143361.1', 'chr12_ML143361v1_fix', 'GRCh38.p14', 297568); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (707, 'ML143362.1', 'chr12_ML143362v1_fix', 'GRCh38.p14', 192531); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (708, 'MU273372.1', 'chr12_MU273372v1_fix', 'GRCh38.p14', 104537); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (709, 'KQ090023.1', 'chr12_KQ090023v1_alt', 'GRCh38.p14', 109323); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (710, 'KZ208918.1', 'chr12_KZ208918v1_alt', 'GRCh38.p14', 174808); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (711, 'KZ559112.1', 'chr12_KZ559112v1_alt', 'GRCh38.p14', 154139); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (712, 'KN196483.1', 'chr13_KN196483v1_fix', 'GRCh38.p14', 35455); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (713, 'KN538371.1', 'chr13_KN538371v1_fix', 'GRCh38.p14', 206320); -INSERT INTO 
genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (714, 'KN538372.1', 'chr13_KN538372v1_fix', 'GRCh38.p14', 356766); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (715, 'KN538373.1', 'chr13_KN538373v1_fix', 'GRCh38.p14', 148762); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (716, 'ML143363.1', 'chr13_ML143363v1_fix', 'GRCh38.p14', 7309); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (717, 'ML143364.1', 'chr13_ML143364v1_fix', 'GRCh38.p14', 158944); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (718, 'ML143365.1', 'chr13_ML143365v1_fix', 'GRCh38.p14', 65394); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (719, 'ML143366.1', 'chr13_ML143366v1_fix', 'GRCh38.p14', 409912); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (720, 'KQ090024.1', 'chr13_KQ090024v1_alt', 'GRCh38.p14', 168146); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (721, 'KQ090025.1', 'chr13_KQ090025v1_alt', 'GRCh38.p14', 123480); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (722, 'KZ208920.1', 'chr14_KZ208920v1_fix', 'GRCh38.p14', 690932); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (723, 'ML143367.1', 'chr14_ML143367v1_fix', 'GRCh38.p14', 399183); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (724, 'MU273373.1', 'chr14_MU273373v1_fix', 'GRCh38.p14', 722645); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (725, 'KZ208919.1', 'chr14_KZ208919v1_alt', 'GRCh38.p14', 171798); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (726, 'ML143368.1', 'chr14_ML143368v1_alt', 'GRCh38.p14', 264228); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (727, 'KN538374.1', 'chr15_KN538374v1_fix', 'GRCh38.p14', 4998962); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (728, 'ML143369.1', 'chr15_ML143369v1_fix', 'GRCh38.p14', 97763); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (729, 'ML143370.1', 'chr15_ML143370v1_fix', 'GRCh38.p14', 369264); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (730, 'ML143371.1', 'chr15_ML143371v1_fix', 'GRCh38.p14', 5500449); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (731, 'ML143372.1', 'chr15_ML143372v1_fix', 'GRCh38.p14', 396515); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (732, 'MU273374.1', 'chr15_MU273374v1_fix', 'GRCh38.p14', 1154574); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (733, 'KQ031389.1', 'chr15_KQ031389v1_alt', 'GRCh38.p14', 2365364); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (734, 'MU273375.1', 'chr15_MU273375v1_alt', 'GRCh38.p14', 204007); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (735, 'KV880768.1', 'chr16_KV880768v1_fix', 'GRCh38.p14', 1927115); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (736, 'KZ559113.1', 
'chr16_KZ559113v1_fix', 'GRCh38.p14', 480415); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (737, 'ML143373.1', 'chr16_ML143373v1_fix', 'GRCh38.p14', 270967); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (738, 'MU273376.1', 'chr16_MU273376v1_fix', 'GRCh38.p14', 87715); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (739, 'MU273377.1', 'chr16_MU273377v1_fix', 'GRCh38.p14', 334997); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (740, 'KQ031390.1', 'chr16_KQ031390v1_alt', 'GRCh38.p14', 169136); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (741, 'KQ090026.1', 'chr16_KQ090026v1_alt', 'GRCh38.p14', 59016); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (742, 'KQ090027.1', 'chr16_KQ090027v1_alt', 'GRCh38.p14', 267463); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (743, 'KZ208921.1', 'chr16_KZ208921v1_alt', 'GRCh38.p14', 78609); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (744, 'KV575245.1', 'chr17_KV575245v1_fix', 'GRCh38.p14', 154723); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (745, 'KV766196.1', 'chr17_KV766196v1_fix', 'GRCh38.p14', 281919); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (746, 'ML143374.1', 'chr17_ML143374v1_fix', 'GRCh38.p14', 137908); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (747, 'ML143375.1', 'chr17_ML143375v1_fix', 'GRCh38.p14', 56695); -INSERT INTO 
genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (748, 'MU273379.1', 'chr17_MU273379v1_fix', 'GRCh38.p14', 234878); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (749, 'MU273380.1', 'chr17_MU273380v1_fix', 'GRCh38.p14', 538541); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (750, 'MU273381.1', 'chr17_MU273381v1_fix', 'GRCh38.p14', 144689); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (751, 'MU273382.1', 'chr17_MU273382v1_fix', 'GRCh38.p14', 187626); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (752, 'MU273383.1', 'chr17_MU273383v1_fix', 'GRCh38.p14', 172609); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (753, 'KV766197.1', 'chr17_KV766197v1_alt', 'GRCh38.p14', 246895); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (754, 'KV766198.1', 'chr17_KV766198v1_alt', 'GRCh38.p14', 276292); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (755, 'KZ559114.1', 'chr17_KZ559114v1_alt', 'GRCh38.p14', 116753); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (756, 'MU273378.1', 'chr17_MU273378v1_alt', 'GRCh38.p14', 372839); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (757, 'KQ090028.1', 'chr18_KQ090028v1_fix', 'GRCh38.p14', 407387); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (758, 'KZ208922.1', 'chr18_KZ208922v1_fix', 'GRCh38.p14', 93070); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (759, 'KZ559115.1', 'chr18_KZ559115v1_fix', 'GRCh38.p14', 230843); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (760, 'KQ458385.1', 'chr18_KQ458385v1_alt', 'GRCh38.p14', 205101); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (761, 'KZ559116.1', 'chr18_KZ559116v1_alt', 'GRCh38.p14', 163186); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (762, 'KN196484.1', 'chr19_KN196484v1_fix', 'GRCh38.p14', 370917); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (763, 'KQ458386.1', 'chr19_KQ458386v1_fix', 'GRCh38.p14', 405389); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (764, 'ML143376.1', 'chr19_ML143376v1_fix', 'GRCh38.p14', 493165); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (765, 'MU273384.1', 'chr19_MU273384v1_fix', 'GRCh38.p14', 333754); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (766, 'MU273385.1', 'chr19_MU273385v1_fix', 'GRCh38.p14', 137818); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (767, 'MU273386.1', 'chr19_MU273386v1_fix', 'GRCh38.p14', 226166); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (768, 'KV575246.1', 'chr19_KV575246v1_alt', 'GRCh38.p14', 163926); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (769, 'KV575247.1', 'chr19_KV575247v1_alt', 'GRCh38.p14', 170206); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (815, 'KV575248.1', 
'chr19_KV575248v1_alt', 'GRCh38.p14', 168131); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (816, 'KV575249.1', 'chr19_KV575249v1_alt', 'GRCh38.p14', 293522); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (817, 'KV575250.1', 'chr19_KV575250v1_alt', 'GRCh38.p14', 241058); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (818, 'KV575251.1', 'chr19_KV575251v1_alt', 'GRCh38.p14', 159285); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (819, 'KV575252.1', 'chr19_KV575252v1_alt', 'GRCh38.p14', 178197); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (820, 'KV575253.1', 'chr19_KV575253v1_alt', 'GRCh38.p14', 166713); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (821, 'KV575254.1', 'chr19_KV575254v1_alt', 'GRCh38.p14', 99845); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (822, 'KV575255.1', 'chr19_KV575255v1_alt', 'GRCh38.p14', 161095); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (823, 'KV575256.1', 'chr19_KV575256v1_alt', 'GRCh38.p14', 223118); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (824, 'KV575257.1', 'chr19_KV575257v1_alt', 'GRCh38.p14', 100553); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (825, 'KV575258.1', 'chr19_KV575258v1_alt', 'GRCh38.p14', 156965); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (826, 'KV575259.1', 'chr19_KV575259v1_alt', 'GRCh38.p14', 171263); -INSERT INTO 
genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (827, 'KV575260.1', 'chr19_KV575260v1_alt', 'GRCh38.p14', 145691); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (828, 'MU273387.1', 'chr19_MU273387v1_alt', 'GRCh38.p14', 89211); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (829, 'MU273388.1', 'chr20_MU273388v1_fix', 'GRCh38.p14', 273725); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (830, 'MU273389.1', 'chr20_MU273389v1_fix', 'GRCh38.p14', 355731); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (831, 'ML143377.1', 'chr21_ML143377v1_fix', 'GRCh38.p14', 519485); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (832, 'MU273390.1', 'chr21_MU273390v1_fix', 'GRCh38.p14', 336752); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (833, 'MU273391.1', 'chr21_MU273391v1_fix', 'GRCh38.p14', 1020778); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (834, 'MU273392.1', 'chr21_MU273392v1_fix', 'GRCh38.p14', 189707); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (835, 'KQ759762.2', 'chr22_KQ759762v2_fix', 'GRCh38.p14', 101040); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (836, 'ML143378.1', 'chr22_ML143378v1_fix', 'GRCh38.p14', 461303); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (837, 'ML143379.1', 'chr22_ML143379v1_fix', 'GRCh38.p14', 12295); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (838, 'ML143380.1', 'chr22_ML143380v1_fix', 'GRCh38.p14', 412368); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (839, 'KN196485.1', 'chr22_KN196485v1_alt', 'GRCh38.p14', 156562); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (840, 'KN196486.1', 'chr22_KN196486v1_alt', 'GRCh38.p14', 153027); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (841, 'KQ458387.1', 'chr22_KQ458387v1_alt', 'GRCh38.p14', 155930); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (842, 'KQ458388.1', 'chr22_KQ458388v1_alt', 'GRCh38.p14', 174749); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (843, 'KQ759761.1', 'chr22_KQ759761v1_alt', 'GRCh38.p14', 145162); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (844, 'ML143381.1', 'chrX_ML143381v1_fix', 'GRCh38.p14', 403128); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (845, 'ML143382.1', 'chrX_ML143382v1_fix', 'GRCh38.p14', 28824); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (846, 'ML143383.1', 'chrX_ML143383v1_fix', 'GRCh38.p14', 68192); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (847, 'ML143384.1', 'chrX_ML143384v1_fix', 'GRCh38.p14', 14678); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (848, 'ML143385.1', 'chrX_ML143385v1_fix', 'GRCh38.p14', 17435); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (849, 'MU273393.1', 
'chrX_MU273393v1_fix', 'GRCh38.p14', 68810); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (850, 'MU273394.1', 'chrX_MU273394v1_fix', 'GRCh38.p14', 140567); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (851, 'KV766199.1', 'chrX_KV766199v1_alt', 'GRCh38.p14', 188004); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (852, 'MU273395.1', 'chrX_MU273395v1_alt', 'GRCh38.p14', 619716); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (853, 'MU273396.1', 'chrX_MU273396v1_alt', 'GRCh38.p14', 294119); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (854, 'MU273397.1', 'chrX_MU273397v1_alt', 'GRCh38.p14', 330493); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (855, 'KN196487.1', 'chrY_KN196487v1_fix', 'GRCh38.p14', 101150); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (856, 'KZ208923.1', 'chrY_KZ208923v1_fix', 'GRCh38.p14', 48370); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (857, 'KZ208924.1', 'chrY_KZ208924v1_fix', 'GRCh38.p14', 209722); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (858, 'MU273398.1', 'chrY_MU273398v1_fix', 'GRCh38.p14', 865743); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (861, 'GL383520.2', 'chr1_GL383520v2_alt', 'GRCh38.p14', 366580); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (862, 'KI270759.1', 'chr1_KI270759v1_alt', 'GRCh38.p14', 425601); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (863, 'KI270760.1', 'chr1_KI270760v1_alt', 'GRCh38.p14', 109528); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (864, 'KI270761.1', 'chr1_KI270761v1_alt', 'GRCh38.p14', 165834); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (865, 'KI270762.1', 'chr1_KI270762v1_alt', 'GRCh38.p14', 354444); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (866, 'KI270763.1', 'chr1_KI270763v1_alt', 'GRCh38.p14', 911658); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (867, 'KI270764.1', 'chr1_KI270764v1_alt', 'GRCh38.p14', 50258); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (868, 'KI270765.1', 'chr1_KI270765v1_alt', 'GRCh38.p14', 185285); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (869, 'KI270766.1', 'chr1_KI270766v1_alt', 'GRCh38.p14', 256271); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (873, 'KI270767.1', 'chr2_KI270767v1_alt', 'GRCh38.p14', 161578); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (874, 'KI270768.1', 'chr2_KI270768v1_alt', 'GRCh38.p14', 110099); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (875, 'KI270769.1', 'chr2_KI270769v1_alt', 'GRCh38.p14', 120616); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (876, 'KI270770.1', 'chr2_KI270770v1_alt', 'GRCh38.p14', 136240); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) 
VALUES (877, 'KI270771.1', 'chr2_KI270771v1_alt', 'GRCh38.p14', 110395); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (878, 'KI270772.1', 'chr2_KI270772v1_alt', 'GRCh38.p14', 133041); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (879, 'KI270773.1', 'chr2_KI270773v1_alt', 'GRCh38.p14', 70887); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (880, 'KI270774.1', 'chr2_KI270774v1_alt', 'GRCh38.p14', 223625); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (881, 'KI270775.1', 'chr2_KI270775v1_alt', 'GRCh38.p14', 138019); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (882, 'KI270776.1', 'chr2_KI270776v1_alt', 'GRCh38.p14', 174166); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (884, 'JH636055.2', 'chr3_JH636055v2_alt', 'GRCh38.p14', 173151); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (885, 'KI270777.1', 'chr3_KI270777v1_alt', 'GRCh38.p14', 173649); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (886, 'KI270778.1', 'chr3_KI270778v1_alt', 'GRCh38.p14', 248252); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (887, 'KI270779.1', 'chr3_KI270779v1_alt', 'GRCh38.p14', 205312); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (888, 'KI270780.1', 'chr3_KI270780v1_alt', 'GRCh38.p14', 224108); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (889, 'KI270781.1', 'chr3_KI270781v1_alt', 'GRCh38.p14', 113034); 
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (890, 'KI270782.1', 'chr3_KI270782v1_alt', 'GRCh38.p14', 162429); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (891, 'KI270783.1', 'chr3_KI270783v1_alt', 'GRCh38.p14', 109187); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (892, 'KI270784.1', 'chr3_KI270784v1_alt', 'GRCh38.p14', 184404); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (893, 'GL000257.2', 'chr4_GL000257v2_alt', 'GRCh38.p14', 586476); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (896, 'KI270785.1', 'chr4_KI270785v1_alt', 'GRCh38.p14', 119912); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (897, 'KI270786.1', 'chr4_KI270786v1_alt', 'GRCh38.p14', 244096); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (898, 'KI270787.1', 'chr4_KI270787v1_alt', 'GRCh38.p14', 111943); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (899, 'KI270788.1', 'chr4_KI270788v1_alt', 'GRCh38.p14', 158965); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (900, 'KI270789.1', 'chr4_KI270789v1_alt', 'GRCh38.p14', 205944); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (901, 'KI270790.1', 'chr4_KI270790v1_alt', 'GRCh38.p14', 220246); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (907, 'KI270791.1', 'chr5_KI270791v1_alt', 'GRCh38.p14', 195710); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (908, 'KI270792.1', 'chr5_KI270792v1_alt', 'GRCh38.p14', 179043); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (909, 'KI270793.1', 'chr5_KI270793v1_alt', 'GRCh38.p14', 126136); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (910, 'KI270794.1', 'chr5_KI270794v1_alt', 'GRCh38.p14', 164558); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (911, 'KI270795.1', 'chr5_KI270795v1_alt', 'GRCh38.p14', 131892); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (912, 'KI270796.1', 'chr5_KI270796v1_alt', 'GRCh38.p14', 172708); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (913, 'GL000250.2', 'chr6_GL000250v2_alt', 'GRCh38.p14', 4672374); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (923, 'KB021644.2', 'chr6_KB021644v2_alt', 'GRCh38.p14', 185823); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (924, 'KI270797.1', 'chr6_KI270797v1_alt', 'GRCh38.p14', 197536); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (925, 'KI270798.1', 'chr6_KI270798v1_alt', 'GRCh38.p14', 271782); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (926, 'KI270799.1', 'chr6_KI270799v1_alt', 'GRCh38.p14', 152148); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (927, 'KI270800.1', 'chr6_KI270800v1_alt', 'GRCh38.p14', 175808); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (928, 'KI270801.1', 
'chr6_KI270801v1_alt', 'GRCh38.p14', 870480); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (929, 'KI270802.1', 'chr6_KI270802v1_alt', 'GRCh38.p14', 75005); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (931, 'KI270803.1', 'chr7_KI270803v1_alt', 'GRCh38.p14', 1111570); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (932, 'KI270804.1', 'chr7_KI270804v1_alt', 'GRCh38.p14', 157952); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (933, 'KI270805.1', 'chr7_KI270805v1_alt', 'GRCh38.p14', 209988); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (934, 'KI270806.1', 'chr7_KI270806v1_alt', 'GRCh38.p14', 158166); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (935, 'KI270807.1', 'chr7_KI270807v1_alt', 'GRCh38.p14', 126434); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (936, 'KI270808.1', 'chr7_KI270808v1_alt', 'GRCh38.p14', 271455); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (937, 'KI270809.1', 'chr7_KI270809v1_alt', 'GRCh38.p14', 209586); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (938, 'KI270810.1', 'chr8_KI270810v1_alt', 'GRCh38.p14', 374415); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (939, 'KI270811.1', 'chr8_KI270811v1_alt', 'GRCh38.p14', 292436); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (940, 'KI270812.1', 'chr8_KI270812v1_alt', 'GRCh38.p14', 282736); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (941, 'KI270813.1', 'chr8_KI270813v1_alt', 'GRCh38.p14', 300230); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (942, 'KI270814.1', 'chr8_KI270814v1_alt', 'GRCh38.p14', 141812); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (943, 'KI270815.1', 'chr8_KI270815v1_alt', 'GRCh38.p14', 132244); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (944, 'KI270816.1', 'chr8_KI270816v1_alt', 'GRCh38.p14', 305841); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (945, 'KI270817.1', 'chr8_KI270817v1_alt', 'GRCh38.p14', 158983); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (946, 'KI270818.1', 'chr8_KI270818v1_alt', 'GRCh38.p14', 145606); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (947, 'KI270819.1', 'chr8_KI270819v1_alt', 'GRCh38.p14', 133535); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (948, 'KI270820.1', 'chr8_KI270820v1_alt', 'GRCh38.p14', 36640); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (949, 'KI270821.1', 'chr8_KI270821v1_alt', 'GRCh38.p14', 985506); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (950, 'KI270822.1', 'chr8_KI270822v1_alt', 'GRCh38.p14', 624492); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (955, 'KI270823.1', 'chr9_KI270823v1_alt', 'GRCh38.p14', 439082); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) 
VALUES (958, 'KI270824.1', 'chr10_KI270824v1_alt', 'GRCh38.p14', 181496); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (962, 'KI270826.1', 'chr11_KI270826v1_alt', 'GRCh38.p14', 186169); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (963, 'KI270827.1', 'chr11_KI270827v1_alt', 'GRCh38.p14', 67707); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (964, 'KI270829.1', 'chr11_KI270829v1_alt', 'GRCh38.p14', 204059); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (965, 'KI270830.1', 'chr11_KI270830v1_alt', 'GRCh38.p14', 177092); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (966, 'KI270831.1', 'chr11_KI270831v1_alt', 'GRCh38.p14', 296895); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (967, 'KI270832.1', 'chr11_KI270832v1_alt', 'GRCh38.p14', 210133); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (969, 'GL383550.2', 'chr12_GL383550v2_alt', 'GRCh38.p14', 169178); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (975, 'KI270833.1', 'chr12_KI270833v1_alt', 'GRCh38.p14', 76061); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (976, 'KI270834.1', 'chr12_KI270834v1_alt', 'GRCh38.p14', 119498); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (977, 'KI270835.1', 'chr12_KI270835v1_alt', 'GRCh38.p14', 238139); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (978, 'KI270836.1', 'chr12_KI270836v1_alt', 'GRCh38.p14', 
56134); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (979, 'KI270837.1', 'chr12_KI270837v1_alt', 'GRCh38.p14', 40090); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (980, 'KI270838.1', 'chr13_KI270838v1_alt', 'GRCh38.p14', 306913); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (981, 'KI270839.1', 'chr13_KI270839v1_alt', 'GRCh38.p14', 180306); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (982, 'KI270840.1', 'chr13_KI270840v1_alt', 'GRCh38.p14', 191684); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (983, 'KI270841.1', 'chr13_KI270841v1_alt', 'GRCh38.p14', 169134); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (984, 'KI270842.1', 'chr13_KI270842v1_alt', 'GRCh38.p14', 37287); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (985, 'KI270843.1', 'chr13_KI270843v1_alt', 'GRCh38.p14', 103832); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (986, 'KI270844.1', 'chr14_KI270844v1_alt', 'GRCh38.p14', 322166); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (987, 'KI270845.1', 'chr14_KI270845v1_alt', 'GRCh38.p14', 180703); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (988, 'KI270846.1', 'chr14_KI270846v1_alt', 'GRCh38.p14', 1351393); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (989, 'KI270847.1', 'chr14_KI270847v1_alt', 'GRCh38.p14', 1511111); -INSERT INTO genbank_contig (genbank_contig_id, 
accession, common_name, reference_genome, seq_length) VALUES (991, 'GL383555.2', 'chr15_GL383555v2_alt', 'GRCh38.p14', 388773); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (992, 'KI270848.1', 'chr15_KI270848v1_alt', 'GRCh38.p14', 327382); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (993, 'KI270849.1', 'chr15_KI270849v1_alt', 'GRCh38.p14', 244917); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (994, 'KI270850.1', 'chr15_KI270850v1_alt', 'GRCh38.p14', 430880); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (995, 'KI270851.1', 'chr15_KI270851v1_alt', 'GRCh38.p14', 263054); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (996, 'KI270852.1', 'chr15_KI270852v1_alt', 'GRCh38.p14', 478999); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (999, 'KI270853.1', 'chr16_KI270853v1_alt', 'GRCh38.p14', 2659700); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1000, 'KI270854.1', 'chr16_KI270854v1_alt', 'GRCh38.p14', 134193); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1001, 'KI270855.1', 'chr16_KI270855v1_alt', 'GRCh38.p14', 232857); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1002, 'KI270856.1', 'chr16_KI270856v1_alt', 'GRCh38.p14', 63982); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1003, 'GL000258.2', 'chr17_GL000258v2_alt', 'GRCh38.p14', 1821992); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) 
VALUES (1004, 'GL383563.3', 'chr17_GL383563v3_alt', 'GRCh38.p14', 375691); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1005, 'GL383564.2', 'chr17_GL383564v2_alt', 'GRCh38.p14', 133151); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1010, 'KI270857.1', 'chr17_KI270857v1_alt', 'GRCh38.p14', 2877074); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1011, 'KI270858.1', 'chr17_KI270858v1_alt', 'GRCh38.p14', 235827); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1012, 'KI270859.1', 'chr17_KI270859v1_alt', 'GRCh38.p14', 108763); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1013, 'KI270860.1', 'chr17_KI270860v1_alt', 'GRCh38.p14', 178921); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1014, 'KI270861.1', 'chr17_KI270861v1_alt', 'GRCh38.p14', 196688); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1015, 'KI270862.1', 'chr17_KI270862v1_alt', 'GRCh38.p14', 391357); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1022, 'KI270863.1', 'chr18_KI270863v1_alt', 'GRCh38.p14', 167999); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1025, 'KI270864.1', 'chr18_KI270864v1_alt', 'GRCh38.p14', 111737); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1031, 'KI270865.1', 'chr19_KI270865v1_alt', 'GRCh38.p14', 52969); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1032, 'KI270866.1', 'chr19_KI270866v1_alt', 
'GRCh38.p14', 43156); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1033, 'KI270867.1', 'chr19_KI270867v1_alt', 'GRCh38.p14', 233762); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1034, 'KI270868.1', 'chr19_KI270868v1_alt', 'GRCh38.p14', 61734); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1035, 'GL383577.2', 'chr20_GL383577v2_alt', 'GRCh38.p14', 128386); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1036, 'KI270869.1', 'chr20_KI270869v1_alt', 'GRCh38.p14', 118774); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1037, 'KI270870.1', 'chr20_KI270870v1_alt', 'GRCh38.p14', 183433); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1038, 'KI270871.1', 'chr20_KI270871v1_alt', 'GRCh38.p14', 58661); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1039, 'GL383578.2', 'chr21_GL383578v2_alt', 'GRCh38.p14', 63917); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1040, 'GL383579.2', 'chr21_GL383579v2_alt', 'GRCh38.p14', 201197); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1041, 'GL383580.2', 'chr21_GL383580v2_alt', 'GRCh38.p14', 74653); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1042, 'GL383581.2', 'chr21_GL383581v2_alt', 'GRCh38.p14', 116689); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1043, 'KI270872.1', 'chr21_KI270872v1_alt', 'GRCh38.p14', 82692); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1044, 'KI270873.1', 'chr21_KI270873v1_alt', 'GRCh38.p14', 143900); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1045, 'KI270874.1', 'chr21_KI270874v1_alt', 'GRCh38.p14', 166743); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1047, 'GL383583.2', 'chr22_GL383583v2_alt', 'GRCh38.p14', 96924); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1048, 'KI270875.1', 'chr22_KI270875v1_alt', 'GRCh38.p14', 259914); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1049, 'KI270876.1', 'chr22_KI270876v1_alt', 'GRCh38.p14', 263666); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1050, 'KI270877.1', 'chr22_KI270877v1_alt', 'GRCh38.p14', 101331); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1051, 'KI270878.1', 'chr22_KI270878v1_alt', 'GRCh38.p14', 186262); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1052, 'KI270879.1', 'chr22_KI270879v1_alt', 'GRCh38.p14', 304135); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1053, 'KI270880.1', 'chrX_KI270880v1_alt', 'GRCh38.p14', 284869); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1054, 'KI270881.1', 'chrX_KI270881v1_alt', 'GRCh38.p14', 144206); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1055, 'KI270892.1', 'chr1_KI270892v1_alt', 'GRCh38.p14', 162212); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, 
reference_genome, seq_length) VALUES (1056, 'KI270893.1', 'chr2_KI270893v1_alt', 'GRCh38.p14', 161218); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1057, 'KI270894.1', 'chr2_KI270894v1_alt', 'GRCh38.p14', 214158); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1058, 'KI270895.1', 'chr3_KI270895v1_alt', 'GRCh38.p14', 162896); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1059, 'KI270896.1', 'chr4_KI270896v1_alt', 'GRCh38.p14', 378547); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1060, 'KI270897.1', 'chr5_KI270897v1_alt', 'GRCh38.p14', 1144418); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1061, 'KI270898.1', 'chr5_KI270898v1_alt', 'GRCh38.p14', 130957); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1062, 'GL000251.2', 'chr6_GL000251v2_alt', 'GRCh38.p14', 4795265); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1063, 'KI270899.1', 'chr7_KI270899v1_alt', 'GRCh38.p14', 190869); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1064, 'KI270900.1', 'chr8_KI270900v1_alt', 'GRCh38.p14', 318687); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1065, 'KI270901.1', 'chr8_KI270901v1_alt', 'GRCh38.p14', 136959); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1066, 'KI270902.1', 'chr11_KI270902v1_alt', 'GRCh38.p14', 106711); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1067, 'KI270903.1', 
'chr11_KI270903v1_alt', 'GRCh38.p14', 214625); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1068, 'KI270904.1', 'chr12_KI270904v1_alt', 'GRCh38.p14', 572349); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1069, 'KI270905.1', 'chr15_KI270905v1_alt', 'GRCh38.p14', 5161414); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1070, 'KI270906.1', 'chr15_KI270906v1_alt', 'GRCh38.p14', 196384); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1072, 'KI270907.1', 'chr17_KI270907v1_alt', 'GRCh38.p14', 137721); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1073, 'KI270908.1', 'chr17_KI270908v1_alt', 'GRCh38.p14', 1423190); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1074, 'KI270909.1', 'chr17_KI270909v1_alt', 'GRCh38.p14', 325800); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1075, 'KI270910.1', 'chr17_KI270910v1_alt', 'GRCh38.p14', 157099); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1076, 'KI270911.1', 'chr18_KI270911v1_alt', 'GRCh38.p14', 157710); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1077, 'KI270912.1', 'chr18_KI270912v1_alt', 'GRCh38.p14', 174061); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1078, 'GL949747.2', 'chr19_GL949747v2_alt', 'GRCh38.p14', 729520); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1080, 'KI270913.1', 'chrX_KI270913v1_alt', 'GRCh38.p14', 274009); 
-INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1081, 'KI270924.1', 'chr3_KI270924v1_alt', 'GRCh38.p14', 166540); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1082, 'KI270925.1', 'chr4_KI270925v1_alt', 'GRCh38.p14', 555799); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1083, 'GL000252.2', 'chr6_GL000252v2_alt', 'GRCh38.p14', 4604811); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1084, 'KI270926.1', 'chr8_KI270926v1_alt', 'GRCh38.p14', 229282); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1085, 'KI270927.1', 'chr11_KI270927v1_alt', 'GRCh38.p14', 218612); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1086, 'GL949748.2', 'chr19_GL949748v2_alt', 'GRCh38.p14', 1064304); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1087, 'KI270928.1', 'chr22_KI270928v1_alt', 'GRCh38.p14', 176103); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1088, 'KI270934.1', 'chr3_KI270934v1_alt', 'GRCh38.p14', 163458); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1089, 'GL000253.2', 'chr6_GL000253v2_alt', 'GRCh38.p14', 4677643); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1090, 'GL949749.2', 'chr19_GL949749v2_alt', 'GRCh38.p14', 1091841); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1091, 'KI270935.1', 'chr3_KI270935v1_alt', 'GRCh38.p14', 197351); -INSERT INTO genbank_contig (genbank_contig_id, 
accession, common_name, reference_genome, seq_length) VALUES (1092, 'GL000254.2', 'chr6_GL000254v2_alt', 'GRCh38.p14', 4827813); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1093, 'GL949750.2', 'chr19_GL949750v2_alt', 'GRCh38.p14', 1066390); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1094, 'KI270936.1', 'chr3_KI270936v1_alt', 'GRCh38.p14', 164170); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1095, 'GL000255.2', 'chr6_GL000255v2_alt', 'GRCh38.p14', 4606388); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1096, 'GL949751.2', 'chr19_GL949751v2_alt', 'GRCh38.p14', 1002683); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1097, 'KI270937.1', 'chr3_KI270937v1_alt', 'GRCh38.p14', 165607); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1098, 'GL000256.2', 'chr6_GL000256v2_alt', 'GRCh38.p14', 4929269); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1100, 'KI270758.1', 'chr6_KI270758v1_alt', 'GRCh38.p14', 76752); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1101, 'GL949753.2', 'chr19_GL949753v2_alt', 'GRCh38.p14', 796479); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1102, 'KI270938.1', 'chr19_KI270938v1_alt', 'GRCh38.p14', 1066800); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1103, 'KI270882.1', 'chr19_KI270882v1_alt', 'GRCh38.p14', 248807); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) 
VALUES (1104, 'KI270883.1', 'chr19_KI270883v1_alt', 'GRCh38.p14', 170399); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1105, 'KI270884.1', 'chr19_KI270884v1_alt', 'GRCh38.p14', 157053); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1106, 'KI270885.1', 'chr19_KI270885v1_alt', 'GRCh38.p14', 171027); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1107, 'KI270886.1', 'chr19_KI270886v1_alt', 'GRCh38.p14', 204239); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1108, 'KI270887.1', 'chr19_KI270887v1_alt', 'GRCh38.p14', 209512); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1109, 'KI270888.1', 'chr19_KI270888v1_alt', 'GRCh38.p14', 155532); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1110, 'KI270889.1', 'chr19_KI270889v1_alt', 'GRCh38.p14', 170698); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1111, 'KI270890.1', 'chr19_KI270890v1_alt', 'GRCh38.p14', 184499); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1112, 'KI270891.1', 'chr19_KI270891v1_alt', 'GRCh38.p14', 170680); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1113, 'KI270914.1', 'chr19_KI270914v1_alt', 'GRCh38.p14', 205194); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1114, 'KI270915.1', 'chr19_KI270915v1_alt', 'GRCh38.p14', 170665); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1115, 'KI270916.1', 'chr19_KI270916v1_alt', 
'GRCh38.p14', 184516); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1116, 'KI270917.1', 'chr19_KI270917v1_alt', 'GRCh38.p14', 190932); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1117, 'KI270918.1', 'chr19_KI270918v1_alt', 'GRCh38.p14', 123111); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1118, 'KI270919.1', 'chr19_KI270919v1_alt', 'GRCh38.p14', 170701); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1119, 'KI270920.1', 'chr19_KI270920v1_alt', 'GRCh38.p14', 198005); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1120, 'KI270921.1', 'chr19_KI270921v1_alt', 'GRCh38.p14', 282224); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1121, 'KI270922.1', 'chr19_KI270922v1_alt', 'GRCh38.p14', 187935); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1122, 'KI270923.1', 'chr19_KI270923v1_alt', 'GRCh38.p14', 189352); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1123, 'KI270929.1', 'chr19_KI270929v1_alt', 'GRCh38.p14', 186203); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1124, 'KI270930.1', 'chr19_KI270930v1_alt', 'GRCh38.p14', 200773); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1130, 'KI270931.1', 'chr19_KI270931v1_alt', 'GRCh38.p14', 170148); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1131, 'KI270932.1', 'chr19_KI270932v1_alt', 'GRCh38.p14', 215732); -INSERT INTO genbank_contig 
(genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1132, 'KI270933.1', 'chr19_KI270933v1_alt', 'GRCh38.p14', 170537); -INSERT INTO genbank_contig (genbank_contig_id, accession, common_name, reference_genome, seq_length) VALUES (1133, 'GL000209.2', 'chr19_GL000209v2_alt', 'GRCh38.p14', 177381); - - -# --- !Downs -truncate genbank_contig restart identity cascade; \ No newline at end of file diff --git a/conf/evolutions/default/20.sql b/conf/evolutions/default/20.sql deleted file mode 100644 index 90c50264..00000000 --- a/conf/evolutions/default/20.sql +++ /dev/null @@ -1,94 +0,0 @@ -# --- !Ups - --- Create new alignment_metadata table for linear references -CREATE TABLE public.alignment_metadata -( - id bigserial - PRIMARY KEY, - sequence_file_id bigint NOT NULL - REFERENCES public.sequence_file - ON DELETE CASCADE, - genbank_contig_id integer NOT NULL - REFERENCES public.genbank_contig - ON DELETE CASCADE, - metric_level varchar(50) NOT NULL - CONSTRAINT alignment_metadata_metric_level_check - CHECK ((metric_level)::text = ANY - ((ARRAY ['CONTIG_OVERALL'::character varying, 'REGION'::character varying])::text[])), - region_name varchar(255), - region_start_pos bigint, - region_end_pos bigint, - region_length_bp bigint, - metrics_date timestamp DEFAULT now() NOT NULL, - analysis_tool varchar(255) NOT NULL, - analysis_tool_version varchar(50), - notes text, - metadata jsonb, - CONSTRAINT valid_region_coordinates - CHECK ( - (metric_level = 'CONTIG_OVERALL' AND region_start_pos IS NULL AND region_end_pos IS NULL) - OR - (metric_level = 'REGION' AND region_start_pos IS NOT NULL AND region_end_pos IS NOT NULL - AND region_start_pos > 0 AND region_end_pos >= region_start_pos) - ) -); - --- Create new alignment_coverage table -CREATE TABLE public.alignment_coverage -( - alignment_metadata_id bigint NOT NULL - PRIMARY KEY - REFERENCES public.alignment_metadata - ON DELETE CASCADE, - mean_depth double precision, - median_depth double precision, 
- percent_coverage_at_1x double precision, - percent_coverage_at_5x double precision, - percent_coverage_at_10x double precision, - percent_coverage_at_20x double precision, - percent_coverage_at_30x double precision, - bases_no_coverage bigint, - bases_low_quality_mapping bigint, - bases_callable bigint, - mean_mapping_quality double precision -); - --- Create indices for efficient querying -CREATE INDEX idx_alignment_metadata_sequence_file - ON public.alignment_metadata(sequence_file_id); - -CREATE INDEX idx_alignment_metadata_genbank_contig - ON public.alignment_metadata(genbank_contig_id); - -CREATE INDEX idx_alignment_metadata_metric_level - ON public.alignment_metadata(metric_level); - -CREATE INDEX idx_alignment_metadata_region - ON public.alignment_metadata(genbank_contig_id, region_start_pos, region_end_pos) - WHERE metric_level = 'REGION'; - --- Remove pangenome_path_id from genbank_contig -ALTER TABLE public.genbank_contig - DROP COLUMN IF EXISTS pangenome_path_id; - --- Add comment explaining the migration -COMMENT ON TABLE public.alignment_metadata IS - 'Linear reference-based alignment statistics. Replaces pangenome_alignment_metadata.'; - -COMMENT ON TABLE public.alignment_coverage IS - 'Coverage statistics for linear reference alignments. 
Replaces pangenome_alignment_coverage.'; - -# --- !Downs - --- Restore pangenome_path_id to genbank_contig -ALTER TABLE public.genbank_contig - ADD COLUMN IF NOT EXISTS pangenome_path_id integer; - --- Drop the new linear reference tables and their indices -DROP INDEX IF EXISTS public.idx_alignment_metadata_region; -DROP INDEX IF EXISTS public.idx_alignment_metadata_metric_level; -DROP INDEX IF EXISTS public.idx_alignment_metadata_genbank_contig; -DROP INDEX IF EXISTS public.idx_alignment_metadata_sequence_file; - -DROP TABLE IF EXISTS public.alignment_coverage; -DROP TABLE IF EXISTS public.alignment_metadata; \ No newline at end of file diff --git a/conf/evolutions/default/21.sql b/conf/evolutions/default/21.sql deleted file mode 100644 index 2825b7ff..00000000 --- a/conf/evolutions/default/21.sql +++ /dev/null @@ -1,36 +0,0 @@ -# --- !Ups -create table public.sequencing_lab -( - id serial - primary key, - name varchar(255) not null - unique, - is_d2c boolean default false not null, - website_url varchar(255), -- URL to the lab's official website - description_markdown text, -- Rich text description (e.g., accreditation, methods) - created_at timestamp not null default now(), - updated_at timestamp -); - -create table public.sequencer_instrument -( - id serial - primary key, - instrument_id varchar(255) not null - unique, -- The ID found in the BAM/CRAM read header (e.g., 'A00123') - lab_id integer not null - references public.sequencing_lab (id), -- Foreign key to the lab - manufacturer varchar(255), -- Optional: e.g., 'Illumina', 'PacBio' - model varchar(255), -- Optional: e.g., 'NovaSeq 6000', 'MiSeq' - created_at timestamp not null default now(), - updated_at timestamp -); - --- An index to optimize lookups by the instrument ID for the API -create unique index sequencer_instrument_instrument_id_uindex - on public.sequencer_instrument (instrument_id); - -# --- !Downs - -drop table public.sequencer_instrument; -drop table public.sequencing_lab; diff --git 
a/conf/evolutions/default/22.sql b/conf/evolutions/default/22.sql deleted file mode 100644 index 52ca84a4..00000000 --- a/conf/evolutions/default/22.sql +++ /dev/null @@ -1,60 +0,0 @@ -# --- !Ups -ALTER TABLE citizen_biosample RENAME COLUMN citizen_biosample_did TO at_uri; -ALTER TABLE citizen_biosample ADD COLUMN deleted BOOLEAN DEFAULT false NOT NULL; -ALTER TABLE citizen_biosample ADD COLUMN at_cid VARCHAR(255); -ALTER TABLE citizen_biosample ADD COLUMN created_at TIMESTAMP DEFAULT now() NOT NULL; -ALTER TABLE citizen_biosample ADD COLUMN updated_at TIMESTAMP DEFAULT now() NOT NULL; -ALTER TABLE citizen_biosample ADD COLUMN accession VARCHAR(255); -ALTER TABLE citizen_biosample ADD COLUMN alias VARCHAR(255); -ALTER TABLE citizen_biosample ADD COLUMN y_haplogroup JSONB; -ALTER TABLE citizen_biosample ADD COLUMN mt_haplogroup JSONB; -CREATE UNIQUE INDEX citizen_biosample_accession_uindex ON citizen_biosample (accession); - -CREATE TABLE publication_citizen_biosample -( - publication_id INT REFERENCES publication (id) ON DELETE CASCADE, - citizen_biosample_id INT REFERENCES citizen_biosample (id) ON DELETE CASCADE, - PRIMARY KEY (publication_id, citizen_biosample_id) -); - -CREATE TABLE citizen_biosample_original_haplogroup -( - id SERIAL PRIMARY KEY, - citizen_biosample_id INT REFERENCES citizen_biosample (id) ON DELETE CASCADE, - publication_id INT REFERENCES publication (id) ON DELETE CASCADE, - y_haplogroup_result JSONB, - mt_haplogroup_result JSONB, - notes TEXT, - UNIQUE (citizen_biosample_id, publication_id) -); - -CREATE TABLE project -( - id SERIAL PRIMARY KEY, - project_guid UUID NOT NULL UNIQUE, - name VARCHAR(255) NOT NULL, - description TEXT, - owner_did VARCHAR(255) NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT now(), - updated_at TIMESTAMP NOT NULL DEFAULT now(), - deleted BOOLEAN DEFAULT false NOT NULL, - at_uri VARCHAR(255), - at_cid VARCHAR(255) -); - -CREATE UNIQUE INDEX project_at_uri_uindex ON project (at_uri); - -# --- !Downs -DROP TABLE 
project; -DROP TABLE citizen_biosample_original_haplogroup; -DROP TABLE publication_citizen_biosample; -DROP INDEX citizen_biosample_accession_uindex; -ALTER TABLE citizen_biosample DROP COLUMN mt_haplogroup; -ALTER TABLE citizen_biosample DROP COLUMN y_haplogroup; -ALTER TABLE citizen_biosample DROP COLUMN alias; -ALTER TABLE citizen_biosample DROP COLUMN accession; -ALTER TABLE citizen_biosample DROP COLUMN updated_at; -ALTER TABLE citizen_biosample DROP COLUMN created_at; -ALTER TABLE citizen_biosample DROP COLUMN at_cid; -ALTER TABLE citizen_biosample DROP COLUMN deleted; -ALTER TABLE citizen_biosample RENAME COLUMN at_uri TO citizen_biosample_did; diff --git a/conf/evolutions/default/23.sql b/conf/evolutions/default/23.sql deleted file mode 100644 index 7940b504..00000000 --- a/conf/evolutions/default/23.sql +++ /dev/null @@ -1,9 +0,0 @@ -# --- !Ups -ALTER TABLE biosample_original_haplogroup ADD COLUMN y_haplogroup_result JSONB; -ALTER TABLE biosample_original_haplogroup ADD COLUMN mt_haplogroup_result JSONB; -ALTER TABLE specimen_donor RENAME COLUMN citizen_biosample_did TO at_uri; - -# --- !Downs -ALTER TABLE specimen_donor RENAME COLUMN at_uri TO citizen_biosample_did; -ALTER TABLE biosample_original_haplogroup DROP COLUMN mt_haplogroup_result; -ALTER TABLE biosample_original_haplogroup DROP COLUMN y_haplogroup_result; diff --git a/conf/evolutions/default/24.sql b/conf/evolutions/default/24.sql deleted file mode 100644 index c1239be0..00000000 --- a/conf/evolutions/default/24.sql +++ /dev/null @@ -1,7 +0,0 @@ -# --- !Ups -ALTER TABLE citizen_biosample ADD COLUMN specimen_donor_id INT REFERENCES specimen_donor(id); -CREATE INDEX citizen_biosample_specimen_donor_id_idx ON citizen_biosample(specimen_donor_id); - -# --- !Downs -DROP INDEX citizen_biosample_specimen_donor_id_idx; -ALTER TABLE citizen_biosample DROP COLUMN specimen_donor_id; diff --git a/conf/evolutions/default/25.sql b/conf/evolutions/default/25.sql deleted file mode 100644 index 
38213a72..00000000 --- a/conf/evolutions/default/25.sql +++ /dev/null @@ -1,37 +0,0 @@ -# --- !Ups - -ALTER TABLE sequence_library ADD COLUMN at_uri VARCHAR(255); -ALTER TABLE sequence_library ADD COLUMN at_cid VARCHAR(255); -CREATE INDEX idx_sequence_library_at_uri ON sequence_library(at_uri); - -ALTER TABLE alignment_metadata ADD COLUMN reference_build VARCHAR(255); -ALTER TABLE alignment_metadata ADD COLUMN variant_caller VARCHAR(255); -ALTER TABLE alignment_metadata ADD COLUMN genome_territory BIGINT; -ALTER TABLE alignment_metadata ADD COLUMN mean_coverage DOUBLE PRECISION; -ALTER TABLE alignment_metadata ADD COLUMN median_coverage DOUBLE PRECISION; -ALTER TABLE alignment_metadata ADD COLUMN sd_coverage DOUBLE PRECISION; -ALTER TABLE alignment_metadata ADD COLUMN pct_exc_dupe DOUBLE PRECISION; -ALTER TABLE alignment_metadata ADD COLUMN pct_exc_mapq DOUBLE PRECISION; -ALTER TABLE alignment_metadata ADD COLUMN pct_10x DOUBLE PRECISION; -ALTER TABLE alignment_metadata ADD COLUMN pct_20x DOUBLE PRECISION; -ALTER TABLE alignment_metadata ADD COLUMN pct_30x DOUBLE PRECISION; -ALTER TABLE alignment_metadata ADD COLUMN het_snp_sensitivity DOUBLE PRECISION; - -# --- !Downs - -ALTER TABLE alignment_metadata DROP COLUMN het_snp_sensitivity; -ALTER TABLE alignment_metadata DROP COLUMN pct_30x; -ALTER TABLE alignment_metadata DROP COLUMN pct_20x; -ALTER TABLE alignment_metadata DROP COLUMN pct_10x; -ALTER TABLE alignment_metadata DROP COLUMN pct_exc_mapq; -ALTER TABLE alignment_metadata DROP COLUMN pct_exc_dupe; -ALTER TABLE alignment_metadata DROP COLUMN sd_coverage; -ALTER TABLE alignment_metadata DROP COLUMN median_coverage; -ALTER TABLE alignment_metadata DROP COLUMN mean_coverage; -ALTER TABLE alignment_metadata DROP COLUMN genome_territory; -ALTER TABLE alignment_metadata DROP COLUMN variant_caller; -ALTER TABLE alignment_metadata DROP COLUMN reference_build; - -DROP INDEX idx_sequence_library_at_uri; -ALTER TABLE sequence_library DROP COLUMN at_cid; -ALTER TABLE 
sequence_library DROP COLUMN at_uri; diff --git a/conf/evolutions/default/26.sql b/conf/evolutions/default/26.sql deleted file mode 100644 index 7ba0986f..00000000 --- a/conf/evolutions/default/26.sql +++ /dev/null @@ -1,110 +0,0 @@ -# --- !Ups -CREATE SCHEMA IF NOT EXISTS social; - --- 1. Move existing reputation tables to social schema --- NOTE: We must drop constraints that reference these tables if they are not schema-qualified or if necessary, --- but typically changing schema preserves data. Foreign keys might need adjustment if they are schema-bound. --- In Postgres, moving a table to a new schema preserves its data and indexes. --- However, we should be careful about the FKs from public.users. --- The existing FKs in 6.sql were: --- fk_reputation_events_user_id references public.users --- fk_reputation_events_event_type_id references public.reputation_event_types --- fk_reputation_events_source_user_id references public.users --- fk_user_reputation_scores_user_id references public.users - -ALTER TABLE public.reputation_event_types SET SCHEMA social; -ALTER TABLE public.reputation_events SET SCHEMA social; -ALTER TABLE public.user_reputation_scores SET SCHEMA social; - --- 2. 
Create new social tables - --- User Relationships (Foes/Blocks) -CREATE TABLE social.user_blocks ( - blocker_did VARCHAR(255) NOT NULL, - blocked_did VARCHAR(255) NOT NULL, - reason TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - PRIMARY KEY (blocker_did, blocked_did) -); - -CREATE INDEX idx_user_blocks_blocker ON social.user_blocks(blocker_did); -CREATE INDEX idx_user_blocks_blocked ON social.user_blocks(blocked_did); - --- Conversations (Threads) -CREATE TABLE social.conversations ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - type VARCHAR(50) NOT NULL, -- 'DIRECT', 'GROUP', 'SYSTEM', 'RECRUITMENT' - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW() -); - --- Conversation Participants -CREATE TABLE social.conversation_participants ( - conversation_id UUID NOT NULL, - user_did VARCHAR(255) NOT NULL, - role VARCHAR(50) DEFAULT 'MEMBER', -- 'ADMIN', 'MEMBER' - last_read_at TIMESTAMP, - joined_at TIMESTAMP NOT NULL DEFAULT NOW(), - PRIMARY KEY (conversation_id, user_did), - CONSTRAINT fk_conversation_participants_conversation_id FOREIGN KEY (conversation_id) REFERENCES social.conversations(id) ON DELETE CASCADE -); - --- Messages -CREATE TABLE social.messages ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - conversation_id UUID NOT NULL, - sender_did VARCHAR(255) NOT NULL, -- User DID or 'SYSTEM' - content TEXT NOT NULL, - content_type VARCHAR(50) DEFAULT 'TEXT', -- 'TEXT', 'MARKDOWN', 'JSON_PAYLOAD' - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - is_edited BOOLEAN DEFAULT FALSE, - CONSTRAINT fk_messages_conversation_id FOREIGN KEY (conversation_id) REFERENCES social.conversations(id) ON DELETE CASCADE -); - -CREATE INDEX idx_messages_conversation_id ON social.messages(conversation_id); -CREATE INDEX idx_messages_sender_did ON social.messages(sender_did); - --- Feed Posts (Public/Community) -CREATE TABLE social.feed_posts ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - author_did VARCHAR(255) NOT NULL, - 
content TEXT NOT NULL, - parent_post_id UUID, -- For replies - root_post_id UUID, -- Thread context - topic VARCHAR(100), -- 'GENERAL', 'HAPLOGROUP_R', etc. - author_reputation_score INT DEFAULT 0, -- Snapshot at time of posting - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - CONSTRAINT fk_feed_posts_parent_id FOREIGN KEY (parent_post_id) REFERENCES social.feed_posts(id), - CONSTRAINT fk_feed_posts_root_id FOREIGN KEY (root_post_id) REFERENCES social.feed_posts(id) -); - -CREATE INDEX idx_feed_posts_author_did ON social.feed_posts(author_did); -CREATE INDEX idx_feed_posts_topic ON social.feed_posts(topic); -CREATE INDEX idx_feed_posts_created_at ON social.feed_posts(created_at); - --- 3. Seed initial reputation event types -INSERT INTO social.reputation_event_types (name, description, default_points_change, is_positive, is_system_generated) VALUES -('ACCOUNT_VERIFIED', 'Email and identity verification complete', 10, TRUE, TRUE), -('LAB_OBSERVATION_ACCEPTED', 'Submitted sequencer metadata verified by consensus', 5, TRUE, TRUE), -('FEED_POST_UPVOTED', 'Community member upvoted a post', 1, TRUE, FALSE), -('FEED_POST_DOWNVOTED', 'Community member downvoted a post', -1, FALSE, FALSE), -('SPAM_REPORT_VALIDATED', 'Content marked as spam by moderator or consensus', -50, FALSE, TRUE), -('RECRUITMENT_ACCEPTED', 'User accepted a recruitment request', 2, TRUE, FALSE), -('NEW_USER_BONUS', 'Welcome bonus for new users', 5, TRUE, TRUE) -ON CONFLICT (name) DO NOTHING; - -# --- !Downs - -DROP TABLE social.feed_posts; -DROP TABLE social.messages; -DROP TABLE social.conversation_participants; -DROP TABLE social.conversations; -DROP TABLE social.user_blocks; - --- Move tables back to public -ALTER TABLE social.user_reputation_scores SET SCHEMA public; -ALTER TABLE social.reputation_events SET SCHEMA public; -ALTER TABLE social.reputation_event_types SET SCHEMA public; - -DROP SCHEMA social; diff --git a/conf/evolutions/default/27.sql 
b/conf/evolutions/default/27.sql deleted file mode 100644 index 2fa98fae..00000000 --- a/conf/evolutions/default/27.sql +++ /dev/null @@ -1,106 +0,0 @@ --- !Ups - -ALTER TABLE sequence_file ADD COLUMN checksums JSONB DEFAULT '[]'::jsonb; -ALTER TABLE sequence_file ADD COLUMN http_locations JSONB DEFAULT '[]'::jsonb; -ALTER TABLE sequence_file ADD COLUMN atp_location JSONB; - -UPDATE sequence_file sf SET checksums = ( - SELECT COALESCE(jsonb_agg(to_jsonb(sfc) - 'sequence_file_id'), '[]'::jsonb) - FROM sequence_file_checksum sfc WHERE sfc.sequence_file_id = sf.id -); - -UPDATE sequence_file sf SET http_locations = ( - SELECT COALESCE(jsonb_agg(to_jsonb(shl) - 'sequence_file_id'), '[]'::jsonb) - FROM sequence_http_location shl WHERE shl.sequence_file_id = sf.id -); - -UPDATE sequence_file sf SET atp_location = ( - SELECT to_jsonb(sal) - 'sequence_file_id' - FROM sequence_atp_location sal WHERE sal.sequence_file_id = sf.id -); - -CREATE INDEX idx_sf_checksums ON sequence_file USING GIN (checksums jsonb_path_ops); -CREATE INDEX idx_sf_http_locations ON sequence_file USING GIN (http_locations jsonb_path_ops); -CREATE INDEX idx_sf_atp_location ON sequence_file USING GIN (atp_location jsonb_path_ops); - -DROP TABLE sequence_file_checksum; -DROP TABLE sequence_http_location; -DROP TABLE sequence_atp_location; - --- !Downs - -CREATE TABLE sequence_file_checksum ( - id BIGSERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL, - checksum VARCHAR(255) NOT NULL, - algorithm VARCHAR(50) NOT NULL, - verified_at TIMESTAMP WITH TIME ZONE, - created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), - FOREIGN KEY (sequence_file_id) REFERENCES sequence_file(id) ON DELETE CASCADE, - UNIQUE (sequence_file_id, algorithm) -); - -CREATE TABLE sequence_http_location ( - id BIGSERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL, - url VARCHAR(2048) NOT NULL, - url_hash VARCHAR(64) NOT NULL, - created_at TIMESTAMP WITH TIME ZONE NOT 
NULL DEFAULT NOW(), - updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), - FOREIGN KEY (sequence_file_id) REFERENCES sequence_file(id) ON DELETE CASCADE, - UNIQUE (sequence_file_id, url_hash) -); - -CREATE TABLE sequence_atp_location ( - id BIGSERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL, - repo_did VARCHAR(255) NOT NULL, - record_uri VARCHAR(255) NOT NULL, - cid VARCHAR(255) NOT NULL, - created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), - FOREIGN KEY (sequence_file_id) REFERENCES sequence_file(id) ON DELETE CASCADE, - UNIQUE (sequence_file_id, record_uri) -); - --- Re-populate sequence_file_checksum from sequence_file.checksums (simplified, assumes single checksum for now) -INSERT INTO sequence_file_checksum (sequence_file_id, checksum, algorithm, verified_at, created_at, updated_at) -SELECT - sf.id, - (jsonb_array_elements(sf.checksums)->>'checksum')::VARCHAR, - (jsonb_array_elements(sf.checksums)->>'algorithm')::VARCHAR, - (jsonb_array_elements(sf.checksums)->>'verified_at')::TIMESTAMP WITH TIME ZONE, - (jsonb_array_elements(sf.checksums)->>'created_at')::TIMESTAMP WITH TIME ZONE, - (jsonb_array_elements(sf.checksums)->>'updated_at')::TIMESTAMP WITH TIME ZONE -FROM sequence_file sf -WHERE jsonb_array_length(sf.checksums) > 0; - - --- Re-populate sequence_http_location from sequence_file.http_locations (simplified) -INSERT INTO sequence_http_location (sequence_file_id, url, url_hash, created_at, updated_at) -SELECT - sf.id, - (jsonb_array_elements(sf.http_locations)->>'url')::VARCHAR, - (jsonb_array_elements(sf.http_locations)->>'url_hash')::VARCHAR, - (jsonb_array_elements(sf.http_locations)->>'created_at')::TIMESTAMP WITH TIME ZONE, - (jsonb_array_elements(sf.http_locations)->>'updated_at')::TIMESTAMP WITH TIME ZONE -FROM sequence_file sf -WHERE jsonb_array_length(sf.http_locations) > 0; - --- Re-populate sequence_atp_location from sequence_file.atp_location (simplified) 
-INSERT INTO sequence_atp_location (sequence_file_id, repo_did, record_uri, cid, created_at, updated_at) -SELECT - sf.id, - (sf.atp_location->>'repo_did')::VARCHAR, - (sf.atp_location->>'record_uri')::VARCHAR, - (sf.atp_location->>'cid')::VARCHAR, - (sf.atp_location->>'created_at')::TIMESTAMP WITH TIME ZONE, - (sf.atp_location->>'updated_at')::TIMESTAMP WITH TIME ZONE -FROM sequence_file sf -WHERE sf.atp_location IS NOT NULL; - - -ALTER TABLE sequence_file DROP COLUMN checksums; -ALTER TABLE sequence_file DROP COLUMN http_locations; -ALTER TABLE sequence_file DROP COLUMN atp_location; diff --git a/conf/evolutions/default/28.sql b/conf/evolutions/default/28.sql deleted file mode 100644 index d0ded6b9..00000000 --- a/conf/evolutions/default/28.sql +++ /dev/null @@ -1,23 +0,0 @@ --- !Ups - --- 1. Create the 'tree' schema if it doesn't already exist -CREATE SCHEMA IF NOT EXISTS tree; - --- 2. Migrate existing haplogroup tables from 'public' schema to 'tree' schema -ALTER TABLE public.haplogroup SET SCHEMA tree; -ALTER TABLE public.haplogroup_relationship SET SCHEMA tree; -ALTER TABLE public.haplogroup_variant SET SCHEMA tree; -ALTER TABLE public.haplogroup_variant_metadata SET SCHEMA tree; -ALTER TABLE public.relationship_revision_metadata SET SCHEMA tree; - --- !Downs - --- 1. Revert haplogroup tables from 'tree' schema back to 'public' schema -ALTER TABLE tree.haplogroup SET SCHEMA public; -ALTER TABLE tree.haplogroup_relationship SET SCHEMA public; -ALTER TABLE tree.haplogroup_variant SET SCHEMA public; -ALTER TABLE tree.haplogroup_variant_metadata SET SCHEMA public; -ALTER TABLE tree.relationship_revision_metadata SET SCHEMA public; - --- 2. 
Drop the 'tree' schema if it exists (CASCADE will remove tables within it) -DROP SCHEMA IF EXISTS tree CASCADE; diff --git a/conf/evolutions/default/29.sql b/conf/evolutions/default/29.sql deleted file mode 100644 index 7f8e9db1..00000000 --- a/conf/evolutions/default/29.sql +++ /dev/null @@ -1,63 +0,0 @@ --- !Ups - --- Create ENUM types first -CREATE TYPE data_generation_method AS ENUM ('SEQUENCING', 'GENOTYPING'); -CREATE TYPE target_type AS ENUM ('WHOLE_GENOME', 'Y_CHROMOSOME', 'MT_DNA', 'AUTOSOMAL', 'X_CHROMOSOME', 'MIXED'); - --- Create the test_type_definition table -CREATE TABLE test_type_definition ( - id SERIAL PRIMARY KEY, - code VARCHAR(50) NOT NULL UNIQUE, -- Maps to TestTypeRow.code - display_name VARCHAR(100) NOT NULL, -- Maps to TestTypeRow.displayName - category data_generation_method NOT NULL, -- Maps to TestTypeRow.category - vendor VARCHAR(100), -- Maps to TestTypeRow.vendor - target_type target_type NOT NULL, -- Maps to TestTypeRow.targetType - expected_min_depth DOUBLE PRECISION, -- Maps to TestTypeRow.expectedMinDepth - expected_target_depth DOUBLE PRECISION, -- Maps to TestTypeRow.expectedTargetDepth - expected_marker_count INTEGER, -- Maps to TestTypeRow.expectedMarkerCount - supports_haplogroup_y BOOLEAN NOT NULL DEFAULT FALSE, -- Maps to TestTypeRow.supportsHaplogroupY - supports_haplogroup_mt BOOLEAN NOT NULL DEFAULT FALSE, -- Maps to TestTypeRow.supportsHaplogroupMt - supports_autosomal_ibd BOOLEAN NOT NULL DEFAULT FALSE, -- Maps to TestTypeRow.supportsAutosomalIbd - supports_ancestry BOOLEAN NOT NULL DEFAULT FALSE, -- Maps to TestTypeRow.supportsAncestry - typical_file_formats TEXT[], -- Maps to TestTypeRow.typicalFileFormats - version VARCHAR(20), -- Maps to TestTypeRow.version - release_date DATE, -- Maps to TestTypeRow.releaseDate - deprecated_at DATE, -- Maps to TestTypeRow.deprecatedAt - successor_test_type_id INTEGER REFERENCES test_type_definition(id), -- Maps to TestTypeRow.successorTestTypeId - description TEXT, -- Maps to 
TestTypeRow.description - documentation_url VARCHAR(500) -- Maps to TestTypeRow.documentationUrl -); - --- Insert initial data for known test types -INSERT INTO test_type_definition ( - code, display_name, category, vendor, target_type, expected_target_depth, - supports_haplogroup_y, supports_haplogroup_mt, supports_autosomal_ibd, supports_ancestry, - typical_file_formats, description -) VALUES -('WGS', 'Whole Genome Sequencing', 'SEQUENCING', NULL, 'WHOLE_GENOME', 30.0, - TRUE, TRUE, TRUE, TRUE, ARRAY['BAM', 'CRAM', 'VCF'], 'Standard whole genome sequencing.'), - -('WES', 'Whole Exome Sequencing', 'SEQUENCING', NULL, 'AUTOSOMAL', 100.0, - FALSE, FALSE, FALSE, FALSE, ARRAY['BAM', 'VCF'], 'Whole exome sequencing.'), - -('TARGETED_Y', 'Targeted Y-DNA Sequencing', 'SEQUENCING', NULL, 'Y_CHROMOSOME', 50.0, - TRUE, FALSE, FALSE, FALSE, ARRAY['BAM', 'VCF', 'BED'], 'Targeted sequencing of Y-chromosome.'), - -('TARGETED_MT', 'Targeted mtDNA Sequencing', 'SEQUENCING', NULL, 'MT_DNA', 1000.0, - FALSE, TRUE, FALSE, FALSE, ARRAY['BAM', 'FASTA', 'VCF'], 'Targeted sequencing of mitochondrial DNA.'), - -('SNP_ARRAY_23ANDME', '23andMe v5 Chip', 'GENOTYPING', '23andMe', 'MIXED', NULL, - TRUE, TRUE, TRUE, TRUE, ARRAY['TXT', 'CSV'], 'SNP Array data from 23andMe v5.'), - -('SNP_ARRAY_ANCESTRY', 'AncestryDNA v2', 'GENOTYPING', 'AncestryDNA', 'MIXED', NULL, - TRUE, TRUE, TRUE, TRUE, ARRAY['TXT', 'CSV'], 'SNP Array data from AncestryDNA v2.'), - -('ARRAY_FTDNA_FF', 'FTDNA Family Finder', 'GENOTYPING', 'FamilyTreeDNA', 'AUTOSOMAL', NULL, - FALSE, FALSE, TRUE, TRUE, ARRAY['CSV'], 'FTDNA Family Finder autosomal chip data.'); - --- !Downs - --- Drop the test_type_definition table -DROP TABLE IF EXISTS test_type_definition CASCADE; -DROP TYPE IF EXISTS data_generation_method; -DROP TYPE IF EXISTS target_type; \ No newline at end of file diff --git a/conf/evolutions/default/3.sql b/conf/evolutions/default/3.sql deleted file mode 100644 index 229b55f6..00000000 --- 
a/conf/evolutions/default/3.sql +++ /dev/null @@ -1,10 +0,0 @@ -# --- !Ups ---- Add author and abstract columns to publications - -ALTER TABLE publication ADD COLUMN authors VARCHAR(1000) NULL; -ALTER TABLE publication ADD COLUMN abstract_summary TEXT NULL; - -# --- !Downs - -ALTER TABLE publication DROP COLUMN abstract_summary; -ALTER TABLE publication DROP COLUMN authors; \ No newline at end of file diff --git a/conf/evolutions/default/30.sql b/conf/evolutions/default/30.sql deleted file mode 100644 index d7af5b04..00000000 --- a/conf/evolutions/default/30.sql +++ /dev/null @@ -1,40 +0,0 @@ --- !Ups - --- 1. Add new test_type_id column -ALTER TABLE sequence_library ADD COLUMN test_type_id INTEGER; - --- 2. Migrate data from old test_type string to new test_type_id using test_type_definition -UPDATE sequence_library sl -SET test_type_id = ttd.id -FROM test_type_definition ttd -WHERE UPPER(sl.test_type) = ttd.code; -- Corrected to ttd.code - --- 3. Add foreign key constraint -ALTER TABLE sequence_library -ADD CONSTRAINT fk_sequence_library_test_type -FOREIGN KEY (test_type_id) REFERENCES test_type_definition(id) ON DELETE RESTRICT; - --- 4. Make the test_type_id column NOT NULL (if all existing data could be migrated) --- Note: If there are unmappable values in sequence_library.test_type, this step will fail. --- Assuming all existing values map to an entry in test_type_definition. -ALTER TABLE sequence_library ALTER COLUMN test_type_id SET NOT NULL; - --- 5. Drop the old test_type column (optional, can be done later after verification) -ALTER TABLE sequence_library DROP COLUMN test_type; - --- !Downs - --- 1. Re-add the old test_type column -ALTER TABLE sequence_library ADD COLUMN test_type VARCHAR(255); - --- 2. Migrate data back from test_type_id to test_type string -UPDATE sequence_library sl -SET test_type = ttd.code -FROM test_type_definition ttd -WHERE sl.test_type_id = ttd.id; - --- 3. 
Drop foreign key constraint -ALTER TABLE sequence_library DROP CONSTRAINT IF EXISTS fk_sequence_library_test_type; - --- 4. Drop the new test_type_id column -ALTER TABLE sequence_library DROP COLUMN test_type_id; diff --git a/conf/evolutions/default/31.sql b/conf/evolutions/default/31.sql deleted file mode 100644 index 02b6b29e..00000000 --- a/conf/evolutions/default/31.sql +++ /dev/null @@ -1,55 +0,0 @@ --- !Ups - -CREATE TABLE publication_candidates ( - id SERIAL PRIMARY KEY, - openalex_id VARCHAR(255) UNIQUE NOT NULL, - doi VARCHAR(255), - title TEXT NOT NULL, - abstract TEXT, - publication_date DATE, - journal_name VARCHAR(500), - relevance_score DOUBLE PRECISION, - discovery_date TIMESTAMP DEFAULT NOW(), - status VARCHAR(50) DEFAULT 'pending', -- pending, accepted, rejected, deferred - reviewed_by UUID, - reviewed_at TIMESTAMP, - rejection_reason TEXT, - raw_metadata JSONB, -- Full OpenAlex response - FOREIGN KEY (reviewed_by) REFERENCES public.users(id) ON DELETE SET NULL -); - -CREATE TABLE publication_search_configs ( - id SERIAL PRIMARY KEY, - name VARCHAR(255) NOT NULL, - search_query TEXT NOT NULL, -- OpenAlex query string - concepts JSONB, -- OpenAlex concept IDs to filter - journals JSONB, -- Journal/source filters - enabled BOOLEAN DEFAULT TRUE, - last_run TIMESTAMP, - created_at TIMESTAMP DEFAULT NOW() -); - -CREATE TABLE publication_search_runs ( - id SERIAL PRIMARY KEY, - config_id INT REFERENCES publication_search_configs(id) ON DELETE CASCADE, - run_at TIMESTAMP DEFAULT NOW(), - candidates_found INT, - new_candidates INT, -- After deduplication - query_used TEXT, - duration_ms INT -); - -CREATE INDEX idx_pub_candidates_status ON publication_candidates(status); -CREATE INDEX idx_pub_candidates_relevance ON publication_candidates(relevance_score DESC) WHERE status = 'pending'; -CREATE INDEX idx_pub_candidates_openalex ON publication_candidates(openalex_id); -CREATE INDEX idx_pub_candidates_doi ON publication_candidates(doi); - --- Insert default 
search config -INSERT INTO publication_search_configs (name, search_query, enabled) -VALUES ('Y-DNA Haplogroup Discovery', 'Y-DNA haplogroup', TRUE); - --- !Downs - -DROP TABLE IF EXISTS publication_search_runs; -DROP TABLE IF EXISTS publication_search_configs; -DROP TABLE IF EXISTS publication_candidates; diff --git a/conf/evolutions/default/32.sql b/conf/evolutions/default/32.sql deleted file mode 100644 index 3b1eba1e..00000000 --- a/conf/evolutions/default/32.sql +++ /dev/null @@ -1,14 +0,0 @@ --- !Ups - -INSERT INTO publication_search_configs (name, search_query, concepts, enabled) VALUES -('Forensic Anthropology and Bioarchaeology Studies', 'forensic anthropology bioarchaeology', '["https://api.openalex.org/concepts/wikidata:Q28065", "https://api.openalex.org/concepts/wikidata:Q13404081"]'::jsonb, TRUE), -('Archaeology and Ancient Environmental Studies', 'archaeology ancient environmental', '["https://api.openalex.org/concepts/wikidata:Q23498", "https://api.openalex.org/concepts/wikidata:Q1561862"]'::jsonb, TRUE), -('Forensic and Genetic Research', 'forensic genetic research', '["https://api.openalex.org/concepts/wikidata:Q495304", "https://api.openalex.org/concepts/wikidata:Q69953209"]'::jsonb, TRUE); - --- !Downs - -DELETE FROM publication_search_configs WHERE name IN ( -'Forensic Anthropology and Bioarchaeology Studies', -'Archaeology and Ancient Environmental Studies', -'Forensic and Genetic Research' -); diff --git a/conf/evolutions/default/33.sql b/conf/evolutions/default/33.sql deleted file mode 100644 index b604aedb..00000000 --- a/conf/evolutions/default/33.sql +++ /dev/null @@ -1,12 +0,0 @@ --- !Ups - --- Insert default roles -INSERT INTO auth.roles (id, name, description, created_at, updated_at) -VALUES -(gen_random_uuid(), 'Admin', 'Administrator with full access', NOW(), NOW()), -(gen_random_uuid(), 'Curator', 'Curator access for managing content', NOW(), NOW()) -ON CONFLICT (name) DO NOTHING; - --- !Downs - -DELETE FROM auth.roles WHERE name IN 
('Admin', 'Curator'); diff --git a/conf/evolutions/default/34.sql b/conf/evolutions/default/34.sql deleted file mode 100644 index 2e6dc258..00000000 --- a/conf/evolutions/default/34.sql +++ /dev/null @@ -1,7 +0,0 @@ --- !Ups - -ALTER TABLE public.users RENAME COLUMN email TO email_hash; - --- !Downs - -ALTER TABLE public.users RENAME COLUMN email_hash TO email; diff --git a/conf/evolutions/default/35.sql b/conf/evolutions/default/35.sql deleted file mode 100644 index a3efff90..00000000 --- a/conf/evolutions/default/35.sql +++ /dev/null @@ -1,7 +0,0 @@ --- !Ups - -ALTER TABLE public.users RENAME COLUMN email_hash TO email_encrypted; - --- !Downs - -ALTER TABLE public.users RENAME COLUMN email_encrypted TO email_hash; diff --git a/conf/evolutions/default/36.sql b/conf/evolutions/default/36.sql deleted file mode 100644 index b268dd8f..00000000 --- a/conf/evolutions/default/36.sql +++ /dev/null @@ -1,17 +0,0 @@ --- !Ups - --- Insert permission -INSERT INTO auth.permissions (id, name, description, created_at, updated_at) -VALUES (gen_random_uuid(), 'view_publication_candidates', 'View and manage publication candidates', NOW(), NOW()) -ON CONFLICT (name) DO NOTHING; - --- Assign permission to Admin and Curator roles -INSERT INTO auth.role_permissions (role_id, permission_id) -SELECT r.id, p.id -FROM auth.roles r, auth.permissions p -WHERE r.name IN ('Admin', 'Curator') AND p.name = 'view_publication_candidates' -ON CONFLICT DO NOTHING; - --- !Downs - -DELETE FROM auth.permissions WHERE name = 'view_publication_candidates'; diff --git a/conf/evolutions/default/37.sql b/conf/evolutions/default/37.sql deleted file mode 100644 index 4377d5fd..00000000 --- a/conf/evolutions/default/37.sql +++ /dev/null @@ -1,17 +0,0 @@ --- !Ups - --- Add haplogroup reconciliation references to specimen_donor --- Reconciliation is at the donor level since a donor may have multiple biosamples/runs --- These link to HaplogroupReconciliation records for multi-run consensus -ALTER TABLE 
specimen_donor - ADD COLUMN y_dna_reconciliation_ref VARCHAR, - ADD COLUMN mt_dna_reconciliation_ref VARCHAR; - -COMMENT ON COLUMN specimen_donor.y_dna_reconciliation_ref IS 'AT URI reference to Y-DNA haplogroup reconciliation record'; -COMMENT ON COLUMN specimen_donor.mt_dna_reconciliation_ref IS 'AT URI reference to MT-DNA haplogroup reconciliation record'; - --- !Downs - -ALTER TABLE specimen_donor - DROP COLUMN IF EXISTS y_dna_reconciliation_ref, - DROP COLUMN IF EXISTS mt_dna_reconciliation_ref; diff --git a/conf/evolutions/default/38.sql b/conf/evolutions/default/38.sql deleted file mode 100644 index 3aadb991..00000000 --- a/conf/evolutions/default/38.sql +++ /dev/null @@ -1,81 +0,0 @@ --- !Ups - --- Population breakdown table for ancestry analysis results --- Stores ADMIXTURE-style ancestry breakdowns at sub-continental granularity -CREATE TABLE population_breakdown ( - id SERIAL PRIMARY KEY, - at_uri VARCHAR UNIQUE, - at_cid VARCHAR, - sample_guid UUID NOT NULL, - analysis_method VARCHAR NOT NULL, -- PCA_PROJECTION_GMM, ADMIXTURE, FASTSTRUCTURE, etc. - panel_type VARCHAR, -- 'aims' (~5k SNPs) or 'genome-wide' (~500k SNPs) - reference_populations VARCHAR, -- '1000G_HGDP_v1', '1000G', 'HGDP', etc. 
- snps_analyzed INT, -- Total SNPs in the analysis panel - snps_with_genotype INT, -- SNPs with valid genotype calls - snps_missing INT, -- SNPs with no call or missing data - confidence_level DOUBLE PRECISION, -- Overall confidence 0.0-1.0 - pca_coordinates JSONB, -- First 3 PCA coordinates [x, y, z] - analysis_date TIMESTAMP, - pipeline_version VARCHAR, - reference_version VARCHAR, - deleted BOOLEAN DEFAULT FALSE, - created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW() -); - -CREATE INDEX idx_population_breakdown_sample_guid ON population_breakdown(sample_guid); -CREATE INDEX idx_population_breakdown_at_uri ON population_breakdown(at_uri) WHERE at_uri IS NOT NULL; - -COMMENT ON TABLE population_breakdown IS 'Ancestry composition analysis results using PCA projection onto 1000G + HGDP reference populations'; - --- Population components (sub-continental level, ~33 populations) -CREATE TABLE population_component ( - id SERIAL PRIMARY KEY, - population_breakdown_id INT NOT NULL REFERENCES population_breakdown(id) ON DELETE CASCADE, - population_code VARCHAR NOT NULL, -- CEU, YRI, CHB, GIH, etc. - population_name VARCHAR, -- Northwestern European, Yoruba, Han Chinese, etc. - super_population VARCHAR, -- European, African, East Asian, South Asian, etc. 
- percentage DOUBLE PRECISION NOT NULL, -- 0.0-100.0 - confidence_lower DOUBLE PRECISION, -- 95% CI lower bound - confidence_upper DOUBLE PRECISION, -- 95% CI upper bound - rank INT -- Display rank by percentage (1 = highest) -); - -CREATE INDEX idx_population_component_breakdown ON population_component(population_breakdown_id); - -COMMENT ON TABLE population_component IS 'Individual population components in an ancestry breakdown (~33 reference populations)'; - --- Super-population summary (continental level, 9 super-populations) -CREATE TABLE super_population_summary ( - id SERIAL PRIMARY KEY, - population_breakdown_id INT NOT NULL REFERENCES population_breakdown(id) ON DELETE CASCADE, - super_population VARCHAR NOT NULL, -- European, African, East Asian, etc. - percentage DOUBLE PRECISION NOT NULL, -- Combined percentage 0.0-100.0 - populations JSONB -- Array of contributing population codes -); - -CREATE INDEX idx_super_population_breakdown ON super_population_summary(population_breakdown_id); - -COMMENT ON TABLE super_population_summary IS 'Aggregated ancestry at continental level (9 super-populations)'; - --- Seed reference populations lookup table if it doesn't exist with all codes --- First check if population table exists and add missing populations -INSERT INTO population (population_name) -SELECT unnest(ARRAY[ - 'CEU', 'FIN', 'GBR', 'IBS', 'TSI', -- European - 'YRI', 'LWK', 'ESN', 'MSL', 'GWD', -- African - 'CHB', 'JPT', 'KHV', 'CHS', 'CDX', -- East Asian - 'GIH', 'PJL', 'BEB', 'STU', 'ITU', -- South Asian - 'MXL', 'PUR', 'PEL', 'CLM', -- Americas - 'Druze', 'Palestinian', 'Bedouin', -- West Asian (HGDP) - 'Papuan', 'Melanesian', -- Oceanian (HGDP) - 'Yakut', -- Central Asian (HGDP) - 'Maya', 'Pima', 'Karitiana' -- Native American (HGDP) -]) -ON CONFLICT (population_name) DO NOTHING; - --- !Downs - -DROP TABLE IF EXISTS super_population_summary; -DROP TABLE IF EXISTS population_component; -DROP TABLE IF EXISTS population_breakdown; diff --git 
a/conf/evolutions/default/39.sql b/conf/evolutions/default/39.sql deleted file mode 100644 index a572a056..00000000 --- a/conf/evolutions/default/39.sql +++ /dev/null @@ -1,38 +0,0 @@ --- !Ups - --- Genotype data table for chip/array-based genetic data --- Stores metadata about SNP array files and their quality metrics -CREATE TABLE genotype_data ( - id SERIAL PRIMARY KEY, - at_uri VARCHAR UNIQUE, - at_cid VARCHAR, - sample_guid UUID NOT NULL, - test_type_id INT REFERENCES test_type_definition(id), - provider VARCHAR, -- 23andMe, AncestryDNA, FTDNA, LivingDNA, MyHeritage - chip_version VARCHAR, - build_version VARCHAR, -- GRCh37, GRCh38 - source_file_hash VARCHAR, -- SHA-256 for deduplication - -- Metrics consolidated into JSONB to reduce column count - -- Contains: totalMarkersCalled, totalMarkersPossible, callRate, noCallRate, - -- yMarkersCalled, yMarkersTotal, mtMarkersCalled, mtMarkersTotal, - -- autosomalMarkersCalled, hetRate, testDate, processedAt, - -- derivedYHaplogroup, derivedMtHaplogroup, files - metrics JSONB NOT NULL DEFAULT '{}', - population_breakdown_id INT REFERENCES population_breakdown(id), - deleted BOOLEAN DEFAULT FALSE, - created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW() -); - -CREATE INDEX idx_genotype_sample_guid ON genotype_data(sample_guid); -CREATE INDEX idx_genotype_test_type ON genotype_data(test_type_id); -CREATE INDEX idx_genotype_at_uri ON genotype_data(at_uri) WHERE at_uri IS NOT NULL; -CREATE INDEX idx_genotype_provider ON genotype_data(provider); - -COMMENT ON TABLE genotype_data IS 'SNP array/chip genotype data with quality metrics and derived haplogroups'; -COMMENT ON COLUMN genotype_data.source_file_hash IS 'SHA-256 hash for file deduplication'; -COMMENT ON COLUMN genotype_data.metrics IS 'JSONB containing: totalMarkersCalled, totalMarkersPossible, callRate, noCallRate, yMarkersCalled, yMarkersTotal, mtMarkersCalled, mtMarkersTotal, autosomalMarkersCalled, hetRate, testDate, processedAt, 
derivedYHaplogroup, derivedMtHaplogroup, files'; - --- !Downs - -DROP TABLE IF EXISTS genotype_data; diff --git a/conf/evolutions/default/4.sql b/conf/evolutions/default/4.sql deleted file mode 100644 index 333eeb15..00000000 --- a/conf/evolutions/default/4.sql +++ /dev/null @@ -1,25 +0,0 @@ -# --- !Ups ---- Add revision tracking table -CREATE TABLE relationship_revision_metadata -( - haplogroup_relationship_id INT NOT NULL, - revision_id INT NOT NULL, - author VARCHAR(255) NOT NULL, - timestamp TIMESTAMP NOT NULL, - comment TEXT NOT NULL, - change_type VARCHAR(50) NOT NULL, - previous_revision_id INT, - PRIMARY KEY (haplogroup_relationship_id, revision_id), - FOREIGN KEY (haplogroup_relationship_id) - REFERENCES haplogroup_relationship (haplogroup_relationship_id) - ON DELETE CASCADE -); - --- Indexes for common queries -CREATE INDEX idx_revision_metadata_author ON relationship_revision_metadata (author); -CREATE INDEX idx_revision_metadata_timestamp ON relationship_revision_metadata (timestamp); -CREATE INDEX idx_revision_metadata_change_type ON relationship_revision_metadata (change_type); - -# --- !Downs - -DROP TABLE relationship_revision_metadata; \ No newline at end of file diff --git a/conf/evolutions/default/40.sql b/conf/evolutions/default/40.sql deleted file mode 100644 index 3c350565..00000000 --- a/conf/evolutions/default/40.sql +++ /dev/null @@ -1,71 +0,0 @@ --- !Ups - --- Haplogroup reconciliation table for multi-run/multi-biosample consensus --- Stored at specimen_donor level since a donor may have multiple biosamples --- from different testing companies or labs that need reconciliation - -CREATE TYPE dna_type AS ENUM ('Y_DNA', 'MT_DNA'); - -CREATE TABLE haplogroup_reconciliation ( - id SERIAL PRIMARY KEY, - at_uri VARCHAR UNIQUE, - at_cid VARCHAR, - specimen_donor_id INT NOT NULL REFERENCES specimen_donor(id), - dna_type dna_type NOT NULL, - - -- Reconciliation status metrics consolidated into JSONB - -- Contains: compatibilityLevel, 
consensusHaplogroup, statusConfidence, - -- branchCompatibilityScore, snpConcordance, runCount, warnings - status JSONB NOT NULL DEFAULT '{}', - - -- Run calls stored as JSONB array of RunHaplogroupCall objects - -- Each call: { sourceRef, haplogroup, confidence, callMethod, score, - -- supportingSnps, conflictingSnps, noCalls, technology, - -- meanCoverage, treeVersion, strPrediction } - run_calls JSONB NOT NULL, - - -- Optional conflict/heteroplasmy data - -- Each conflict: { position, snpName, contigAccession, calls[], resolution, resolvedValue } - snp_conflicts JSONB, - - -- Each observation: { position, majorAllele, minorAllele, majorAlleleFrequency, - -- depth, isDefiningSnp, affectedHaplogroup } - heteroplasmy_observations JSONB, - - -- Identity verification metrics - -- { kinshipCoefficient, fingerprintSnpConcordance, yStrDistance, - -- verificationStatus, verificationMethod } - identity_verification JSONB, - - -- Manual override if user corrected the consensus - -- { overriddenHaplogroup, reason, overriddenAt, overriddenBy } - manual_override JSONB, - - -- Audit log of reconciliation changes - -- Each entry: { timestamp, action, previousConsensus, newConsensus, runRef, notes } - audit_log JSONB, - - last_reconciliation_at TIMESTAMP, - deleted BOOLEAN DEFAULT FALSE, - created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW() -); - --- Unique constraint: one reconciliation per donor per DNA type -CREATE UNIQUE INDEX idx_reconciliation_donor_dna_type - ON haplogroup_reconciliation(specimen_donor_id, dna_type) - WHERE deleted = FALSE; - -CREATE INDEX idx_reconciliation_specimen_donor ON haplogroup_reconciliation(specimen_donor_id); -CREATE INDEX idx_reconciliation_at_uri ON haplogroup_reconciliation(at_uri) WHERE at_uri IS NOT NULL; --- Index on JSONB field for consensus haplogroup queries -CREATE INDEX idx_reconciliation_consensus ON haplogroup_reconciliation((status->>'consensusHaplogroup')); - -COMMENT ON TABLE haplogroup_reconciliation IS 
'Multi-run haplogroup reconciliation at specimen donor level'; -COMMENT ON COLUMN haplogroup_reconciliation.run_calls IS 'Array of RunHaplogroupCall objects from each source (runs, alignments, STR profiles)'; -COMMENT ON COLUMN haplogroup_reconciliation.status IS 'JSONB containing: compatibilityLevel, consensusHaplogroup, statusConfidence, branchCompatibilityScore (LCA_depth / max(depth_A, depth_B) - 1.0 = fully compatible), snpConcordance, runCount, warnings'; - --- !Downs - -DROP TABLE IF EXISTS haplogroup_reconciliation; -DROP TYPE IF EXISTS dna_type; diff --git a/conf/evolutions/default/41.sql b/conf/evolutions/default/41.sql deleted file mode 100644 index 2578c3de..00000000 --- a/conf/evolutions/default/41.sql +++ /dev/null @@ -1,56 +0,0 @@ --- !Ups - --- Move user_pds_info from public schema to auth schema --- This table stores where each user's AT Protocol identity lives (their home PDS) - --- Step 1: Create the new table in auth schema -CREATE TABLE auth.user_pds_info -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - user_id UUID UNIQUE NOT NULL, - pds_url VARCHAR(512) NOT NULL, -- Increased length for longer PDS URLs - did VARCHAR(255) UNIQUE NOT NULL, - handle VARCHAR(255), -- Cache the resolved handle - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - CONSTRAINT fk_auth_user_pds_info_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE -); - --- Step 2: Migrate existing data -INSERT INTO auth.user_pds_info (id, user_id, pds_url, did, created_at, updated_at) -SELECT id, user_id, pds_url, did, created_at, updated_at -FROM public.user_pds_info; - --- Step 3: Drop the old table -DROP TABLE public.user_pds_info; - --- Step 4: Add indexes for common lookups -CREATE INDEX idx_auth_user_pds_info_did ON auth.user_pds_info(did); -CREATE INDEX idx_auth_user_pds_info_handle ON auth.user_pds_info(handle) WHERE handle IS NOT NULL; - -COMMENT ON TABLE auth.user_pds_info IS 'Stores the home PDS URL 
for each user - where their AT Protocol identity lives'; -COMMENT ON COLUMN auth.user_pds_info.pds_url IS 'The resolved PDS endpoint URL (e.g., https://bsky.social or https://pds.decodingus.com)'; -COMMENT ON COLUMN auth.user_pds_info.handle IS 'Cached handle for quick lookups without re-resolution'; - --- !Downs - --- Recreate the table in public schema -CREATE TABLE public.user_pds_info -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - user_id UUID UNIQUE NOT NULL, - pds_url VARCHAR(255) NOT NULL, - did VARCHAR(255) UNIQUE NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - CONSTRAINT fk_user_pds_info_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE, - CONSTRAINT fk_user_pds_info_did FOREIGN KEY (did) REFERENCES public.users (did) ON DELETE CASCADE -); - --- Migrate data back -INSERT INTO public.user_pds_info (id, user_id, pds_url, did, created_at, updated_at) -SELECT id, user_id, pds_url, did, created_at, updated_at -FROM auth.user_pds_info; - --- Drop the auth table -DROP TABLE auth.user_pds_info; diff --git a/conf/evolutions/default/42.sql b/conf/evolutions/default/42.sql deleted file mode 100644 index 16140219..00000000 --- a/conf/evolutions/default/42.sql +++ /dev/null @@ -1,29 +0,0 @@ --- !Ups - --- Rename email_encrypted back to email and ensure CITEXT for case-insensitive uniqueness --- Drop any existing constraints on the column (both possible names) -ALTER TABLE public.users DROP CONSTRAINT IF EXISTS users_email_encrypted_key; -ALTER TABLE public.users DROP CONSTRAINT IF EXISTS users_email_key; - --- Rename the column -ALTER TABLE public.users RENAME COLUMN email_encrypted TO email; - --- Change type to CITEXT for case-insensitive comparison (if not already) -ALTER TABLE public.users ALTER COLUMN email TYPE CITEXT USING email::CITEXT; - --- Add unique constraint (case-insensitive via CITEXT) -ALTER TABLE public.users ADD CONSTRAINT users_email_key UNIQUE (email); - --- 
!Downs - --- Drop the unique constraint -ALTER TABLE public.users DROP CONSTRAINT IF EXISTS users_email_key; - --- Change type back to VARCHAR (stored encrypted values were text) -ALTER TABLE public.users ALTER COLUMN email TYPE VARCHAR(255); - --- Rename back to email_encrypted -ALTER TABLE public.users RENAME COLUMN email TO email_encrypted; - --- Re-add the original constraint -ALTER TABLE public.users ADD CONSTRAINT users_email_encrypted_key UNIQUE (email_encrypted); diff --git a/conf/evolutions/default/43.sql b/conf/evolutions/default/43.sql deleted file mode 100644 index 9d7b1866..00000000 --- a/conf/evolutions/default/43.sql +++ /dev/null @@ -1,31 +0,0 @@ --- !Ups - --- Cookie consent tracking for GDPR compliance --- Tracks when users accept the cookie policy - -CREATE TABLE auth.cookie_consents ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - user_id UUID REFERENCES public.users(id) ON DELETE CASCADE, - session_id VARCHAR(255), -- For anonymous users before login - ip_address_hash VARCHAR(64), -- Hashed for privacy, used for anonymous consent - consent_given BOOLEAN NOT NULL DEFAULT FALSE, - consent_timestamp TIMESTAMP NOT NULL DEFAULT NOW(), - policy_version VARCHAR(20) NOT NULL DEFAULT '1.0', -- Track which version they accepted - user_agent TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - - -- Either user_id or session_id must be present - CONSTRAINT chk_consent_identity CHECK (user_id IS NOT NULL OR session_id IS NOT NULL) -); - --- Index for quick lookups -CREATE INDEX idx_cookie_consents_user_id ON auth.cookie_consents(user_id) WHERE user_id IS NOT NULL; -CREATE INDEX idx_cookie_consents_session_id ON auth.cookie_consents(session_id) WHERE session_id IS NOT NULL; - -COMMENT ON TABLE auth.cookie_consents IS 'Tracks user acceptance of cookie policy for GDPR compliance'; -COMMENT ON COLUMN auth.cookie_consents.policy_version IS 'Version of the cookie policy the user accepted'; -COMMENT ON COLUMN auth.cookie_consents.ip_address_hash IS 'SHA-256 
hash of IP address for anonymous consent tracking'; - --- !Downs - -DROP TABLE auth.cookie_consents; diff --git a/conf/evolutions/default/44.sql b/conf/evolutions/default/44.sql deleted file mode 100644 index 4f26a92a..00000000 --- a/conf/evolutions/default/44.sql +++ /dev/null @@ -1,60 +0,0 @@ --- !Ups - --- Support schema for contact/messaging system -CREATE SCHEMA support; - --- Contact messages from users (both authenticated and anonymous) -CREATE TABLE support.contact_messages ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- For authenticated users - user_id UUID REFERENCES public.users(id) ON DELETE SET NULL, - - -- For anonymous users (captured from form) - sender_name VARCHAR(255), - sender_email VARCHAR(255), - - -- Message content - subject VARCHAR(500) NOT NULL, - message TEXT NOT NULL, - - -- Status tracking - status VARCHAR(50) NOT NULL DEFAULT 'new', -- new, read, replied, closed - - -- Metadata - ip_address_hash VARCHAR(64), - user_agent TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW() -); - --- Admin replies to contact messages -CREATE TABLE support.message_replies ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - message_id UUID NOT NULL REFERENCES support.contact_messages(id) ON DELETE CASCADE, - admin_user_id UUID NOT NULL REFERENCES public.users(id) ON DELETE RESTRICT, - reply_text TEXT NOT NULL, - - -- For anonymous users, track if email was sent - email_sent BOOLEAN NOT NULL DEFAULT FALSE, - email_sent_at TIMESTAMP, - - created_at TIMESTAMP NOT NULL DEFAULT NOW() -); - --- Indexes -CREATE INDEX idx_contact_messages_user_id ON support.contact_messages(user_id) WHERE user_id IS NOT NULL; -CREATE INDEX idx_contact_messages_status ON support.contact_messages(status); -CREATE INDEX idx_contact_messages_created_at ON support.contact_messages(created_at DESC); -CREATE INDEX idx_message_replies_message_id ON support.message_replies(message_id); - -COMMENT ON SCHEMA support IS 'Support 
ticket and contact message system'; -COMMENT ON TABLE support.contact_messages IS 'Contact form submissions from authenticated and anonymous users'; -COMMENT ON TABLE support.message_replies IS 'Admin replies to contact messages'; -COMMENT ON COLUMN support.contact_messages.status IS 'Message status: new, read, replied, closed'; - --- !Downs - -DROP TABLE support.message_replies; -DROP TABLE support.contact_messages; -DROP SCHEMA support; diff --git a/conf/evolutions/default/45.sql b/conf/evolutions/default/45.sql deleted file mode 100644 index 9d1b9986..00000000 --- a/conf/evolutions/default/45.sql +++ /dev/null @@ -1,11 +0,0 @@ --- !Ups - --- Track when authenticated users last viewed their message history --- This allows us to show a badge for new replies since their last visit -ALTER TABLE support.contact_messages ADD COLUMN user_last_viewed_at TIMESTAMP; - -COMMENT ON COLUMN support.contact_messages.user_last_viewed_at IS 'Timestamp when authenticated user last viewed this message thread'; - --- !Downs - -ALTER TABLE support.contact_messages DROP COLUMN user_last_viewed_at; diff --git a/conf/evolutions/default/46.sql b/conf/evolutions/default/46.sql deleted file mode 100644 index 17aa03ff..00000000 --- a/conf/evolutions/default/46.sql +++ /dev/null @@ -1,72 +0,0 @@ --- !Ups - --- Add TreeCurator role -INSERT INTO auth.roles (id, name, description, created_at, updated_at) -VALUES (gen_random_uuid(), 'TreeCurator', 'Curator access for haplogroups and variants', NOW(), NOW()) -ON CONFLICT (name) DO NOTHING; - --- Create curator permissions -INSERT INTO auth.permissions (id, name, description, created_at, updated_at) VALUES - (gen_random_uuid(), 'haplogroup.view', 'View haplogroup details', NOW(), NOW()), - (gen_random_uuid(), 'haplogroup.create', 'Create new haplogroups', NOW(), NOW()), - (gen_random_uuid(), 'haplogroup.update', 'Update existing haplogroups', NOW(), NOW()), - (gen_random_uuid(), 'haplogroup.delete', 'Delete haplogroups', NOW(), NOW()), - 
(gen_random_uuid(), 'variant.view', 'View variant details', NOW(), NOW()), - (gen_random_uuid(), 'variant.create', 'Create new variants', NOW(), NOW()), - (gen_random_uuid(), 'variant.update', 'Update existing variants', NOW(), NOW()), - (gen_random_uuid(), 'variant.delete', 'Delete variants', NOW(), NOW()), - (gen_random_uuid(), 'audit.view', 'View audit history', NOW(), NOW()) -ON CONFLICT (name) DO NOTHING; - --- Grant all curator permissions to TreeCurator role -INSERT INTO auth.role_permissions (role_id, permission_id) -SELECT r.id, p.id FROM auth.roles r, auth.permissions p -WHERE r.name = 'TreeCurator' - AND p.name IN ('haplogroup.view', 'haplogroup.create', 'haplogroup.update', 'haplogroup.delete', - 'variant.view', 'variant.create', 'variant.update', 'variant.delete', 'audit.view') -ON CONFLICT DO NOTHING; - --- Grant all curator permissions to Admin role -INSERT INTO auth.role_permissions (role_id, permission_id) -SELECT r.id, p.id FROM auth.roles r, auth.permissions p -WHERE r.name = 'Admin' - AND p.name IN ('haplogroup.view', 'haplogroup.create', 'haplogroup.update', 'haplogroup.delete', - 'variant.view', 'variant.create', 'variant.update', 'variant.delete', 'audit.view') -ON CONFLICT DO NOTHING; - --- Create curator schema -CREATE SCHEMA IF NOT EXISTS curator; - --- Create audit_log table -CREATE TABLE curator.audit_log ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - user_id UUID NOT NULL, - entity_type VARCHAR(50) NOT NULL, - entity_id INT NOT NULL, - action VARCHAR(20) NOT NULL, - old_value JSONB, - new_value JSONB, - comment TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - CONSTRAINT fk_audit_log_user_id FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE SET NULL -); - -CREATE INDEX idx_audit_log_entity ON curator.audit_log(entity_type, entity_id); -CREATE INDEX idx_audit_log_user ON curator.audit_log(user_id); -CREATE INDEX idx_audit_log_created_at ON curator.audit_log(created_at DESC); - -COMMENT ON TABLE curator.audit_log IS 
'Audit trail for all curator actions on haplogroups and variants'; - --- !Downs - -DROP TABLE IF EXISTS curator.audit_log; -DROP SCHEMA IF EXISTS curator; - -DELETE FROM auth.role_permissions -WHERE permission_id IN (SELECT id FROM auth.permissions WHERE name LIKE 'haplogroup.%' OR name LIKE 'variant.%' OR name = 'audit.view'); - -DELETE FROM auth.permissions -WHERE name IN ('haplogroup.view', 'haplogroup.create', 'haplogroup.update', 'haplogroup.delete', - 'variant.view', 'variant.create', 'variant.update', 'variant.delete', 'audit.view'); - -DELETE FROM auth.roles WHERE name = 'TreeCurator'; diff --git a/conf/evolutions/default/47.sql b/conf/evolutions/default/47.sql deleted file mode 100644 index 4acc1e67..00000000 --- a/conf/evolutions/default/47.sql +++ /dev/null @@ -1,36 +0,0 @@ -# --- !Ups - --- Variant Alias Table --- Stores alternative names for variants from different sources (YBrowse, ISOGG, YFull, publications, etc.) --- A single variant may be known by multiple names across different research groups. - -CREATE TABLE variant_alias ( - id SERIAL PRIMARY KEY, - variant_id INT NOT NULL REFERENCES variant(variant_id) ON DELETE CASCADE, - alias_type VARCHAR(50) NOT NULL, -- 'common_name', 'rs_id', 'isogg', 'yfull', 'ftdna', etc. - alias_value VARCHAR(255) NOT NULL, - source VARCHAR(255), -- Origin: 'ybrowse', 'isogg', 'curator', 'yfull', etc. 
- is_primary BOOLEAN DEFAULT FALSE, -- Primary alias for this type (for display preference) - created_at TIMESTAMP DEFAULT NOW() NOT NULL, - UNIQUE(variant_id, alias_type, alias_value) -); - -CREATE INDEX idx_variant_alias_variant ON variant_alias(variant_id); -CREATE INDEX idx_variant_alias_value ON variant_alias(alias_value); -CREATE INDEX idx_variant_alias_type_value ON variant_alias(alias_type, alias_value); - --- Migrate existing names to alias table --- This preserves the current common_name and rs_id as aliases -INSERT INTO variant_alias (variant_id, alias_type, alias_value, source, is_primary) -SELECT variant_id, 'common_name', common_name, 'migration', TRUE -FROM variant -WHERE common_name IS NOT NULL; - -INSERT INTO variant_alias (variant_id, alias_type, alias_value, source, is_primary) -SELECT variant_id, 'rs_id', rs_id, 'migration', TRUE -FROM variant -WHERE rs_id IS NOT NULL; - -# --- !Downs - -DROP TABLE IF EXISTS variant_alias; diff --git a/conf/evolutions/default/48.sql b/conf/evolutions/default/48.sql deleted file mode 100644 index f38a4dad..00000000 --- a/conf/evolutions/default/48.sql +++ /dev/null @@ -1,32 +0,0 @@ --- # --- !Ups - --- Add branch age estimate columns to haplogroup table --- Dates stored as years before present (YBP) with optional confidence intervals - -ALTER TABLE tree.haplogroup - ADD COLUMN formed_ybp INTEGER, - ADD COLUMN formed_ybp_lower INTEGER, - ADD COLUMN formed_ybp_upper INTEGER, - ADD COLUMN tmrca_ybp INTEGER, - ADD COLUMN tmrca_ybp_lower INTEGER, - ADD COLUMN tmrca_ybp_upper INTEGER, - ADD COLUMN age_estimate_source VARCHAR(100); - -COMMENT ON COLUMN tree.haplogroup.formed_ybp IS 'Estimated years before present when branch formed (mutation occurred)'; -COMMENT ON COLUMN tree.haplogroup.formed_ybp_lower IS 'Lower bound of 95% confidence interval for formed date'; -COMMENT ON COLUMN tree.haplogroup.formed_ybp_upper IS 'Upper bound of 95% confidence interval for formed date'; -COMMENT ON COLUMN tree.haplogroup.tmrca_ybp 
IS 'Estimated years before present for Time to Most Recent Common Ancestor'; -COMMENT ON COLUMN tree.haplogroup.tmrca_ybp_lower IS 'Lower bound of 95% confidence interval for TMRCA'; -COMMENT ON COLUMN tree.haplogroup.tmrca_ybp_upper IS 'Upper bound of 95% confidence interval for TMRCA'; -COMMENT ON COLUMN tree.haplogroup.age_estimate_source IS 'Source of age estimates (e.g., YFull, internal calculation)'; - --- # --- !Downs - -ALTER TABLE tree.haplogroup - DROP COLUMN IF EXISTS formed_ybp, - DROP COLUMN IF EXISTS formed_ybp_lower, - DROP COLUMN IF EXISTS formed_ybp_upper, - DROP COLUMN IF EXISTS tmrca_ybp, - DROP COLUMN IF EXISTS tmrca_ybp_lower, - DROP COLUMN IF EXISTS tmrca_ybp_upper, - DROP COLUMN IF EXISTS age_estimate_source; diff --git a/conf/evolutions/default/49.sql b/conf/evolutions/default/49.sql deleted file mode 100644 index 05cd3b47..00000000 --- a/conf/evolutions/default/49.sql +++ /dev/null @@ -1,16 +0,0 @@ --- !Ups - --- Simplify reference genome naming in genbank_contig table --- - Remove patch versions: GRCh37.p13 -> GRCh37, GRCh38.p14 -> GRCh38 --- - Use UCSC convention for T2T-CHM13: T2T-CHM13v2.0 -> hs1 - -UPDATE genbank_contig SET reference_genome = 'GRCh37' WHERE reference_genome = 'GRCh37.p13'; -UPDATE genbank_contig SET reference_genome = 'GRCh38' WHERE reference_genome = 'GRCh38.p14'; -UPDATE genbank_contig SET reference_genome = 'hs1' WHERE reference_genome = 'T2T-CHM13v2.0'; - --- !Downs - --- Restore original reference genome naming -UPDATE genbank_contig SET reference_genome = 'GRCh37.p13' WHERE reference_genome = 'GRCh37'; -UPDATE genbank_contig SET reference_genome = 'GRCh38.p14' WHERE reference_genome = 'GRCh38'; -UPDATE genbank_contig SET reference_genome = 'T2T-CHM13v2.0' WHERE reference_genome = 'hs1'; diff --git a/conf/evolutions/default/5.sql b/conf/evolutions/default/5.sql deleted file mode 100644 index c3b2a4a3..00000000 --- a/conf/evolutions/default/5.sql +++ /dev/null @@ -1,18 +0,0 @@ -# --- !Ups ---- Add variant revision 
tracking table -CREATE TABLE haplogroup_variant_metadata ( - haplogroup_variant_id INT NOT NULL, - revision_id INT NOT NULL, - author VARCHAR(255) NOT NULL, - timestamp TIMESTAMP NOT NULL, - comment TEXT NOT NULL, - change_type VARCHAR(50) NOT NULL, - previous_revision_id INT, - PRIMARY KEY (haplogroup_variant_id, revision_id), - FOREIGN KEY (haplogroup_variant_id) - REFERENCES haplogroup_variant (haplogroup_variant_id) - ON DELETE CASCADE -); - -# --- !Downs -DROP TABLE haplogroup_variant_metadata; \ No newline at end of file diff --git a/conf/evolutions/default/50.sql b/conf/evolutions/default/50.sql deleted file mode 100644 index abdb4d8f..00000000 --- a/conf/evolutions/default/50.sql +++ /dev/null @@ -1,69 +0,0 @@ -# --- !Ups - --- Genome region structural annotations --- References existing genbank_contig table for chromosome data - --- Version tracking for ETag generation -CREATE TABLE genome_region_version ( - id SERIAL PRIMARY KEY, - reference_genome VARCHAR(20) NOT NULL UNIQUE, -- GRCh37, GRCh38, hs1 - data_version VARCHAR(20) NOT NULL, -- e.g., "2024.12.1" - updated_at TIMESTAMP DEFAULT NOW() -); - --- Structural regions (centromere, telomere, PAR, XTR, ampliconic, etc.) -CREATE TABLE genome_region ( - id SERIAL PRIMARY KEY, - genbank_contig_id INT NOT NULL REFERENCES genbank_contig(genbank_contig_id), - region_type VARCHAR(30) NOT NULL, -- Centromere, Telomere_P, Telomere_Q, PAR1, PAR2, XTR, Ampliconic, Palindrome, Heterochromatin, XDegenerate - name VARCHAR(50), -- For named regions (P1-P8 palindromes) - start_pos BIGINT NOT NULL, - end_pos BIGINT NOT NULL, - modifier DECIMAL(3,2), -- Quality modifier (0.1-1.0) - UNIQUE(genbank_contig_id, region_type, name, start_pos) -); - --- Cytoband annotations for ideogram display -CREATE TABLE cytoband ( - id SERIAL PRIMARY KEY, - genbank_contig_id INT NOT NULL REFERENCES genbank_contig(genbank_contig_id), - name VARCHAR(20) NOT NULL, -- p11.32, q11.21, etc. 
- start_pos BIGINT NOT NULL, - end_pos BIGINT NOT NULL, - stain VARCHAR(10) NOT NULL, -- gneg, gpos25, gpos50, gpos75, gpos100, acen, gvar, stalk - UNIQUE(genbank_contig_id, name) -); - --- STR marker positions -CREATE TABLE str_marker ( - id SERIAL PRIMARY KEY, - genbank_contig_id INT NOT NULL REFERENCES genbank_contig(genbank_contig_id), - name VARCHAR(30) NOT NULL, -- DYS389I, DYS456, etc. - start_pos BIGINT NOT NULL, - end_pos BIGINT NOT NULL, - period INT NOT NULL, -- Repeat unit length in bp - verified BOOLEAN DEFAULT false, - note TEXT, - UNIQUE(genbank_contig_id, name) -); - --- Insert initial version records -INSERT INTO genome_region_version (reference_genome, data_version) VALUES - ('GRCh37', '2024.12.1'), - ('GRCh38', '2024.12.1'), - ('hs1', '2024.12.1'); - -CREATE INDEX idx_genome_region_contig ON genome_region(genbank_contig_id); -CREATE INDEX idx_cytoband_contig ON cytoband(genbank_contig_id); -CREATE INDEX idx_str_marker_contig ON str_marker(genbank_contig_id); - -# --- !Downs - -DROP INDEX IF EXISTS idx_str_marker_contig; -DROP INDEX IF EXISTS idx_cytoband_contig; -DROP INDEX IF EXISTS idx_genome_region_contig; - -DROP TABLE IF EXISTS str_marker; -DROP TABLE IF EXISTS cytoband; -DROP TABLE IF EXISTS genome_region; -DROP TABLE IF EXISTS genome_region_version; diff --git a/conf/evolutions/default/51.sql b/conf/evolutions/default/51.sql deleted file mode 100644 index 200c24a1..00000000 --- a/conf/evolutions/default/51.sql +++ /dev/null @@ -1,43 +0,0 @@ --- !Ups - --- Genome regions curator permissions -INSERT INTO auth.permissions (id, name, description, created_at, updated_at) VALUES - (gen_random_uuid(), 'genome_region.view', 'View genome region details', NOW(), NOW()), - (gen_random_uuid(), 'genome_region.create', 'Create genome regions', NOW(), NOW()), - (gen_random_uuid(), 'genome_region.update', 'Update genome regions', NOW(), NOW()), - (gen_random_uuid(), 'genome_region.delete', 'Delete genome regions', NOW(), NOW()), - (gen_random_uuid(), 
'cytoband.view', 'View cytoband details', NOW(), NOW()), - (gen_random_uuid(), 'cytoband.create', 'Create cytobands', NOW(), NOW()), - (gen_random_uuid(), 'cytoband.update', 'Update cytobands', NOW(), NOW()), - (gen_random_uuid(), 'cytoband.delete', 'Delete cytobands', NOW(), NOW()), - (gen_random_uuid(), 'str_marker.view', 'View STR marker details', NOW(), NOW()), - (gen_random_uuid(), 'str_marker.create', 'Create STR markers', NOW(), NOW()), - (gen_random_uuid(), 'str_marker.update', 'Update STR markers', NOW(), NOW()), - (gen_random_uuid(), 'str_marker.delete', 'Delete STR markers', NOW(), NOW()) -ON CONFLICT (name) DO NOTHING; - --- Grant to Curator role -INSERT INTO auth.role_permissions (role_id, permission_id) -SELECT r.id, p.id FROM auth.roles r, auth.permissions p -WHERE r.name = 'Curator' - AND p.name IN ('genome_region.view', 'genome_region.create', 'genome_region.update', 'genome_region.delete', - 'cytoband.view', 'cytoband.create', 'cytoband.update', 'cytoband.delete', - 'str_marker.view', 'str_marker.create', 'str_marker.update', 'str_marker.delete') -ON CONFLICT DO NOTHING; - --- Grant to Admin role -INSERT INTO auth.role_permissions (role_id, permission_id) -SELECT r.id, p.id FROM auth.roles r, auth.permissions p -WHERE r.name = 'Admin' - AND p.name IN ('genome_region.view', 'genome_region.create', 'genome_region.update', 'genome_region.delete', - 'cytoband.view', 'cytoband.create', 'cytoband.update', 'cytoband.delete', - 'str_marker.view', 'str_marker.create', 'str_marker.update', 'str_marker.delete') -ON CONFLICT DO NOTHING; - --- !Downs - -DELETE FROM auth.role_permissions -WHERE permission_id IN (SELECT id FROM auth.permissions WHERE name LIKE 'genome_region.%' OR name LIKE 'cytoband.%' OR name LIKE 'str_marker.%'); - -DELETE FROM auth.permissions -WHERE name LIKE 'genome_region.%' OR name LIKE 'cytoband.%' OR name LIKE 'str_marker.%'; diff --git a/conf/evolutions/default/52.sql b/conf/evolutions/default/52.sql deleted file mode 100644 index 
44e581f9..00000000 --- a/conf/evolutions/default/52.sql +++ /dev/null @@ -1,15 +0,0 @@ -# --- !Ups - --- Add provenance JSONB column to haplogroup table for multi-source attribution tracking -ALTER TABLE tree.haplogroup ADD COLUMN provenance JSONB; - --- Add GIN index for efficient querying by provenance fields -CREATE INDEX idx_haplogroup_provenance ON tree.haplogroup USING GIN (provenance); - --- Add comment for documentation -COMMENT ON COLUMN tree.haplogroup.provenance IS 'JSONB tracking node and variant provenance from multiple sources. Structure: {primaryCredit, nodeProvenance[], variantProvenance{}, lastMergedAt, lastMergedFrom}'; - -# --- !Downs - -DROP INDEX IF EXISTS tree.idx_haplogroup_provenance; -ALTER TABLE tree.haplogroup DROP COLUMN IF EXISTS provenance; diff --git a/conf/evolutions/default/53.sql b/conf/evolutions/default/53.sql deleted file mode 100644 index d0d283f6..00000000 --- a/conf/evolutions/default/53.sql +++ /dev/null @@ -1,251 +0,0 @@ -# --- !Ups - --- ============================================================================== --- VARIANT_V2: Consolidated variant table with JSONB coordinates and aliases --- Replaces: variant, variant_alias tables --- Reference: documents/proposals/variant-schema-simplification.md --- ============================================================================== - --- mutation_type values: --- Point mutations: SNP, INDEL, MNP --- Repeat variations: STR --- Structural variants: DEL, DUP, INS, INV, CNV, TRANS --- naming_status values: UNNAMED, PENDING_REVIEW, NAMED --- aliases structure: {common_names: [], rs_ids: [], sources: {ybrowse: [], isogg: [], ...}} --- coordinates structure: {hs1: {contig, position, ref, alt}, GRCh38: {...}, ...} - -CREATE TABLE variant_v2 ( - variant_id SERIAL PRIMARY KEY, - canonical_name TEXT, - mutation_type TEXT NOT NULL DEFAULT 'SNP', - naming_status TEXT NOT NULL DEFAULT 'UNNAMED', - aliases JSONB DEFAULT '{}'::jsonb, - coordinates JSONB DEFAULT '{}'::jsonb, - 
defining_haplogroup_id INTEGER REFERENCES tree.haplogroup(haplogroup_id) ON DELETE SET NULL, - evidence JSONB DEFAULT '{}'::jsonb, - primers JSONB DEFAULT '{}'::jsonb, - notes TEXT, - created_at TIMESTAMPTZ DEFAULT NOW(), - updated_at TIMESTAMPTZ DEFAULT NOW() -); - -COMMENT ON TABLE variant_v2 IS 'Consolidated variant table with JSONB coordinates supporting multiple reference genomes. One row per logical variant.'; -COMMENT ON COLUMN variant_v2.canonical_name IS 'Primary variant name (e.g., M269, DYS456). NULL for unnamed/novel variants.'; -COMMENT ON COLUMN variant_v2.mutation_type IS 'Variant type: SNP, INDEL, MNP (point) | STR (repeat) | DEL, DUP, INS, INV, CNV, TRANS (structural)'; -COMMENT ON COLUMN variant_v2.aliases IS 'JSONB containing all known names: {common_names: [], rs_ids: [], sources: {source: [names]}}'; -COMMENT ON COLUMN variant_v2.coordinates IS 'Per-assembly coordinates. Structure varies by mutation_type. hs1 is primary reference.'; -COMMENT ON COLUMN variant_v2.defining_haplogroup_id IS 'Haplogroup this variant defines. Distinguishes parallel mutations (same name, different lineages).'; - --- Unique constraint for named variants (allows parallel mutations with different haplogroups) -CREATE UNIQUE INDEX idx_variant_v2_name_haplogroup - ON variant_v2(canonical_name, COALESCE(defining_haplogroup_id, -1)) - WHERE canonical_name IS NOT NULL; - --- For unnamed variants, uniqueness based on hs1 coordinates (primary reference) -CREATE UNIQUE INDEX idx_variant_v2_unnamed_coordinates - ON variant_v2( - (coordinates->'hs1'->>'contig'), - ((coordinates->'hs1'->>'position')::int), - (coordinates->'hs1'->>'ref'), - (coordinates->'hs1'->>'alt') - ) - WHERE canonical_name IS NULL AND coordinates ? 
'hs1'; - --- Performance indexes -CREATE INDEX idx_variant_v2_canonical ON variant_v2(canonical_name); -CREATE INDEX idx_variant_v2_aliases ON variant_v2 USING GIN(aliases); -CREATE INDEX idx_variant_v2_coordinates ON variant_v2 USING GIN(coordinates); -CREATE INDEX idx_variant_v2_mutation_type ON variant_v2(mutation_type); -CREATE INDEX idx_variant_v2_defining_haplogroup ON variant_v2(defining_haplogroup_id); - --- Search index for alias common_names array -CREATE INDEX idx_variant_v2_alias_common_names ON variant_v2 - USING GIN((aliases->'common_names') jsonb_path_ops); - --- ============================================================================== --- SUPPORTING TABLES: ASR and branch mutation tracking --- ============================================================================== - --- Haplogroup character states (ASR reconstructed states at tree nodes) -CREATE TABLE haplogroup_character_state ( - id SERIAL PRIMARY KEY, - haplogroup_id INT NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE, - variant_id INT NOT NULL REFERENCES variant_v2(variant_id) ON DELETE CASCADE, - - -- The inferred state at this node - -- For SNPs: "ancestral" or "derived" (or the actual allele: "G", "A") - -- For STRs: the repeat count as string (e.g., "15") or "NULL" for null alleles - inferred_state TEXT NOT NULL, - - -- Confidence from ASR algorithm - confidence DECIMAL(5,4), -- 0.0000 to 1.0000 - - -- For uncertain reconstructions: probability distribution over states - state_probabilities JSONB, - -- Example: {"13": 0.05, "14": 0.25, "15": 0.65, "16": 0.05} - - -- ASR metadata - algorithm TEXT, -- "parsimony", "ml", "bayesian" - reconstructed_at TIMESTAMPTZ DEFAULT NOW(), - - UNIQUE(haplogroup_id, variant_id) -); - -CREATE INDEX idx_character_state_haplogroup ON haplogroup_character_state(haplogroup_id); -CREATE INDEX idx_character_state_variant ON haplogroup_character_state(variant_id); - -COMMENT ON TABLE haplogroup_character_state IS 'ASR reconstructed 
character states at haplogroup nodes. Replaces haplogroup_ancestral_str concept.'; -COMMENT ON COLUMN haplogroup_character_state.inferred_state IS 'Inferred state: SNP allele, STR repeat count, SV presence, etc.'; - --- Branch mutations (state changes along tree branches) -CREATE TABLE branch_mutation ( - id SERIAL PRIMARY KEY, - variant_id INT NOT NULL REFERENCES variant_v2(variant_id) ON DELETE CASCADE, - - -- The branch where the mutation occurred (parent -> child) - parent_haplogroup_id INT NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE, - child_haplogroup_id INT NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE, - - -- State transition - from_state TEXT NOT NULL, -- "G" or "15" - to_state TEXT NOT NULL, -- "A" or "16" - - -- For STRs: direction of change (+1 = expansion, -1 = contraction, NULL for SNPs) - step_direction INT, - - -- Confidence from ASR - confidence DECIMAL(5,4), - - UNIQUE(variant_id, parent_haplogroup_id, child_haplogroup_id) -); - -CREATE INDEX idx_branch_mutation_child ON branch_mutation(child_haplogroup_id); -CREATE INDEX idx_branch_mutation_parent ON branch_mutation(parent_haplogroup_id); -CREATE INDEX idx_branch_mutation_variant ON branch_mutation(variant_id); - -COMMENT ON TABLE branch_mutation IS 'State transitions along tree branches for all variant types (SNP, STR, SV).'; - --- Biosample variant calls (observed values from samples, input to ASR) -CREATE TABLE biosample_variant_call ( - id SERIAL PRIMARY KEY, - biosample_id INT NOT NULL REFERENCES biosample(id) ON DELETE CASCADE, - variant_id INT NOT NULL REFERENCES variant_v2(variant_id) ON DELETE CASCADE, - - -- The observed state - -- For SNPs: "ref", "alt", "het", or actual alleles - -- For STRs: repeat count as string (e.g., "15") or "NULL" - observed_state TEXT NOT NULL, - - -- Call quality metrics - quality_score INT, - read_depth INT, - confidence TEXT, -- "high", "medium", "low" - - -- Source attribution - source TEXT, -- "ftdna", 
"yfull", "user_upload" - created_at TIMESTAMPTZ DEFAULT NOW(), - - UNIQUE(biosample_id, variant_id) -); - -CREATE INDEX idx_biosample_variant_call_biosample ON biosample_variant_call(biosample_id); -CREATE INDEX idx_biosample_variant_call_variant ON biosample_variant_call(variant_id); - -COMMENT ON TABLE biosample_variant_call IS 'Observed variant calls from biosamples. Input data for ASR.'; - --- STR mutation rates (reference data for ASR and age estimation) -CREATE TABLE str_mutation_rate ( - id SERIAL PRIMARY KEY, - marker_name TEXT NOT NULL UNIQUE, -- DYS456, DYS389I, etc. - panel_names TEXT[], -- PowerPlex, YHRD, BigY, etc. - - -- Mutation rate per generation - mutation_rate DECIMAL(12,10) NOT NULL, - mutation_rate_lower DECIMAL(12,10), -- 95% CI lower - mutation_rate_upper DECIMAL(12,10), -- 95% CI upper - - -- Directional bias (for stepwise mutation model) - omega_plus DECIMAL(5,4) DEFAULT 0.5, -- Probability of expansion - omega_minus DECIMAL(5,4) DEFAULT 0.5, -- Probability of contraction - - -- Multi-step mutation frequencies - multi_step_rate DECIMAL(5,4), -- omega_2 + omega_3 + ... - - source TEXT, -- Ballantyne 2010, Willems 2016, etc. - created_at TIMESTAMPTZ DEFAULT NOW() -); - -CREATE INDEX idx_str_mutation_rate_marker ON str_mutation_rate(marker_name); - -COMMENT ON TABLE str_mutation_rate IS 'Per-marker STR mutation rates for ASR and age estimation. Sources: Ballantyne 2010, Willems 2016.'; - --- ============================================================================== --- NOTE: Old tables (variant, variant_alias, str_marker) are NOT dropped here. --- Data migration and cleanup should be done manually: --- 1. Run migration script to consolidate data into variant_v2 --- 2. Update haplogroup_variant FK references --- 3. 
Drop old tables after verification --- ============================================================================== - -# --- !Downs - --- Recreate old tables (structure only - data would need restoration from backup) -CREATE TABLE variant ( - variant_id SERIAL PRIMARY KEY, - genbank_contig_id INT NOT NULL, - position INTEGER NOT NULL, - reference_allele VARCHAR(255) NOT NULL, - alternate_allele VARCHAR(255) NOT NULL, - variant_type VARCHAR(10) NOT NULL CHECK (variant_type IN ('SNP', 'INDEL')), - rs_id VARCHAR(255), - common_name VARCHAR(255), - FOREIGN KEY (genbank_contig_id) REFERENCES genbank_contig(genbank_contig_id) ON DELETE CASCADE, - UNIQUE (genbank_contig_id, position, reference_allele, alternate_allele) -); - -CREATE INDEX idx_variant_common_name ON variant(common_name); -CREATE INDEX idx_variant_rs_id ON variant(rs_id); -CREATE INDEX idx_variant_position ON variant(genbank_contig_id, position); - -CREATE TABLE variant_alias ( - id SERIAL PRIMARY KEY, - variant_id INT NOT NULL REFERENCES variant(variant_id) ON DELETE CASCADE, - alias_type VARCHAR(50) NOT NULL, - alias_value VARCHAR(255) NOT NULL, - source VARCHAR(255), - is_primary BOOLEAN DEFAULT FALSE, - created_at TIMESTAMP DEFAULT NOW() NOT NULL, - UNIQUE(variant_id, alias_type, alias_value) -); - -CREATE INDEX idx_variant_alias_variant ON variant_alias(variant_id); -CREATE INDEX idx_variant_alias_value ON variant_alias(alias_value); -CREATE INDEX idx_variant_alias_type_value ON variant_alias(alias_type, alias_value); - -CREATE TABLE str_marker ( - id SERIAL PRIMARY KEY, - genbank_contig_id INT NOT NULL REFERENCES genbank_contig(genbank_contig_id) ON DELETE CASCADE, - name VARCHAR(50) NOT NULL, - start_pos BIGINT NOT NULL, - end_pos BIGINT NOT NULL, - period INT NOT NULL, - verified BOOLEAN DEFAULT FALSE, - note TEXT, - UNIQUE(genbank_contig_id, name) -); - -CREATE INDEX idx_str_marker_contig ON str_marker(genbank_contig_id); - --- Drop new tables -DROP TABLE IF EXISTS str_mutation_rate CASCADE; 
-DROP TABLE IF EXISTS biosample_variant_call CASCADE; -DROP TABLE IF EXISTS branch_mutation CASCADE; -DROP TABLE IF EXISTS haplogroup_character_state CASCADE; - --- Restore FK on haplogroup_variant (will need manual data restoration) -ALTER TABLE tree.haplogroup_variant DROP CONSTRAINT IF EXISTS haplogroup_variant_variant_id_fkey; -ALTER TABLE tree.haplogroup_variant -ADD CONSTRAINT haplogroup_variant_variant_id_fkey -FOREIGN KEY (variant_id) REFERENCES variant(variant_id) ON DELETE CASCADE; - -DROP TABLE IF EXISTS variant_v2 CASCADE; diff --git a/conf/evolutions/default/54.sql b/conf/evolutions/default/54.sql deleted file mode 100644 index 644c7bfa..00000000 --- a/conf/evolutions/default/54.sql +++ /dev/null @@ -1,13 +0,0 @@ -# --- !Ups - --- Sequence for DecodingUs variant names -CREATE SEQUENCE IF NOT EXISTS du_variant_name_seq START WITH 1; - -COMMENT ON SEQUENCE du_variant_name_seq IS 'Sequence for DecodingUs (DU) variant naming authority'; - -# --- !Downs - -DROP FUNCTION IF EXISTS is_du_name(TEXT); -DROP FUNCTION IF EXISTS current_du_name(); -DROP FUNCTION IF EXISTS next_du_name(); -DROP SEQUENCE IF EXISTS du_variant_name_seq; diff --git a/conf/evolutions/default/55.sql b/conf/evolutions/default/55.sql deleted file mode 100644 index 58236a1b..00000000 --- a/conf/evolutions/default/55.sql +++ /dev/null @@ -1,45 +0,0 @@ -# --- !Ups - -DROP TABLE IF EXISTS genome_region; -DROP TABLE IF EXISTS cytoband; - -CREATE TABLE genome_region_v2 ( - region_id SERIAL PRIMARY KEY, - region_type TEXT NOT NULL, - name TEXT, - coordinates JSONB NOT NULL, - properties JSONB DEFAULT '{}', - UNIQUE(region_type, name) -); - -CREATE INDEX idx_genome_region_v2_coords ON genome_region_v2 USING GIN(coordinates); - --- Efficient lookup: "What region contains GRCh38:chrY:15000000?" 
-CREATE INDEX idx_genome_region_v2_grch38_range ON genome_region_v2 ( - (coordinates->'GRCh38'->>'contig'), - ((coordinates->'GRCh38'->>'start')::bigint), - ((coordinates->'GRCh38'->>'end')::bigint) -); - -# --- !Downs - -DROP TABLE IF EXISTS genome_region_v2; - -CREATE TABLE genome_region ( - id SERIAL PRIMARY KEY, - genbank_contig_id INT NOT NULL, - region_type TEXT NOT NULL, - name TEXT, - start_pos BIGINT NOT NULL, - end_pos BIGINT NOT NULL, - modifier NUMERIC -); - -CREATE TABLE cytoband ( - id SERIAL PRIMARY KEY, - genbank_contig_id INT NOT NULL, - name TEXT NOT NULL, - start_pos BIGINT NOT NULL, - end_pos BIGINT NOT NULL, - stain TEXT NOT NULL -); diff --git a/conf/evolutions/default/56.sql b/conf/evolutions/default/56.sql deleted file mode 100644 index cd9439f9..00000000 --- a/conf/evolutions/default/56.sql +++ /dev/null @@ -1,12 +0,0 @@ -# --- !Ups - -ALTER TABLE variant_v2 ADD COLUMN annotations JSONB DEFAULT '{}'::jsonb; - -COMMENT ON COLUMN variant_v2.annotations IS 'Computed region overlaps (e.g., Cytobands, PAR, STR overlaps). Managed by background jobs.'; - -CREATE INDEX idx_variant_v2_annotations ON variant_v2 USING GIN(annotations); - -# --- !Downs - -DROP INDEX IF EXISTS idx_variant_v2_annotations; -ALTER TABLE variant_v2 DROP COLUMN annotations; diff --git a/conf/evolutions/default/57.sql b/conf/evolutions/default/57.sql deleted file mode 100644 index 437863e3..00000000 --- a/conf/evolutions/default/57.sql +++ /dev/null @@ -1,207 +0,0 @@ -# --- !Ups - --- ============================================================================ --- Evolution 57: Tree Versioning System --- ============================================================================ --- Introduces Production/WIP tree versioning for bulk merge operations. --- Change sets track groups of changes from external sources (ISOGG, ytree.net). --- Individual changes are recorded for curator review before promotion. 
--- ============================================================================ - --- Change set status enum -CREATE TYPE tree.change_set_status AS ENUM ( - 'DRAFT', -- Being built (merge in progress) - 'READY_FOR_REVIEW', -- Merge complete, awaiting curator - 'UNDER_REVIEW', -- Curator actively reviewing - 'APPLIED', -- Changes applied to Production - 'DISCARDED' -- Changes abandoned -); - --- Tree change type enum -CREATE TYPE tree.tree_change_type AS ENUM ( - 'CREATE', -- New haplogroup created - 'UPDATE', -- Haplogroup metadata updated - 'DELETE', -- Haplogroup deleted (soft) - 'REPARENT', -- Parent relationship changed - 'ADD_VARIANT', -- Variant associated with haplogroup - 'REMOVE_VARIANT' -- Variant disassociated from haplogroup -); - --- Change status enum -CREATE TYPE tree.change_status AS ENUM ( - 'PENDING', -- Not yet applied - 'APPLIED', -- Successfully applied to Production - 'REVERTED', -- Undone by curator - 'SKIPPED' -- Excluded from promotion by curator -); - --- ============================================================================ --- Change Sets: Groups of related changes from a single merge operation --- ============================================================================ - -CREATE TABLE tree.change_set ( - id SERIAL PRIMARY KEY, - haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')), - name VARCHAR(100) NOT NULL, - description TEXT, - source_name VARCHAR(100) NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - created_by VARCHAR(255) NOT NULL, - finalized_at TIMESTAMP, - applied_at TIMESTAMP, - applied_by VARCHAR(255), - discarded_at TIMESTAMP, - discarded_by VARCHAR(255), - discard_reason TEXT, - status tree.change_set_status NOT NULL DEFAULT 'DRAFT', - - -- Statistics snapshot from merge - nodes_processed INTEGER DEFAULT 0, - nodes_created INTEGER DEFAULT 0, - nodes_updated INTEGER DEFAULT 0, - nodes_unchanged INTEGER DEFAULT 0, - variants_added INTEGER DEFAULT 0, - relationships_created INTEGER 
DEFAULT 0, - relationships_updated INTEGER DEFAULT 0, - split_operations INTEGER DEFAULT 0, - ambiguity_count INTEGER DEFAULT 0, - - -- Path to generated ambiguity report - ambiguity_report_path VARCHAR(500), - - -- Additional metadata - metadata JSONB DEFAULT '{}', - - UNIQUE(haplogroup_type, name) -); - -CREATE INDEX idx_change_set_type ON tree.change_set(haplogroup_type); -CREATE INDEX idx_change_set_status ON tree.change_set(status); -CREATE INDEX idx_change_set_source ON tree.change_set(source_name); -CREATE INDEX idx_change_set_created ON tree.change_set(created_at); - --- ============================================================================ --- Tree Changes: Individual changes within a change set --- ============================================================================ - -CREATE TABLE tree.tree_change ( - id SERIAL PRIMARY KEY, - change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, - change_type tree.tree_change_type NOT NULL, - - -- Target identification (for UPDATE/DELETE/REPARENT) - haplogroup_id INTEGER REFERENCES tree.haplogroup(haplogroup_id), - - -- For variant operations - variant_id INTEGER REFERENCES variant(variant_id), - - -- Parent tracking (for CREATE and REPARENT) - old_parent_id INTEGER REFERENCES tree.haplogroup(haplogroup_id), - new_parent_id INTEGER REFERENCES tree.haplogroup(haplogroup_id), - - -- Full data snapshots (JSONB for flexibility) - haplogroup_data JSONB, -- Full haplogroup for CREATE, new values for UPDATE - old_data JSONB, -- Previous state for UPDATE (audit trail) - - -- For newly created haplogroups, track the assigned ID after apply - created_haplogroup_id INTEGER REFERENCES tree.haplogroup(haplogroup_id), - - -- Ordering and status - sequence_num INTEGER NOT NULL, - status tree.change_status NOT NULL DEFAULT 'PENDING', - - -- Curator review - reviewed_at TIMESTAMP, - reviewed_by VARCHAR(255), - review_notes TEXT, - - -- Timestamps - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - 
applied_at TIMESTAMP, - - -- Ambiguity reference (if this change relates to an ambiguous placement) - ambiguity_type VARCHAR(50), - ambiguity_confidence DOUBLE PRECISION -); - -CREATE INDEX idx_tree_change_set ON tree.tree_change(change_set_id); -CREATE INDEX idx_tree_change_hg ON tree.tree_change(haplogroup_id); -CREATE INDEX idx_tree_change_type ON tree.tree_change(change_type); -CREATE INDEX idx_tree_change_status ON tree.tree_change(status); -CREATE INDEX idx_tree_change_seq ON tree.tree_change(change_set_id, sequence_num); - --- ============================================================================ --- Change Set Comments: Discussion thread for curator collaboration --- ============================================================================ - -CREATE TABLE tree.change_set_comment ( - id SERIAL PRIMARY KEY, - change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, - tree_change_id INTEGER REFERENCES tree.tree_change(id) ON DELETE CASCADE, - author VARCHAR(255) NOT NULL, - content TEXT NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP -); - -CREATE INDEX idx_change_set_comment_set ON tree.change_set_comment(change_set_id); -CREATE INDEX idx_change_set_comment_change ON tree.change_set_comment(tree_change_id); - --- ============================================================================ --- Views for easy querying --- ============================================================================ - --- Active (non-applied, non-discarded) change sets -CREATE VIEW tree.active_change_sets AS -SELECT * -FROM tree.change_set -WHERE status NOT IN ('APPLIED', 'DISCARDED'); - --- Change set summary with review progress -CREATE VIEW tree.change_set_summary AS -SELECT - cs.id, - cs.haplogroup_type, - cs.name, - cs.source_name, - cs.status, - cs.created_at, - cs.created_by, - cs.nodes_created, - cs.nodes_updated, - cs.ambiguity_count, - COUNT(tc.id) AS total_changes, - COUNT(tc.id) FILTER (WHERE tc.status = 
'PENDING') AS pending_changes, - COUNT(tc.id) FILTER (WHERE tc.status = 'APPLIED') AS applied_changes, - COUNT(tc.id) FILTER (WHERE tc.status = 'SKIPPED') AS skipped_changes, - COUNT(tc.id) FILTER (WHERE tc.reviewed_at IS NOT NULL) AS reviewed_changes -FROM tree.change_set cs -LEFT JOIN tree.tree_change tc ON tc.change_set_id = cs.id -GROUP BY cs.id; - --- Pending changes requiring review (high priority = low confidence) -CREATE VIEW tree.pending_review_changes AS -SELECT - tc.*, - cs.name AS change_set_name, - cs.source_name, - h.name AS haplogroup_name -FROM tree.tree_change tc -JOIN tree.change_set cs ON cs.id = tc.change_set_id -LEFT JOIN tree.haplogroup h ON h.haplogroup_id = tc.haplogroup_id -WHERE tc.status = 'PENDING' - AND cs.status IN ('READY_FOR_REVIEW', 'UNDER_REVIEW') -ORDER BY tc.ambiguity_confidence ASC NULLS LAST, tc.sequence_num; - - -# --- !Downs - -DROP VIEW IF EXISTS tree.pending_review_changes; -DROP VIEW IF EXISTS tree.change_set_summary; -DROP VIEW IF EXISTS tree.active_change_sets; -DROP TABLE IF EXISTS tree.change_set_comment; -DROP TABLE IF EXISTS tree.tree_change; -DROP TABLE IF EXISTS tree.change_set; -DROP TYPE IF EXISTS tree.change_status; -DROP TYPE IF EXISTS tree.tree_change_type; -DROP TYPE IF EXISTS tree.change_set_status; diff --git a/conf/evolutions/default/58.sql b/conf/evolutions/default/58.sql deleted file mode 100644 index f1900141..00000000 --- a/conf/evolutions/default/58.sql +++ /dev/null @@ -1,38 +0,0 @@ --- !Ups - --- Tree versioning curator permissions -INSERT INTO auth.permissions (id, name, description, created_at, updated_at) VALUES - (gen_random_uuid(), 'tree.version.view', 'View change sets and diffs from tree merge operations', NOW(), NOW()), - (gen_random_uuid(), 'tree.version.review', 'Review and approve/reject individual changes', NOW(), NOW()), - (gen_random_uuid(), 'tree.version.promote', 'Apply approved change sets to production', NOW(), NOW()), - (gen_random_uuid(), 'tree.version.discard', 'Discard change 
sets', NOW(), NOW()) -ON CONFLICT (name) DO NOTHING; - --- Grant to TreeCurator role -INSERT INTO auth.role_permissions (role_id, permission_id) -SELECT r.id, p.id FROM auth.roles r, auth.permissions p -WHERE r.name = 'TreeCurator' - AND p.name IN ('tree.version.view', 'tree.version.review', 'tree.version.promote', 'tree.version.discard') -ON CONFLICT DO NOTHING; - --- Grant to Curator role -INSERT INTO auth.role_permissions (role_id, permission_id) -SELECT r.id, p.id FROM auth.roles r, auth.permissions p -WHERE r.name = 'Curator' - AND p.name IN ('tree.version.view', 'tree.version.review', 'tree.version.promote', 'tree.version.discard') -ON CONFLICT DO NOTHING; - --- Grant to Admin role -INSERT INTO auth.role_permissions (role_id, permission_id) -SELECT r.id, p.id FROM auth.roles r, auth.permissions p -WHERE r.name = 'Admin' - AND p.name IN ('tree.version.view', 'tree.version.review', 'tree.version.promote', 'tree.version.discard') -ON CONFLICT DO NOTHING; - --- !Downs - -DELETE FROM auth.role_permissions -WHERE permission_id IN (SELECT id FROM auth.permissions WHERE name LIKE 'tree.version.%'); - -DELETE FROM auth.permissions -WHERE name LIKE 'tree.version.%'; diff --git a/conf/evolutions/default/59.sql b/conf/evolutions/default/59.sql deleted file mode 100644 index 16d84ba1..00000000 --- a/conf/evolutions/default/59.sql +++ /dev/null @@ -1,144 +0,0 @@ --- !Ups - --- Shadow/WIP tables for staging tree changes before production apply --- Each table is scoped by change_set_id for easy cleanup on discard - --- WIP haplogroups - staged nodes not yet in production -CREATE TABLE tree.wip_haplogroup ( - wip_haplogroup_id SERIAL PRIMARY KEY, - change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, - - -- Placeholder ID used during merge (negative numbers to avoid collision) - placeholder_id INTEGER NOT NULL, - - -- Haplogroup data (mirrors tree.haplogroup structure) - name VARCHAR(255) NOT NULL, - lineage VARCHAR(255), - description TEXT, - 
haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')), - source VARCHAR(255) NOT NULL, - confidence_level VARCHAR(255) NOT NULL DEFAULT 'medium', - - -- Age estimates - formed_ybp INTEGER, - formed_ybp_lower INTEGER, - formed_ybp_upper INTEGER, - tmrca_ybp INTEGER, - tmrca_ybp_lower INTEGER, - tmrca_ybp_upper INTEGER, - age_estimate_source VARCHAR(255), - - -- Provenance tracking - provenance JSONB, - - -- Timestamps - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - - -- Unique within a change set - UNIQUE (change_set_id, placeholder_id), - UNIQUE (change_set_id, name) -); - -CREATE INDEX idx_wip_haplogroup_change_set ON tree.wip_haplogroup(change_set_id); -CREATE INDEX idx_wip_haplogroup_name ON tree.wip_haplogroup(name); - --- WIP relationships - staged parent-child relationships --- Can reference either production haplogroups (by real ID) or WIP haplogroups (by placeholder ID) -CREATE TABLE tree.wip_haplogroup_relationship ( - wip_relationship_id SERIAL PRIMARY KEY, - change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, - - -- Child reference: either a real haplogroup ID or a placeholder (negative) ID - child_haplogroup_id INTEGER, -- NULL if child is a WIP node - child_placeholder_id INTEGER, -- NULL if child is a production node - - -- Parent reference: either a real haplogroup ID or a placeholder (negative) ID - parent_haplogroup_id INTEGER, -- NULL if parent is a WIP node - parent_placeholder_id INTEGER, -- NULL if parent is a production node - - -- Metadata - source VARCHAR(255) NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - - -- Constraints - CHECK ( - (child_haplogroup_id IS NOT NULL AND child_placeholder_id IS NULL) OR - (child_haplogroup_id IS NULL AND child_placeholder_id IS NOT NULL) - ), - CHECK ( - (parent_haplogroup_id IS NOT NULL AND parent_placeholder_id IS NULL) OR - (parent_haplogroup_id IS NULL AND parent_placeholder_id IS NOT NULL) - ) -); - -CREATE INDEX 
idx_wip_relationship_change_set ON tree.wip_haplogroup_relationship(change_set_id); -CREATE INDEX idx_wip_relationship_child ON tree.wip_haplogroup_relationship(child_haplogroup_id); -CREATE INDEX idx_wip_relationship_parent ON tree.wip_haplogroup_relationship(parent_haplogroup_id); - --- WIP variant associations - staged variant links -CREATE TABLE tree.wip_haplogroup_variant ( - wip_haplogroup_variant_id SERIAL PRIMARY KEY, - change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, - - -- Haplogroup reference: either real ID or placeholder - haplogroup_id INTEGER, -- NULL if haplogroup is a WIP node - haplogroup_placeholder_id INTEGER, -- NULL if haplogroup is a production node - - -- Variant reference (always a real variant ID from genomics.variant_v2) - variant_id INTEGER NOT NULL, - - -- Metadata - source VARCHAR(255), - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - - -- Constraints - CHECK ( - (haplogroup_id IS NOT NULL AND haplogroup_placeholder_id IS NULL) OR - (haplogroup_id IS NULL AND haplogroup_placeholder_id IS NOT NULL) - ), - - -- Unique variant per haplogroup within a change set - UNIQUE (change_set_id, haplogroup_id, variant_id), - UNIQUE (change_set_id, haplogroup_placeholder_id, variant_id) -); - -CREATE INDEX idx_wip_variant_change_set ON tree.wip_haplogroup_variant(change_set_id); -CREATE INDEX idx_wip_variant_haplogroup ON tree.wip_haplogroup_variant(haplogroup_id); - --- WIP reparent operations - tracks existing nodes that should be moved -CREATE TABLE tree.wip_reparent ( - wip_reparent_id SERIAL PRIMARY KEY, - change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, - - -- The existing production haplogroup to reparent - haplogroup_id INTEGER NOT NULL, - - -- Current parent in production (for rollback reference) - old_parent_id INTEGER, - - -- New parent: either real ID or placeholder - new_parent_id INTEGER, -- NULL if new parent is a WIP node - new_parent_placeholder_id INTEGER, -- NULL if 
new parent is a production node - - -- Metadata - source VARCHAR(255) NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - - -- Constraints - CHECK ( - (new_parent_id IS NOT NULL AND new_parent_placeholder_id IS NULL) OR - (new_parent_id IS NULL AND new_parent_placeholder_id IS NOT NULL) - ), - - -- Only one reparent per haplogroup per change set - UNIQUE (change_set_id, haplogroup_id) -); - -CREATE INDEX idx_wip_reparent_change_set ON tree.wip_reparent(change_set_id); - --- !Downs - -DROP TABLE IF EXISTS tree.wip_reparent; -DROP TABLE IF EXISTS tree.wip_haplogroup_variant; -DROP TABLE IF EXISTS tree.wip_haplogroup_relationship; -DROP TABLE IF EXISTS tree.wip_haplogroup; diff --git a/conf/evolutions/default/6.sql b/conf/evolutions/default/6.sql deleted file mode 100644 index 24f5e6a1..00000000 --- a/conf/evolutions/default/6.sql +++ /dev/null @@ -1,189 +0,0 @@ -# --- !Ups ---- Add tables for Authentication and Authorization -CREATE SCHEMA auth; -CREATE EXTENSION IF NOT EXISTS citext; - --- Schema: public --- Users Table -CREATE TABLE public.users -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - email CITEXT UNIQUE, - did VARCHAR(255) UNIQUE NOT NULL, - handle VARCHAR(255) UNIQUE, - display_name VARCHAR(255), - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - is_active BOOLEAN NOT NULL DEFAULT TRUE -); - --- User PDS Information -CREATE TABLE public.user_pds_info -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - user_id UUID UNIQUE NOT NULL, - pds_url VARCHAR(255) NOT NULL, - did VARCHAR(255) UNIQUE NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - CONSTRAINT fk_user_pds_info_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE, - CONSTRAINT fk_user_pds_info_did FOREIGN KEY (did) REFERENCES public.users (did) ON DELETE CASCADE -); - --- Reputation Event Types Table -CREATE TABLE public.reputation_event_types -( - id UUID PRIMARY 
KEY DEFAULT gen_random_uuid(), - name VARCHAR(100) UNIQUE NOT NULL, - description TEXT, - default_points_change INTEGER NOT NULL, - is_positive BOOLEAN NOT NULL, - is_system_generated BOOLEAN NOT NULL DEFAULT FALSE, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW() -); - --- Reputation Events Log Table -CREATE TABLE public.reputation_events -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - user_id UUID NOT NULL, - event_type_id UUID NOT NULL, - actual_points_change INTEGER NOT NULL, - source_user_id UUID, -- NULL if system-generated - related_entity_type VARCHAR(50), - related_entity_id UUID, -- For specific post/comment/etc. - notes TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - CONSTRAINT fk_reputation_events_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE, - CONSTRAINT fk_reputation_events_event_type_id FOREIGN KEY (event_type_id) REFERENCES public.reputation_event_types (id) ON DELETE RESTRICT, -- RESTRICT to prevent deleting event types that are referenced - CONSTRAINT fk_reputation_events_source_user_id FOREIGN KEY (source_user_id) REFERENCES public.users (id) ON DELETE SET NULL -- Set to NULL if source user is deleted -); - --- User Reputation Scores Table (Aggregated Score) -CREATE TABLE public.user_reputation_scores -( - user_id UUID PRIMARY KEY, - score BIGINT NOT NULL DEFAULT 0, - last_calculated_at TIMESTAMP NOT NULL DEFAULT NOW(), - CONSTRAINT fk_user_reputation_scores_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE -); - --- Schema: auth - --- User Login Info Table -CREATE TABLE auth.user_login_info -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - user_id UUID NOT NULL, -- Links to public.users - provider_id VARCHAR(255) NOT NULL, - provider_key VARCHAR(255) NOT NULL, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - CONSTRAINT fk_auth_user_login_info_user_id FOREIGN KEY (user_id) 
REFERENCES public.users (id) ON DELETE CASCADE, - CONSTRAINT uq_auth_provider_id_key UNIQUE (provider_id, provider_key) -); - --- User OAuth2 Info Table (for storing tokens) -CREATE TABLE auth.user_oauth2_info -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - login_info_id UUID UNIQUE NOT NULL, - access_token TEXT NOT NULL, - token_type VARCHAR(50), - expires_in BIGINT, - refresh_token TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - scope TEXT, - CONSTRAINT fk_auth_user_oauth2_info_login_info_id FOREIGN KEY (login_info_id) REFERENCES auth.user_login_info (id) ON DELETE CASCADE -); - --- Roles Table -CREATE TABLE auth.roles -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - name VARCHAR(255) UNIQUE NOT NULL, - description TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW() -); - --- Permissions Table -CREATE TABLE auth.permissions -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - name VARCHAR(255) UNIQUE NOT NULL, - description TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW() -); - --- Role Permissions Table (Many-to-Many) -CREATE TABLE auth.role_permissions -( - role_id UUID NOT NULL, - permission_id UUID NOT NULL, - PRIMARY KEY (role_id, permission_id), - CONSTRAINT fk_auth_role_permissions_role_id FOREIGN KEY (role_id) REFERENCES auth.roles (id) ON DELETE CASCADE, - CONSTRAINT fk_auth_role_permissions_permission_id FOREIGN KEY (permission_id) REFERENCES auth.permissions (id) ON DELETE CASCADE -); - --- User Roles Table (Many-to-Many) -CREATE TABLE auth.user_roles -( - user_id UUID NOT NULL, -- Links to public.users - role_id UUID NOT NULL, - PRIMARY KEY (user_id, role_id), - CONSTRAINT fk_auth_user_roles_user_id FOREIGN KEY (user_id) REFERENCES public.users (id) ON DELETE CASCADE, - CONSTRAINT fk_auth_user_roles_role_id FOREIGN KEY (role_id) REFERENCES auth.roles (id) ON DELETE CASCADE -); - --- AT 
Protocol Authorization Servers -CREATE TABLE auth.atprotocol_authorization_servers -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - issuer_url VARCHAR(255) UNIQUE NOT NULL, - authorization_endpoint VARCHAR(255), - token_endpoint VARCHAR(255), - pushed_authorization_request_endpoint VARCHAR(255), - dpop_signing_alg_values_supported TEXT, - scopes_supported TEXT, - client_id_metadata_document_supported BOOLEAN, - metadata_fetched_at TIMESTAMP NOT NULL DEFAULT NOW(), - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW() -); - --- AT Protocol Client Metadata -CREATE TABLE auth.atprotocol_client_metadata -( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - client_id_url VARCHAR(255) UNIQUE NOT NULL, - client_name VARCHAR(255), - client_uri VARCHAR(255), - logo_uri VARCHAR(255), - tos_uri VARCHAR(255), - policy_uri VARCHAR(255), - redirect_uris TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW() -); - -# --- !Downs - -DROP TABLE auth.atprotocol_client_metadata; -DROP TABLE auth.atprotocol_authorization_servers; -DROP TABLE auth.user_roles; -DROP TABLE auth.role_permissions; -DROP TABLE auth.permissions; -DROP TABLE auth.roles; -DROP TABLE auth.user_oauth2_info; -DROP TABLE auth.user_login_info; -DROP TABLE public.user_reputation_scores; -DROP TABLE public.reputation_events; -DROP TABLE public.reputation_event_types; -DROP TABLE public.user_pds_info; -DROP TABLE public.users; - -DROP SCHEMA auth; \ No newline at end of file diff --git a/conf/evolutions/default/60.sql b/conf/evolutions/default/60.sql deleted file mode 100644 index c02849e9..00000000 --- a/conf/evolutions/default/60.sql +++ /dev/null @@ -1,65 +0,0 @@ --- !Ups - --- Curator conflict resolution table --- Allows curators to correct/override merge algorithm decisions before applying - -CREATE TABLE tree.wip_resolution ( - resolution_id SERIAL PRIMARY KEY, - change_set_id INTEGER NOT NULL REFERENCES tree.change_set(id) ON 
DELETE CASCADE, - - -- What we're resolving (at least one must be set) - wip_haplogroup_id INTEGER REFERENCES tree.wip_haplogroup(wip_haplogroup_id) ON DELETE CASCADE, - wip_reparent_id INTEGER REFERENCES tree.wip_reparent(wip_reparent_id) ON DELETE CASCADE, - - -- Resolution type - resolution_type VARCHAR(50) NOT NULL - CHECK (resolution_type IN ('REPARENT', 'EDIT_VARIANTS', 'MERGE_EXISTING', 'DEFER')), - - -- REPARENT: Change the parent of a node - new_parent_id INTEGER, -- Production haplogroup ID - new_parent_placeholder_id INTEGER, -- WIP haplogroup placeholder ID - - -- MERGE_EXISTING: Map WIP node to existing production node (don't create) - merge_target_id INTEGER, -- Production haplogroup to merge into - - -- EDIT_VARIANTS: Add or remove variant associations - variants_to_add JSONB DEFAULT '[]', -- Array of variant IDs to add - variants_to_remove JSONB DEFAULT '[]', -- Array of variant IDs to remove - - -- DEFER: Move to manual review queue - defer_reason TEXT, - defer_priority VARCHAR(20) DEFAULT 'NORMAL' - CHECK (defer_priority IN ('LOW', 'NORMAL', 'HIGH', 'CRITICAL')), - - -- Curator tracking - curator_id VARCHAR(100) NOT NULL, - curator_notes TEXT, - - -- Status tracking - status VARCHAR(20) NOT NULL DEFAULT 'PENDING' - CHECK (status IN ('PENDING', 'APPLIED', 'CANCELLED')), - - -- Timestamps - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - applied_at TIMESTAMP, - - -- At least one target must be specified - CONSTRAINT wip_resolution_has_target CHECK ( - wip_haplogroup_id IS NOT NULL OR - wip_reparent_id IS NOT NULL - ) -); - -CREATE INDEX idx_wip_resolution_change_set ON tree.wip_resolution(change_set_id); -CREATE INDEX idx_wip_resolution_status ON tree.wip_resolution(status); -CREATE INDEX idx_wip_resolution_type ON tree.wip_resolution(resolution_type); -CREATE INDEX idx_wip_resolution_wip_haplogroup ON tree.wip_resolution(wip_haplogroup_id) WHERE wip_haplogroup_id IS NOT NULL; -CREATE INDEX idx_wip_resolution_wip_reparent ON 
tree.wip_resolution(wip_reparent_id) WHERE wip_reparent_id IS NOT NULL; - --- Add comment explaining the table -COMMENT ON TABLE tree.wip_resolution IS 'Curator corrections to merge algorithm decisions. Applied during change set promotion.'; -COMMENT ON COLUMN tree.wip_resolution.resolution_type IS 'REPARENT=change parent, EDIT_VARIANTS=add/remove SNPs, MERGE_EXISTING=map to existing node, DEFER=needs manual review'; - --- !Downs - -DROP TABLE IF EXISTS tree.wip_resolution; diff --git a/conf/evolutions/default/61.sql b/conf/evolutions/default/61.sql deleted file mode 100644 index 75c5acb8..00000000 --- a/conf/evolutions/default/61.sql +++ /dev/null @@ -1,146 +0,0 @@ --- !Ups - --- ============================================================================ --- Evolution 61: Haplogroup Discovery System Tables --- Creates the discovery pipeline tables in the tree schema for tracking --- private variants, proposed branches, evidence, and curator actions. --- ============================================================================ - --- Private variants discovered in biosamples (unified across both Citizen and External) -CREATE TABLE tree.biosample_private_variant ( - id SERIAL PRIMARY KEY, - sample_type VARCHAR(20) NOT NULL CHECK (sample_type IN ('CITIZEN', 'EXTERNAL')), - sample_id INTEGER NOT NULL, - sample_guid UUID NOT NULL, - variant_id INTEGER NOT NULL, - haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')), - terminal_haplogroup_id INTEGER NOT NULL REFERENCES tree.haplogroup(haplogroup_id), - discovered_at TIMESTAMP NOT NULL DEFAULT NOW(), - status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE' - CHECK (status IN ('ACTIVE', 'PROMOTED', 'INVALIDATED')), - UNIQUE(sample_type, sample_id, variant_id, haplogroup_type) -); - -CREATE INDEX idx_bpv_sample ON tree.biosample_private_variant(sample_type, sample_id); -CREATE INDEX idx_bpv_guid ON tree.biosample_private_variant(sample_guid); -CREATE INDEX idx_bpv_variant ON 
tree.biosample_private_variant(variant_id); -CREATE INDEX idx_bpv_terminal ON tree.biosample_private_variant(terminal_haplogroup_id); -CREATE INDEX idx_bpv_status ON tree.biosample_private_variant(status); - -COMMENT ON TABLE tree.biosample_private_variant IS 'Tracks private (mismatching) variants discovered in biosamples that extend beyond the current terminal haplogroup.'; - --- Proposed branches awaiting consensus/review -CREATE TABLE tree.proposed_branch ( - id SERIAL PRIMARY KEY, - parent_haplogroup_id INTEGER NOT NULL REFERENCES tree.haplogroup(haplogroup_id), - proposed_name VARCHAR(100), - haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')), - status VARCHAR(20) NOT NULL DEFAULT 'PENDING' - CHECK (status IN ('PENDING', 'READY_FOR_REVIEW', 'UNDER_REVIEW', - 'ACCEPTED', 'PROMOTED', 'REJECTED', 'SPLIT')), - consensus_count INTEGER NOT NULL DEFAULT 0, - confidence_score DOUBLE PRECISION NOT NULL DEFAULT 0.0, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - reviewed_at TIMESTAMP, - reviewed_by VARCHAR(255), - notes TEXT, - promoted_haplogroup_id INTEGER REFERENCES tree.haplogroup(haplogroup_id) -); - -CREATE INDEX idx_pb_parent ON tree.proposed_branch(parent_haplogroup_id); -CREATE INDEX idx_pb_status ON tree.proposed_branch(status); -CREATE INDEX idx_pb_type ON tree.proposed_branch(haplogroup_type); - -COMMENT ON TABLE tree.proposed_branch IS 'Candidate branches proposed by the discovery system when shared private variants are detected across multiple biosamples.'; - --- Variants associated with proposed branches -CREATE TABLE tree.proposed_branch_variant ( - id SERIAL PRIMARY KEY, - proposed_branch_id INTEGER NOT NULL REFERENCES tree.proposed_branch(id) ON DELETE CASCADE, - variant_id INTEGER NOT NULL, - is_defining BOOLEAN NOT NULL DEFAULT TRUE, - evidence_count INTEGER NOT NULL DEFAULT 1, - first_observed_at TIMESTAMP NOT NULL DEFAULT NOW(), - last_observed_at TIMESTAMP NOT NULL DEFAULT 
NOW(), - UNIQUE(proposed_branch_id, variant_id) -); - -CREATE INDEX idx_pbv_variant ON tree.proposed_branch_variant(variant_id); - -COMMENT ON TABLE tree.proposed_branch_variant IS 'Links proposed branches to their defining variants with evidence tracking.'; - --- Biosamples supporting proposed branches (unified across both types) -CREATE TABLE tree.proposed_branch_evidence ( - id SERIAL PRIMARY KEY, - proposed_branch_id INTEGER NOT NULL REFERENCES tree.proposed_branch(id) ON DELETE CASCADE, - sample_type VARCHAR(20) NOT NULL CHECK (sample_type IN ('CITIZEN', 'EXTERNAL')), - sample_id INTEGER NOT NULL, - sample_guid UUID NOT NULL, - added_at TIMESTAMP NOT NULL DEFAULT NOW(), - variant_match_count INTEGER NOT NULL DEFAULT 0, - variant_mismatch_count INTEGER NOT NULL DEFAULT 0, - UNIQUE(proposed_branch_id, sample_type, sample_id) -); - -CREATE INDEX idx_pbe_sample ON tree.proposed_branch_evidence(sample_type, sample_id); -CREATE INDEX idx_pbe_guid ON tree.proposed_branch_evidence(sample_guid); - -COMMENT ON TABLE tree.proposed_branch_evidence IS 'Links biosamples (Citizen or External) to the proposed branches they support.'; - --- Curator audit trail -CREATE TABLE tree.curator_action ( - id SERIAL PRIMARY KEY, - curator_id VARCHAR(255) NOT NULL, - action_type VARCHAR(50) NOT NULL - CHECK (action_type IN ('REVIEW', 'ACCEPT', 'REJECT', 'MODIFY', - 'SPLIT', 'MERGE', 'CREATE', 'DELETE', - 'REASSIGN', 'NAME_VARIANT')), - target_type VARCHAR(50) NOT NULL - CHECK (target_type IN ('PROPOSED_BRANCH', 'HAPLOGROUP', - 'HAPLOGROUP_RELATIONSHIP', 'VARIANT', 'BIOSAMPLE')), - target_id INTEGER NOT NULL, - previous_state JSONB, - new_state JSONB, - reason TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW() -); - -CREATE INDEX idx_ca_curator ON tree.curator_action(curator_id); -CREATE INDEX idx_ca_timestamp ON tree.curator_action(created_at); -CREATE INDEX idx_ca_target ON tree.curator_action(target_type, target_id); - -COMMENT ON TABLE tree.curator_action IS 'Immutable audit trail 
of all curator operations on proposed branches, haplogroups, and variants.'; - --- Configuration for consensus thresholds -CREATE TABLE tree.discovery_config ( - id SERIAL PRIMARY KEY, - haplogroup_type VARCHAR(10) NOT NULL CHECK (haplogroup_type IN ('Y', 'MT')), - config_key VARCHAR(100) NOT NULL, - config_value TEXT NOT NULL, - description TEXT, - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_by VARCHAR(255), - UNIQUE(haplogroup_type, config_key) -); - -INSERT INTO tree.discovery_config (haplogroup_type, config_key, config_value, description) VALUES -('Y', 'consensus_threshold', '3', 'Minimum biosamples required to reach ReadyForReview'), -('Y', 'auto_promote_threshold', '10', 'Biosamples required for automatic promotion consideration'), -('Y', 'confidence_threshold', '0.95', 'Minimum confidence score for promotion'), -('Y', 'jaccard_match_threshold', '0.8', 'Minimum Jaccard similarity to match an existing proposal'), -('MT', 'consensus_threshold', '3', 'Minimum biosamples required to reach ReadyForReview'), -('MT', 'auto_promote_threshold', '10', 'Biosamples required for automatic promotion consideration'), -('MT', 'confidence_threshold', '0.95', 'Minimum confidence score for promotion'), -('MT', 'jaccard_match_threshold', '0.8', 'Minimum Jaccard similarity to match an existing proposal'); - -COMMENT ON TABLE tree.discovery_config IS 'Per-haplogroup-type configuration for discovery thresholds and scoring parameters.'; - --- !Downs - -DROP TABLE IF EXISTS tree.discovery_config; -DROP TABLE IF EXISTS tree.curator_action; -DROP TABLE IF EXISTS tree.proposed_branch_evidence; -DROP TABLE IF EXISTS tree.proposed_branch_variant; -DROP TABLE IF EXISTS tree.proposed_branch; -DROP TABLE IF EXISTS tree.biosample_private_variant; diff --git a/conf/evolutions/default/62.sql b/conf/evolutions/default/62.sql deleted file mode 100644 index a68a03c4..00000000 --- a/conf/evolutions/default/62.sql +++ /dev/null @@ -1,30 +0,0 @@ -# --- !Ups - --- Per-biosample callable 
loci storage for accurate mutation rate calculation --- Uses polymorphic reference pattern (consistent with tree.biosample_private_variant) -CREATE TABLE genomics.biosample_callable_loci ( - id SERIAL PRIMARY KEY, - sample_type VARCHAR(20) NOT NULL, - sample_id INTEGER NOT NULL, - sample_guid UUID, - chromosome VARCHAR(20) NOT NULL, - total_callable_bp BIGINT NOT NULL, - region_count INTEGER, - bed_file_hash VARCHAR(64), - computed_at TIMESTAMP NOT NULL, - source_test_type_id INTEGER REFERENCES test_type_definition(id), - y_xdegen_callable_bp BIGINT, - y_ampliconic_callable_bp BIGINT, - y_palindromic_callable_bp BIGINT, - UNIQUE(sample_type, sample_id, chromosome), - CHECK (sample_type IN ('citizen', 'external')) -); - -CREATE INDEX idx_bcl_sample ON genomics.biosample_callable_loci(sample_type, sample_id); -CREATE INDEX idx_bcl_guid ON genomics.biosample_callable_loci(sample_guid) WHERE sample_guid IS NOT NULL; - -# --- !Downs - -DROP INDEX IF EXISTS idx_bcl_guid; -DROP INDEX IF EXISTS idx_bcl_sample; -DROP TABLE IF EXISTS genomics.biosample_callable_loci; diff --git a/conf/evolutions/default/63.sql b/conf/evolutions/default/63.sql deleted file mode 100644 index 18e1bc96..00000000 --- a/conf/evolutions/default/63.sql +++ /dev/null @@ -1,29 +0,0 @@ -# --- !Ups - --- Genealogical anchors for historical age constraints on haplogroup branches --- Supports known MRCAs, most distant known ancestors, and ancient DNA calibration points -CREATE TABLE tree.genealogical_anchor ( - id SERIAL PRIMARY KEY, - haplogroup_id INTEGER NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE, - anchor_type VARCHAR(50) NOT NULL, - date_ce INTEGER NOT NULL, - date_uncertainty_years INTEGER, - confidence NUMERIC(3,2), - description TEXT, - source VARCHAR(500), - carbon_date_bp INTEGER, - carbon_date_sigma INTEGER, - created_by VARCHAR(255), - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - CHECK (anchor_type IN ('KNOWN_MRCA', 'MDKA', 'ANCIENT_DNA')), - CHECK (confidence IS 
NULL OR (confidence >= 0 AND confidence <= 1)) -); - -CREATE INDEX idx_genealogical_anchor_haplogroup ON tree.genealogical_anchor(haplogroup_id); -CREATE INDEX idx_genealogical_anchor_type ON tree.genealogical_anchor(anchor_type); - -# --- !Downs - -DROP INDEX IF EXISTS tree.idx_genealogical_anchor_type; -DROP INDEX IF EXISTS tree.idx_genealogical_anchor_haplogroup; -DROP TABLE IF EXISTS tree.genealogical_anchor; diff --git a/conf/evolutions/default/64.sql b/conf/evolutions/default/64.sql deleted file mode 100644 index 4cc8d88d..00000000 --- a/conf/evolutions/default/64.sql +++ /dev/null @@ -1,28 +0,0 @@ -# --- !Ups - --- Ancestral STR motifs (modal haplotypes) per haplogroup branch --- Used for STR-based age estimation and group project modal computations -CREATE TABLE tree.haplogroup_ancestral_str ( - id SERIAL PRIMARY KEY, - haplogroup_id INTEGER NOT NULL REFERENCES tree.haplogroup(haplogroup_id) ON DELETE CASCADE, - marker_name VARCHAR(50) NOT NULL, - ancestral_value INTEGER, - ancestral_value_alt INTEGER[], - confidence NUMERIC(3,2), - supporting_samples INTEGER, - variance NUMERIC(8,4), - computed_at TIMESTAMP NOT NULL DEFAULT NOW(), - method VARCHAR(50) NOT NULL DEFAULT 'MODAL', - UNIQUE(haplogroup_id, marker_name), - CHECK (method IN ('MODAL', 'PHYLOGENETIC', 'MANUAL')), - CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)) -); - -CREATE INDEX idx_hg_ancestral_str_haplogroup ON tree.haplogroup_ancestral_str(haplogroup_id); -CREATE INDEX idx_hg_ancestral_str_marker ON tree.haplogroup_ancestral_str(marker_name); - -# --- !Downs - -DROP INDEX IF EXISTS tree.idx_hg_ancestral_str_marker; -DROP INDEX IF EXISTS tree.idx_hg_ancestral_str_haplogroup; -DROP TABLE IF EXISTS tree.haplogroup_ancestral_str; diff --git a/conf/evolutions/default/65.sql b/conf/evolutions/default/65.sql deleted file mode 100644 index c524da68..00000000 --- a/conf/evolutions/default/65.sql +++ /dev/null @@ -1,78 +0,0 @@ --- # --- !Ups - --- Phase 2: Consolidate 
alignment_coverage into alignment_metadata as JSONB -ALTER TABLE public.alignment_metadata ADD COLUMN coverage JSONB; - --- Migrate existing coverage data into the new JSONB column -UPDATE public.alignment_metadata am -SET coverage = jsonb_build_object( - 'meanDepth', ac.mean_depth, - 'medianDepth', ac.median_depth, - 'percentCoverageAt1x', ac.percent_coverage_at_1x, - 'percentCoverageAt5x', ac.percent_coverage_at_5x, - 'percentCoverageAt10x', ac.percent_coverage_at_10x, - 'percentCoverageAt20x', ac.percent_coverage_at_20x, - 'percentCoverageAt30x', ac.percent_coverage_at_30x, - 'basesNoCoverage', ac.bases_no_coverage, - 'basesLowQualityMapping', ac.bases_low_quality_mapping, - 'basesCallable', ac.bases_callable, - 'meanMappingQuality', ac.mean_mapping_quality -) -FROM public.alignment_coverage ac -WHERE ac.alignment_metadata_id = am.id; - --- Expression indexes for aggregation queries on JSONB coverage fields -CREATE INDEX idx_am_coverage_mean_depth ON public.alignment_metadata (((coverage->>'meanDepth')::double precision)) WHERE coverage IS NOT NULL; -CREATE INDEX idx_am_coverage_bases_callable ON public.alignment_metadata (((coverage->>'basesCallable')::bigint)) WHERE coverage IS NOT NULL; -CREATE INDEX idx_am_coverage_mean_mapping_quality ON public.alignment_metadata (((coverage->>'meanMappingQuality')::double precision)) WHERE coverage IS NOT NULL; - --- Drop the old table -DROP TABLE public.alignment_coverage; - - --- # --- !Downs - --- Recreate alignment_coverage table -CREATE TABLE public.alignment_coverage ( - alignment_metadata_id BIGINT PRIMARY KEY REFERENCES alignment_metadata(id) ON DELETE CASCADE, - mean_depth DOUBLE PRECISION, - median_depth DOUBLE PRECISION, - percent_coverage_at_1x DOUBLE PRECISION, - percent_coverage_at_5x DOUBLE PRECISION, - percent_coverage_at_10x DOUBLE PRECISION, - percent_coverage_at_20x DOUBLE PRECISION, - percent_coverage_at_30x DOUBLE PRECISION, - bases_no_coverage BIGINT, - bases_low_quality_mapping BIGINT, - 
bases_callable BIGINT, - mean_mapping_quality DOUBLE PRECISION -); - --- Migrate data back from JSONB to separate table -INSERT INTO public.alignment_coverage ( - alignment_metadata_id, mean_depth, median_depth, - percent_coverage_at_1x, percent_coverage_at_5x, percent_coverage_at_10x, - percent_coverage_at_20x, percent_coverage_at_30x, - bases_no_coverage, bases_low_quality_mapping, bases_callable, - mean_mapping_quality -) -SELECT id, - (coverage->>'meanDepth')::double precision, - (coverage->>'medianDepth')::double precision, - (coverage->>'percentCoverageAt1x')::double precision, - (coverage->>'percentCoverageAt5x')::double precision, - (coverage->>'percentCoverageAt10x')::double precision, - (coverage->>'percentCoverageAt20x')::double precision, - (coverage->>'percentCoverageAt30x')::double precision, - (coverage->>'basesNoCoverage')::bigint, - (coverage->>'basesLowQualityMapping')::bigint, - (coverage->>'basesCallable')::bigint, - (coverage->>'meanMappingQuality')::double precision -FROM public.alignment_metadata -WHERE coverage IS NOT NULL; - --- Drop indexes and column -DROP INDEX IF EXISTS idx_am_coverage_mean_depth; -DROP INDEX IF EXISTS idx_am_coverage_bases_callable; -DROP INDEX IF EXISTS idx_am_coverage_mean_mapping_quality; -ALTER TABLE public.alignment_metadata DROP COLUMN coverage; diff --git a/conf/evolutions/default/66.sql b/conf/evolutions/default/66.sql deleted file mode 100644 index 759f7c95..00000000 --- a/conf/evolutions/default/66.sql +++ /dev/null @@ -1,105 +0,0 @@ --- # --- !Ups - --- Phase 3: Consolidate biosample_original_haplogroup into biosample as JSONB array -ALTER TABLE public.biosample ADD COLUMN original_haplogroups JSONB DEFAULT '[]'::jsonb; - --- Migrate existing data from the separate table into the JSONB array column -UPDATE public.biosample b SET original_haplogroups = ( - SELECT COALESCE(jsonb_agg(jsonb_build_object( - 'publicationId', boh.publication_id, - 'yHaplogroupResult', boh.y_haplogroup_result, - 
'mtHaplogroupResult', boh.mt_haplogroup_result, - 'notes', boh.notes - ) ORDER BY boh.publication_id), '[]'::jsonb) - FROM biosample_original_haplogroup boh - WHERE boh.biosample_id = b.id -) -WHERE b.id IN (SELECT DISTINCT biosample_id FROM biosample_original_haplogroup); - --- GIN index for containment queries (e.g., finding biosamples by publication_id in array) -CREATE INDEX idx_biosample_orig_hg ON public.biosample - USING GIN (original_haplogroups jsonb_path_ops) - WHERE original_haplogroups != '[]'::jsonb; - --- Same for citizen_biosample -ALTER TABLE public.citizen_biosample ADD COLUMN original_haplogroups JSONB DEFAULT '[]'::jsonb; - -UPDATE public.citizen_biosample cb SET original_haplogroups = ( - SELECT COALESCE(jsonb_agg(jsonb_build_object( - 'publicationId', cboh.publication_id, - 'yHaplogroupResult', cboh.y_haplogroup_result, - 'mtHaplogroupResult', cboh.mt_haplogroup_result, - 'notes', cboh.notes - ) ORDER BY cboh.publication_id), '[]'::jsonb) - FROM citizen_biosample_original_haplogroup cboh - WHERE cboh.citizen_biosample_id = cb.id -) -WHERE cb.id IN (SELECT DISTINCT citizen_biosample_id FROM citizen_biosample_original_haplogroup); - -CREATE INDEX idx_citizen_biosample_orig_hg ON public.citizen_biosample - USING GIN (original_haplogroups jsonb_path_ops) - WHERE original_haplogroups != '[]'::jsonb; - --- Drop old tables -DROP TABLE public.biosample_original_haplogroup; -DROP TABLE public.citizen_biosample_original_haplogroup; - - --- # --- !Downs - --- Recreate biosample_original_haplogroup table -CREATE TABLE public.biosample_original_haplogroup ( - id SERIAL PRIMARY KEY, - biosample_id INT REFERENCES biosample (id) ON DELETE CASCADE, - publication_id INT REFERENCES publication (id) ON DELETE CASCADE, - original_y_haplogroup VARCHAR(255), - original_mt_haplogroup VARCHAR(255), - notes TEXT, - y_haplogroup_result JSONB, - mt_haplogroup_result JSONB, - UNIQUE (biosample_id, publication_id) -); - --- Migrate data back from JSONB array -INSERT INTO 
public.biosample_original_haplogroup ( - biosample_id, publication_id, y_haplogroup_result, mt_haplogroup_result, notes -) -SELECT b.id, - (entry->>'publicationId')::int, - entry->'yHaplogroupResult', - entry->'mtHaplogroupResult', - entry->>'notes' -FROM public.biosample b, - jsonb_array_elements(b.original_haplogroups) AS entry -WHERE b.original_haplogroups != '[]'::jsonb; - --- Recreate citizen_biosample_original_haplogroup table -CREATE TABLE public.citizen_biosample_original_haplogroup ( - id SERIAL PRIMARY KEY, - citizen_biosample_id INT REFERENCES citizen_biosample (id) ON DELETE CASCADE, - publication_id INT REFERENCES publication (id) ON DELETE CASCADE, - original_y_haplogroup VARCHAR(255), - original_mt_haplogroup VARCHAR(255), - notes TEXT, - y_haplogroup_result JSONB, - mt_haplogroup_result JSONB, - UNIQUE (citizen_biosample_id, publication_id) -); - -INSERT INTO public.citizen_biosample_original_haplogroup ( - citizen_biosample_id, publication_id, y_haplogroup_result, mt_haplogroup_result, notes -) -SELECT cb.id, - (entry->>'publicationId')::int, - entry->'yHaplogroupResult', - entry->'mtHaplogroupResult', - entry->>'notes' -FROM public.citizen_biosample cb, - jsonb_array_elements(cb.original_haplogroups) AS entry -WHERE cb.original_haplogroups != '[]'::jsonb; - --- Drop JSONB columns and indexes -DROP INDEX IF EXISTS idx_biosample_orig_hg; -DROP INDEX IF EXISTS idx_citizen_biosample_orig_hg; -ALTER TABLE public.biosample DROP COLUMN original_haplogroups; -ALTER TABLE public.citizen_biosample DROP COLUMN original_haplogroups; diff --git a/conf/evolutions/default/67.sql b/conf/evolutions/default/67.sql deleted file mode 100644 index 7726fc2c..00000000 --- a/conf/evolutions/default/67.sql +++ /dev/null @@ -1,28 +0,0 @@ --- # --- !Ups - -CREATE TABLE public.instrument_observation ( - id SERIAL PRIMARY KEY, - at_uri VARCHAR(512) UNIQUE NOT NULL, - at_cid VARCHAR(128), - instrument_id VARCHAR(255) NOT NULL, - lab_name VARCHAR(255) NOT NULL, - biosample_ref 
VARCHAR(512) NOT NULL, - sequence_run_ref VARCHAR(512), - platform VARCHAR(100), - instrument_model VARCHAR(255), - flowcell_id VARCHAR(255), - run_date TIMESTAMP, - confidence VARCHAR(20) DEFAULT 'INFERRED' CHECK (confidence IN ('KNOWN', 'INFERRED', 'GUESSED')), - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP -); - -CREATE INDEX idx_instrument_obs_instrument ON public.instrument_observation (instrument_id); -CREATE INDEX idx_instrument_obs_lab ON public.instrument_observation (lab_name); -CREATE INDEX idx_instrument_obs_biosample ON public.instrument_observation (biosample_ref); -CREATE INDEX idx_instrument_obs_at_uri ON public.instrument_observation (at_uri); - - --- # --- !Downs - -DROP TABLE IF EXISTS public.instrument_observation; diff --git a/conf/evolutions/default/68.sql b/conf/evolutions/default/68.sql deleted file mode 100644 index 9a567e14..00000000 --- a/conf/evolutions/default/68.sql +++ /dev/null @@ -1,53 +0,0 @@ -# --- !Ups - --- Instrument association proposals for consensus-based lab inference -CREATE TABLE public.instrument_association_proposal ( - id SERIAL PRIMARY KEY, - instrument_id VARCHAR(255) NOT NULL, - proposed_lab_name VARCHAR(255) NOT NULL, - proposed_manufacturer VARCHAR(255), - proposed_model VARCHAR(255), - existing_lab_id INTEGER REFERENCES public.sequencing_lab(id), - observation_count INTEGER NOT NULL DEFAULT 0, - distinct_citizen_count INTEGER NOT NULL DEFAULT 0, - confidence_score DOUBLE PRECISION NOT NULL DEFAULT 0.0, - earliest_observation TIMESTAMP, - latest_observation TIMESTAMP, - status VARCHAR(30) NOT NULL DEFAULT 'PENDING' - CHECK (status IN ('PENDING', 'READY_FOR_REVIEW', 'UNDER_REVIEW', - 'ACCEPTED', 'REJECTED', 'SUPERSEDED')), - reviewed_at TIMESTAMP, - reviewed_by VARCHAR(255), - review_notes TEXT, - accepted_lab_id INTEGER REFERENCES public.sequencing_lab(id), - accepted_instrument_id INTEGER REFERENCES public.sequencer_instrument(id), - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at 
TIMESTAMP NOT NULL DEFAULT NOW() -); - -CREATE INDEX idx_iap_status ON instrument_association_proposal(status); -CREATE INDEX idx_iap_instrument ON instrument_association_proposal(instrument_id); -CREATE UNIQUE INDEX idx_iap_active_instrument ON instrument_association_proposal(instrument_id) - WHERE status NOT IN ('ACCEPTED', 'REJECTED', 'SUPERSEDED'); - --- Add observation tracking columns to existing sequencer_instrument table -ALTER TABLE public.sequencer_instrument - ADD COLUMN source VARCHAR(30) DEFAULT 'CURATOR' - CHECK (source IN ('CURATOR', 'CONSENSUS', 'PUBLICATION')), - ADD COLUMN observation_count INTEGER DEFAULT 0, - ADD COLUMN confidence_score DOUBLE PRECISION DEFAULT 1.0, - ADD COLUMN last_observed_at TIMESTAMP; - -CREATE INDEX idx_si_confidence ON public.sequencer_instrument(confidence_score DESC); - -# --- !Downs - -DROP INDEX IF EXISTS idx_si_confidence; - -ALTER TABLE public.sequencer_instrument - DROP COLUMN IF EXISTS source, - DROP COLUMN IF EXISTS observation_count, - DROP COLUMN IF EXISTS confidence_score, - DROP COLUMN IF EXISTS last_observed_at; - -DROP TABLE IF EXISTS public.instrument_association_proposal; diff --git a/conf/evolutions/default/69.sql b/conf/evolutions/default/69.sql deleted file mode 100644 index ff55a69e..00000000 --- a/conf/evolutions/default/69.sql +++ /dev/null @@ -1,99 +0,0 @@ -# --- !Ups - --- Vendor-specific targeted test types -INSERT INTO test_type_definition ( - code, display_name, category, vendor, target_type, - expected_min_depth, expected_target_depth, - supports_haplogroup_y, supports_haplogroup_mt, supports_autosomal_ibd, supports_ancestry, - typical_file_formats, description -) VALUES -('BIG_Y_700', 'FTDNA Big Y-700', 'SEQUENCING', 'FamilyTreeDNA', 'Y_CHROMOSOME', - 30.0, 50.0, - TRUE, FALSE, FALSE, FALSE, - ARRAY['BAM', 'VCF', 'BED'], 'FamilyTreeDNA Big Y-700 targeted Y-chromosome sequencing covering ~700 STRs and ~200K Y-SNPs.'), - -('Y_ELITE', 'Full Genomes Y Elite', 'SEQUENCING', 'Full Genomes', 
'Y_CHROMOSOME', - 15.0, 30.0, - TRUE, FALSE, FALSE, FALSE, - ARRAY['BAM', 'CRAM', 'VCF'], 'Full Genomes Y Elite whole Y-chromosome sequencing at 30x depth.'), - -('Y_PRIME', 'YSEQ Y-Prime', 'SEQUENCING', 'YSEQ', 'Y_CHROMOSOME', - 15.0, 30.0, - TRUE, FALSE, FALSE, FALSE, - ARRAY['BAM', 'VCF'], 'YSEQ Y-Prime Y-chromosome sequencing product.'), - -('MT_FULL_SEQUENCE', 'mtDNA Full Sequence', 'SEQUENCING', 'FamilyTreeDNA', 'MT_DNA', - 500.0, 1000.0, - FALSE, TRUE, FALSE, FALSE, - ARRAY['BAM', 'FASTA', 'VCF'], 'Full mitochondrial genome sequencing (16,569 bp).'), - -('MT_PLUS', 'FTDNA mtDNA Plus', 'SEQUENCING', 'FamilyTreeDNA', 'MT_DNA', - 200.0, 500.0, - FALSE, TRUE, FALSE, FALSE, - ARRAY['BAM', 'FASTA', 'VCF'], 'FamilyTreeDNA mtDNA Plus covering full mitochondrial genome.'), - -('BIG_Y_500', 'FTDNA Big Y-500 (Legacy)', 'SEQUENCING', 'FamilyTreeDNA', 'Y_CHROMOSOME', - 20.0, 40.0, - TRUE, FALSE, FALSE, FALSE, - ARRAY['BAM', 'VCF', 'BED'], 'Legacy FamilyTreeDNA Big Y-500 product (superseded by Big Y-700).'); - --- Link Big Y-500 successor to Big Y-700 -UPDATE test_type_definition -SET successor_test_type_id = (SELECT id FROM test_type_definition WHERE code = 'BIG_Y_700') -WHERE code = 'BIG_Y_500'; - --- Target region definitions for targeted tests -CREATE TABLE public.test_type_target_region ( - id SERIAL PRIMARY KEY, - test_type_id INTEGER NOT NULL REFERENCES test_type_definition(id), - contig_name VARCHAR(50) NOT NULL, - start_position INTEGER, - end_position INTEGER, - region_name VARCHAR(100) NOT NULL, - region_type VARCHAR(50) NOT NULL - CHECK (region_type IN ('FULL', 'PARTIAL', 'TARGETED_SNPS')), - expected_coverage_pct DOUBLE PRECISION, - expected_min_depth DOUBLE PRECISION, - UNIQUE(test_type_id, contig_name, start_position, end_position) -); - -CREATE INDEX idx_tttr_test_type ON test_type_target_region(test_type_id); - --- Seed target regions for targeted tests --- Big Y-700: Y chromosome combbed region -INSERT INTO test_type_target_region (test_type_id, 
contig_name, start_position, end_position, region_name, region_type, expected_coverage_pct, expected_min_depth) -SELECT id, 'chrY', 2781480, 56887903, 'Y Combbed Region', 'TARGETED_SNPS', 0.95, 30.0 -FROM test_type_definition WHERE code = 'BIG_Y_700'; - --- Big Y-500: Y chromosome combbed region (narrower) -INSERT INTO test_type_target_region (test_type_id, contig_name, start_position, end_position, region_name, region_type, expected_coverage_pct, expected_min_depth) -SELECT id, 'chrY', 2781480, 56887903, 'Y Combbed Region', 'TARGETED_SNPS', 0.90, 20.0 -FROM test_type_definition WHERE code = 'BIG_Y_500'; - --- Y Elite: Full Y chromosome -INSERT INTO test_type_target_region (test_type_id, contig_name, region_name, region_type, expected_coverage_pct, expected_min_depth) -SELECT id, 'chrY', 'Full Y Chromosome', 'FULL', 0.98, 15.0 -FROM test_type_definition WHERE code = 'Y_ELITE'; - --- Y Prime: Full Y chromosome -INSERT INTO test_type_target_region (test_type_id, contig_name, region_name, region_type, expected_coverage_pct, expected_min_depth) -SELECT id, 'chrY', 'Full Y Chromosome', 'FULL', 0.95, 15.0 -FROM test_type_definition WHERE code = 'Y_PRIME'; - --- MT Full Sequence: Full mitochondrial genome -INSERT INTO test_type_target_region (test_type_id, contig_name, start_position, end_position, region_name, region_type, expected_coverage_pct, expected_min_depth) -SELECT id, 'chrM', 1, 16569, 'Full Mitochondrial Genome', 'FULL', 0.999, 500.0 -FROM test_type_definition WHERE code = 'MT_FULL_SEQUENCE'; - --- MT Plus: Full mitochondrial genome -INSERT INTO test_type_target_region (test_type_id, contig_name, start_position, end_position, region_name, region_type, expected_coverage_pct, expected_min_depth) -SELECT id, 'chrM', 1, 16569, 'Full Mitochondrial Genome', 'FULL', 0.999, 200.0 -FROM test_type_definition WHERE code = 'MT_PLUS'; - -# --- !Downs - -DROP TABLE IF EXISTS public.test_type_target_region; - -DELETE FROM test_type_definition WHERE code IN ( - 'BIG_Y_700', 
'Y_ELITE', 'Y_PRIME', 'MT_FULL_SEQUENCE', 'MT_PLUS', 'BIG_Y_500' -); diff --git a/conf/evolutions/default/7.sql b/conf/evolutions/default/7.sql deleted file mode 100644 index 5f70dea7..00000000 --- a/conf/evolutions/default/7.sql +++ /dev/null @@ -1,306 +0,0 @@ -# --- !Ups ---- New tables to enable Pan Genome instead of traditional linear references - --- ----------------------------------------------------------- --- 1. New Tables: Pangenome Core & Reference Data --- ----------------------------------------------------------- - --- Table: public.pangenome_graph --- Defines unique versions or builds of the pangenome graph itself. -CREATE TABLE public.pangenome_graph -( - id BIGSERIAL PRIMARY KEY, - name VARCHAR(255) NOT NULL UNIQUE, - description TEXT, - creation_date TIMESTAMP NOT NULL DEFAULT NOW(), - checksum VARCHAR(255) -); - --- Table: public.assembly_metadata --- Stores metadata about the source assemblies (e.g., GRCh37, GRCh38, CHM13v2.0) -CREATE TABLE public.assembly_metadata -( - id BIGSERIAL PRIMARY KEY, - assembly_name VARCHAR(255) NOT NULL UNIQUE, - accession VARCHAR(255), - release_date DATE, - source_organism VARCHAR(255), - assembly_level VARCHAR(50), - metadata JSONB -); - --- Table: public.pangenome_node --- Represents the atomic, shared DNA segments that are the building blocks of the pangenome graph. -CREATE TABLE public.pangenome_node -( - id BIGSERIAL PRIMARY KEY, - graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - sequence TEXT NOT NULL, - length INTEGER NOT NULL, - is_core BOOLEAN, - annotation_id INTEGER -); - --- Table: public.pangenome_edge --- Defines the connections or adjacencies between pangenome_node's. 
-CREATE TABLE public.pangenome_edge -( - id BIGSERIAL PRIMARY KEY, - graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - source_node_id INTEGER NOT NULL REFERENCES public.pangenome_node (id), - target_node_id INTEGER NOT NULL REFERENCES public.pangenome_node (id), - source_orientation VARCHAR(1) NOT NULL CHECK (source_orientation IN ('+', '-')), - target_orientation VARCHAR(1) NOT NULL CHECK (target_orientation IN ('+', '-')), - type VARCHAR(50), - UNIQUE (graph_id, source_node_id, target_node_id, source_orientation, target_orientation) -); - --- Table: public.pangenome_path --- Represents specific linear sequences (like GRCh38 chr1, Y-DNA reference) as ordered traversals through pangenome_nodes. -CREATE TABLE public.pangenome_path -( - id BIGSERIAL PRIMARY KEY, - graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - name VARCHAR(255) NOT NULL, - node_sequence INTEGER[] NOT NULL, - length BIGINT NOT NULL, - source_assembly_id INTEGER REFERENCES public.assembly_metadata (id), - UNIQUE (graph_id, name) -); - --- Table: public.gene_annotation (Optional, if not existing or needs separate table) --- Stores metadata about genes. -CREATE TABLE public.gene_annotation -( - id BIGSERIAL PRIMARY KEY, - gene_symbol VARCHAR(255), - gene_id VARCHAR(255), - description TEXT, - representative_sequence_node_id INTEGER REFERENCES public.pangenome_node (id) -); - --- Add the foreign key to pangenome_node now that gene_annotation exists -ALTER TABLE public.pangenome_node - ADD CONSTRAINT fk_pangenome_node_annotation FOREIGN KEY (annotation_id) REFERENCES public.gene_annotation (id); - --- ----------------------------------------------------------- --- 2. New Tables: Variant Representation & Linkage --- ----------------------------------------------------------- - --- Table: public.canonical_pangenome_variant --- Represents a unique, abstract variant (SNP, INDEL, Structural Variant) as defined within a specific pangenome graph. 
-CREATE TABLE public.canonical_pangenome_variant -( - id BIGSERIAL PRIMARY KEY, - pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - variant_type VARCHAR(50) NOT NULL, - variant_nodes INTEGER[] NOT NULL, - variant_edges INTEGER[] NOT NULL DEFAULT '{}', - reference_path_id INTEGER REFERENCES public.pangenome_path (id), - reference_start_position INTEGER, - reference_end_position INTEGER, - reference_allele_sequence TEXT, - alternate_allele_sequence TEXT, - canonical_hash VARCHAR(255) NOT NULL UNIQUE, - description TEXT, - creation_date TIMESTAMP NOT NULL DEFAULT NOW() -); - --- Table: public.pangenome_variant_link --- Bridges existing public.variant (legacy marker) to its canonical pangenome representation. -CREATE TABLE public.pangenome_variant_link -( - pangenome_variant_link_id BIGSERIAL PRIMARY KEY, - variant_id INTEGER NOT NULL REFERENCES public.variant (variant_id) ON DELETE CASCADE, - canonical_pangenome_variant_id INTEGER NOT NULL REFERENCES public.canonical_pangenome_variant (id) ON DELETE CASCADE, - pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - description TEXT, - mapping_source VARCHAR(255) NOT NULL, - mapping_date TIMESTAMP NOT NULL DEFAULT NOW(), - UNIQUE (variant_id, canonical_pangenome_variant_id) -); - --- ----------------------------------------------------------- --- 3. New Table: Sample-Specific Variant Calls --- ----------------------------------------------------------- - --- Table: public.reported_variant_pangenome --- Stores the detailed variant calls detected for each sample_guid against a pangenome graph. 
-CREATE TABLE public.reported_variant_pangenome -( - id BIGSERIAL PRIMARY KEY, - sample_guid UUID NOT NULL, - graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - variant_type VARCHAR(50) NOT NULL CHECK (variant_type IN ( - 'SNP', 'INDEL', 'SV_INSERTION', - 'SV_DELETION', 'SV_INVERSION', - 'SV_DUPLICATION', 'SV_TRANSLOCATION', - 'PAV_GENE', 'STR', 'CNV', 'COMPLEX' - )), - reference_path_id INTEGER REFERENCES public.pangenome_path (id), - reference_start_position INTEGER, - reference_end_position INTEGER, - variant_nodes INTEGER[] NOT NULL, - variant_edges INTEGER[] NOT NULL DEFAULT '{}', - alternate_allele_sequence TEXT, - reference_allele_sequence TEXT, - reference_repeat_count INTEGER, - alternate_repeat_count INTEGER, - allele_fraction DOUBLE PRECISION, - depth INTEGER, - reported_date TIMESTAMP NOT NULL DEFAULT NOW(), - provenance VARCHAR(255) NOT NULL, - confidence_score DOUBLE PRECISION NOT NULL, - notes TEXT, - status VARCHAR(255) NOT NULL, - zygosity VARCHAR(10) CHECK (zygosity IN ('HOM_REF', 'HET', 'HOM_ALT', 'UNKNOWN')), - haplotype_information JSONB -); - --- IBD Discovery and Consensus - --- ----------------------------------------------------------- --- 4. New Table: public.validation_service --- ----------------------------------------------------------- -CREATE TABLE public.validation_service -( - id BIGSERIAL PRIMARY KEY, - guid UUID NOT NULL UNIQUE, - name VARCHAR(255) NOT NULL UNIQUE, - description TEXT, - trust_level VARCHAR(50) -); - --- ----------------------------------------------------------- --- 5. New Table: public.ibd_discovery_index --- A central, privacy-preserving index for IBD matches. --- This table represents the *match event itself*. 
--- ----------------------------------------------------------- -CREATE TABLE public.ibd_discovery_index -( - id BIGSERIAL PRIMARY KEY, - sample_guid_1 UUID NOT NULL, - sample_guid_2 UUID NOT NULL, - pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - match_region_type VARCHAR(50) NOT NULL CHECK (match_region_type IN - ('AUTOSOMAL', 'X_CHROMOSOME', 'Y_CHROMOSOME', 'MT_DNA', - 'ALL_CHROMOSOMES')), - total_shared_cm_approx DOUBLE PRECISION, - num_shared_segments_approx INTEGER, - is_publicly_discoverable BOOLEAN NOT NULL DEFAULT FALSE, - consensus_status VARCHAR(50) NOT NULL DEFAULT 'INITIATED', - last_consensus_update TIMESTAMP NOT NULL DEFAULT NOW(), - validation_service_guid UUID REFERENCES public.validation_service (guid), - validation_timestamp TIMESTAMP, - indexed_by_service VARCHAR(255), - indexed_date TIMESTAMP NOT NULL DEFAULT NOW() -); - --- Unique constraint for IBD pairs (order-independent) -CREATE UNIQUE INDEX idx_unique_ibd_discovery_pair ON public.ibd_discovery_index ( - LEAST(sample_guid_1, sample_guid_2), - GREATEST(sample_guid_1, sample_guid_2), - pangenome_graph_id, - match_region_type - ); - --- Indexes for ibd_discovery_index -CREATE INDEX idx_ibd_discovery_sample1 ON public.ibd_discovery_index (sample_guid_1); -CREATE INDEX idx_ibd_discovery_sample2 ON public.ibd_discovery_index (sample_guid_2); -CREATE INDEX idx_ibd_discovery_graph_id ON public.ibd_discovery_index (pangenome_graph_id); -CREATE INDEX idx_ibd_discovery_region_type ON public.ibd_discovery_index (match_region_type); -CREATE INDEX idx_ibd_discovery_cm_approx ON public.ibd_discovery_index (total_shared_cm_approx); -CREATE INDEX idx_ibd_discovery_public_status ON public.ibd_discovery_index (is_publicly_discoverable); -CREATE INDEX idx_ibd_discovery_consensus_status ON public.ibd_discovery_index (consensus_status); - - --- ----------------------------------------------------------- --- 6. 
New Table: public.ibd_pds_attestation --- Records a specific PDS's attestation or validation for an IBD match. --- ----------------------------------------------------------- -CREATE TABLE public.ibd_pds_attestation -( - id BIGSERIAL PRIMARY KEY, - ibd_discovery_index_id BIGINT NOT NULL REFERENCES public.ibd_discovery_index (id) ON DELETE CASCADE, - attesting_pds_guid UUID NOT NULL, - attesting_sample_guid UUID NOT NULL, - attestation_timestamp TIMESTAMP NOT NULL DEFAULT NOW(), - attestation_signature TEXT NOT NULL, - - match_summary_hash VARCHAR(255) NOT NULL, - attestation_type VARCHAR(50) NOT NULL CHECK (attestation_type IN - ('INITIAL_REPORT', 'CONFIRMATION', 'DISPUTE', 'REVOCATION', - 'THIRD_PARTY_VALIDATION')), - attestation_notes TEXT, - UNIQUE (ibd_discovery_index_id, attesting_pds_guid, attestation_type) -); - --- Indexes for ibd_pds_attestation -CREATE INDEX idx_ibd_attestation_index_id ON public.ibd_pds_attestation (ibd_discovery_index_id); -CREATE INDEX idx_ibd_attestation_pds_guid ON public.ibd_pds_attestation (attesting_pds_guid); -CREATE INDEX idx_ibd_attestation_type ON public.ibd_pds_attestation (attestation_type); - - --- ----------------------------------------------------------- --- 6. Revisions to Existing Genetic Genealogy Tables --- ----------------------------------------------------------- - --- Table: public.genbank_contig --- Add columns to link existing contigs to their pangenome context. -ALTER TABLE public.genbank_contig - ADD COLUMN pangenome_path_id INTEGER REFERENCES public.pangenome_path (id); - --- ----------------------------------------------------------- --- 7. 
Indexes for Performance --- ----------------------------------------------------------- - --- Indexes for public.pangenome_node -CREATE INDEX idx_pangenome_node_graph_id ON public.pangenome_node (graph_id); - --- Indexes for public.pangenome_path -CREATE INDEX idx_pangenome_path_graph_id ON public.pangenome_path (graph_id); -CREATE INDEX idx_pangenome_path_assembly_id ON public.pangenome_path (source_assembly_id); - --- Indexes for public.canonical_pangenome_variant -CREATE INDEX idx_cpv_graph_id ON public.canonical_pangenome_variant (pangenome_graph_id); -CREATE INDEX idx_cpv_variant_type ON public.canonical_pangenome_variant (variant_type); -CREATE INDEX idx_cpv_ref_path_pos ON public.canonical_pangenome_variant (reference_path_id, reference_start_position); -CREATE INDEX idx_cpv_variant_nodes ON public.canonical_pangenome_variant USING GIN (variant_nodes); -CREATE INDEX idx_cpv_variant_edges ON public.canonical_pangenome_variant USING GIN (variant_edges); - --- Indexes for public.pangenome_variant_link -CREATE INDEX idx_pvlink_variant_id ON public.pangenome_variant_link (variant_id); -CREATE INDEX idx_pvlink_canonical_id ON public.pangenome_variant_link (canonical_pangenome_variant_id); -CREATE INDEX idx_pvlink_graph_id ON public.pangenome_variant_link (pangenome_graph_id); - --- Indexes for public.reported_variant_pangenome -CREATE INDEX idx_rvp_sample_guid ON public.reported_variant_pangenome (sample_guid); -CREATE INDEX idx_rvp_graph_id ON public.reported_variant_pangenome (graph_id); -CREATE INDEX idx_rvp_variant_type ON public.reported_variant_pangenome (variant_type); -CREATE INDEX idx_rvp_ref_path_pos ON public.reported_variant_pangenome (reference_path_id, reference_start_position); -CREATE INDEX idx_rvp_variant_nodes ON public.reported_variant_pangenome USING GIN (variant_nodes); -CREATE INDEX idx_rvp_variant_edges ON public.reported_variant_pangenome USING GIN (variant_edges); -CREATE INDEX idx_rvp_confidence_score ON public.reported_variant_pangenome 
(confidence_score); - - --- This is destructive, but the tables were never actually used in the application code -DROP TABLE reported_variant; -DROP TABLE reported_negative_variant; - -# --- !Downs -ALTER TABLE public.genbank_contig - DROP COLUMN pangenome_path_id; - -DROP TABLE public.ibd_pds_attestation; -DROP TABLE public.ibd_discovery_index; -DROP TABLE public.validation_service; -DROP TABLE public.reported_variant_pangenome; -DROP TABLE public.pangenome_variant_link; -DROP TABLE public.canonical_pangenome_variant; -DROP TABLE public.pangenome_node; -DROP TABLE public.gene_annotation; -DROP TABLE public.pangenome_path; -DROP TABLE public.pangenome_edge; -DROP TABLE public.pangenome_node; -DROP TABLE public.assembly_metadata; -DROP TABLE public.pangenome_graph; \ No newline at end of file diff --git a/conf/evolutions/default/70.sql b/conf/evolutions/default/70.sql deleted file mode 100644 index f76ba3cb..00000000 --- a/conf/evolutions/default/70.sql +++ /dev/null @@ -1,50 +0,0 @@ -# --- !Ups - --- Additional vendor-specific chip test types -INSERT INTO test_type_definition ( - code, display_name, category, vendor, target_type, - expected_marker_count, - supports_haplogroup_y, supports_haplogroup_mt, supports_autosomal_ibd, supports_ancestry, - typical_file_formats, description -) VALUES -('ARRAY_23ANDME_V4', '23andMe v4 Chip', 'GENOTYPING', '23andMe', 'MIXED', - 570000, - TRUE, TRUE, TRUE, TRUE, - ARRAY['TXT', 'CSV'], '23andMe v4 chip (~570K markers). Superseded by v5.'), - -('ARRAY_ANCESTRY_V1', 'AncestryDNA v1', 'GENOTYPING', 'AncestryDNA', 'MIXED', - 700000, - TRUE, TRUE, TRUE, TRUE, - ARRAY['TXT', 'CSV'], 'AncestryDNA v1 chip. 
Superseded by v2.'), - -('ARRAY_MYHERITAGE', 'MyHeritage DNA', 'GENOTYPING', 'MyHeritage', 'MIXED', - 700000, - TRUE, TRUE, TRUE, TRUE, - ARRAY['CSV'], 'MyHeritage DNA chip (~700K markers).'), - -('ARRAY_LIVINGDNA', 'LivingDNA', 'GENOTYPING', 'LivingDNA', 'MIXED', - 630000, - TRUE, TRUE, TRUE, TRUE, - ARRAY['CSV', 'TXT'], 'LivingDNA chip (~630K markers).'), - -('ARRAY_CUSTOM', 'Custom SNP Array', 'GENOTYPING', NULL, 'MIXED', - NULL, - TRUE, TRUE, FALSE, FALSE, - ARRAY['TXT', 'CSV', 'VCF'], 'Custom or unrecognized SNP array data.'); - --- Link deprecated versions to successors -UPDATE test_type_definition -SET successor_test_type_id = (SELECT id FROM test_type_definition WHERE code = 'SNP_ARRAY_23ANDME'), - deprecated_at = '2017-08-01' -WHERE code = 'ARRAY_23ANDME_V4'; - -UPDATE test_type_definition -SET successor_test_type_id = (SELECT id FROM test_type_definition WHERE code = 'SNP_ARRAY_ANCESTRY'), - deprecated_at = '2019-01-01' -WHERE code = 'ARRAY_ANCESTRY_V1'; - -# --- !Downs - -DELETE FROM test_type_definition WHERE code IN ( - 'ARRAY_23ANDME_V4', 'ARRAY_ANCESTRY_V1', 'ARRAY_MYHERITAGE', 'ARRAY_LIVINGDNA', 'ARRAY_CUSTOM' -); diff --git a/conf/evolutions/default/71.sql b/conf/evolutions/default/71.sql deleted file mode 100644 index c806bf1e..00000000 --- a/conf/evolutions/default/71.sql +++ /dev/null @@ -1,71 +0,0 @@ -# --- !Ups - --- Per-test-type coverage expectation profiles for variant calling confidence -CREATE TABLE public.coverage_expectation_profile ( - id SERIAL PRIMARY KEY, - test_type_id INTEGER NOT NULL REFERENCES public.test_type_definition(id), - contig_name VARCHAR(50) NOT NULL, - variant_class VARCHAR(50) NOT NULL DEFAULT 'SNP', -- SNP, STR, INDEL - min_depth_high DOUBLE PRECISION NOT NULL, -- minimum depth for HIGH confidence - min_depth_medium DOUBLE PRECISION NOT NULL, -- minimum depth for MEDIUM confidence - min_depth_low DOUBLE PRECISION NOT NULL, -- minimum depth for LOW confidence - min_coverage_pct DOUBLE PRECISION, -- minimum % bases 
covered at 1x - min_mapping_quality DOUBLE PRECISION, -- minimum mean mapping quality - min_callable_pct DOUBLE PRECISION, -- minimum % callable bases - notes TEXT, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - UNIQUE (test_type_id, contig_name, variant_class) -); - -CREATE INDEX idx_cep_test_type ON public.coverage_expectation_profile (test_type_id); - --- Seed profiles for WGS -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes) -SELECT id, 'Y', 'SNP', 20.0, 10.0, 5.0, 0.95, 30.0, 0.90, 'WGS Y-chromosome SNP calling thresholds' -FROM test_type_definition WHERE code = 'WGS'; - -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes) -SELECT id, 'MT', 'SNP', 100.0, 50.0, 20.0, 0.99, 30.0, 0.95, 'WGS mtDNA SNP calling thresholds (high copy number)' -FROM test_type_definition WHERE code = 'WGS'; - -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, notes) -SELECT id, 'Y', 'STR', 30.0, 15.0, 8.0, 0.90, 20.0, 'WGS Y-chromosome STR calling thresholds (higher depth needed)' -FROM test_type_definition WHERE code = 'WGS'; - --- Seed profiles for BIG_Y_700 -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes) -SELECT id, 'Y', 'SNP', 50.0, 25.0, 10.0, 0.90, 25.0, 0.85, 'BIG-Y 700 Y-chromosome SNP calling thresholds' -FROM test_type_definition WHERE code = 'BIG_Y_700'; - -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, 
min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, notes) -SELECT id, 'Y', 'STR', 60.0, 30.0, 15.0, 0.85, 20.0, 'BIG-Y 700 Y-chromosome STR calling thresholds' -FROM test_type_definition WHERE code = 'BIG_Y_700'; - --- Seed profiles for BIG_Y_500 -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes) -SELECT id, 'Y', 'SNP', 40.0, 20.0, 8.0, 0.85, 25.0, 0.80, 'BIG-Y 500 Y-chromosome SNP calling thresholds' -FROM test_type_definition WHERE code = 'BIG_Y_500'; - --- Seed profiles for Y_ELITE -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes) -SELECT id, 'Y', 'SNP', 60.0, 30.0, 12.0, 0.92, 25.0, 0.88, 'Y-Elite Y-chromosome SNP calling thresholds' -FROM test_type_definition WHERE code = 'Y_ELITE'; - --- Seed profiles for MT_FULL_SEQUENCE -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, min_mapping_quality, min_callable_pct, notes) -SELECT id, 'MT', 'SNP', 200.0, 100.0, 30.0, 0.99, 30.0, 0.98, 'Full mtDNA sequence SNP calling thresholds' -FROM test_type_definition WHERE code = 'MT_FULL_SEQUENCE'; - --- Seed profiles for SNP arrays (chip data — marker-based, not depth-based) -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, notes) -SELECT id, 'Y', 'SNP', 0.0, 0.0, 0.0, 0.0, 'Chip-based: confidence from marker count, not depth' -FROM test_type_definition WHERE code = 'SNP_ARRAY_23ANDME'; - -INSERT INTO coverage_expectation_profile (test_type_id, contig_name, variant_class, min_depth_high, min_depth_medium, min_depth_low, min_coverage_pct, notes) -SELECT id, 'Y', 
'SNP', 0.0, 0.0, 0.0, 0.0, 'Chip-based: confidence from marker count, not depth' -FROM test_type_definition WHERE code = 'SNP_ARRAY_ANCESTRY'; - -# --- !Downs - -DROP TABLE IF EXISTS public.coverage_expectation_profile; diff --git a/conf/evolutions/default/72.sql b/conf/evolutions/default/72.sql deleted file mode 100644 index 5f858b63..00000000 --- a/conf/evolutions/default/72.sql +++ /dev/null @@ -1,60 +0,0 @@ -# --- !Ups - --- Group Project definition -CREATE TABLE public.group_project ( - id SERIAL PRIMARY KEY, - project_guid UUID NOT NULL UNIQUE DEFAULT gen_random_uuid(), - project_name VARCHAR(100) NOT NULL, - project_type VARCHAR(30) NOT NULL CHECK (project_type IN ('HAPLOGROUP', 'SURNAME', 'GEOGRAPHIC', 'ETHNIC', 'RESEARCH', 'CUSTOM')), - target_haplogroup VARCHAR(100), - target_lineage VARCHAR(10) CHECK (target_lineage IN ('Y_DNA', 'MT_DNA', 'BOTH')), - description TEXT, - background_info TEXT, - join_policy VARCHAR(30) NOT NULL DEFAULT 'APPROVAL_REQUIRED' CHECK (join_policy IN ('OPEN', 'APPROVAL_REQUIRED', 'INVITE_ONLY', 'HAPLOGROUP_VERIFIED')), - haplogroup_requirement VARCHAR(255), - member_list_visibility VARCHAR(20) NOT NULL DEFAULT 'MEMBERS_ONLY' CHECK (member_list_visibility IN ('PUBLIC', 'MEMBERS_ONLY', 'ADMINS_ONLY', 'HIDDEN')), - str_policy VARCHAR(20) NOT NULL DEFAULT 'DISTANCE_ONLY' CHECK (str_policy IN ('HIDDEN', 'DISTANCE_ONLY', 'MODAL_COMPARISON', 'MEMBERS_ONLY_RAW', 'PUBLIC_RAW')), - snp_policy VARCHAR(30) NOT NULL DEFAULT 'TERMINAL_ONLY' CHECK (snp_policy IN ('HIDDEN', 'TERMINAL_ONLY', 'FULL_PATH', 'WITH_PRIVATE_VARIANTS')), - public_tree_view BOOLEAN NOT NULL DEFAULT FALSE, - succession_policy VARCHAR(30) DEFAULT 'CO_ADMIN_INHERITS' CHECK (succession_policy IN ('CO_ADMIN_INHERITS', 'MEMBER_VOTE', 'DECODINGUS_APPOINTS', 'PROJECT_CLOSES')), - owner_did VARCHAR(255) NOT NULL, - at_uri VARCHAR(512), - at_cid VARCHAR(255), - deleted BOOLEAN NOT NULL DEFAULT FALSE, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL 
DEFAULT NOW() -); - -CREATE INDEX idx_group_project_owner ON public.group_project (owner_did); -CREATE INDEX idx_group_project_type ON public.group_project (project_type); -CREATE INDEX idx_group_project_haplogroup ON public.group_project (target_haplogroup) WHERE target_haplogroup IS NOT NULL; - --- Group Project membership -CREATE TABLE public.group_project_member ( - id SERIAL PRIMARY KEY, - group_project_id INTEGER NOT NULL REFERENCES public.group_project(id), - citizen_did VARCHAR(255) NOT NULL, - biosample_at_uri VARCHAR(512), - role VARCHAR(20) NOT NULL DEFAULT 'MEMBER' CHECK (role IN ('ADMIN', 'CO_ADMIN', 'MODERATOR', 'CURATOR', 'MEMBER')), - status VARCHAR(20) NOT NULL DEFAULT 'PENDING_APPROVAL' CHECK (status IN ('PENDING_APPROVAL', 'ACTIVE', 'SUSPENDED', 'LEFT', 'REMOVED')), - display_name VARCHAR(50), - kit_id VARCHAR(50), - visibility JSONB NOT NULL DEFAULT '{}', - subgroup_ids TEXT[] NOT NULL DEFAULT '{}', - contribution_level VARCHAR(20) DEFAULT 'OBSERVER' CHECK (contribution_level IN ('OBSERVER', 'CONTRIBUTOR', 'ACTIVE_RESEARCHER')), - joined_at TIMESTAMP, - at_uri VARCHAR(512), - at_cid VARCHAR(255), - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - UNIQUE (group_project_id, citizen_did) -); - -CREATE INDEX idx_gpm_project ON public.group_project_member (group_project_id); -CREATE INDEX idx_gpm_citizen ON public.group_project_member (citizen_did); -CREATE INDEX idx_gpm_status ON public.group_project_member (status); - -# --- !Downs - -DROP TABLE IF EXISTS public.group_project_member; -DROP TABLE IF EXISTS public.group_project; diff --git a/conf/evolutions/default/73.sql b/conf/evolutions/default/73.sql deleted file mode 100644 index ba388cb5..00000000 --- a/conf/evolutions/default/73.sql +++ /dev/null @@ -1,30 +0,0 @@ -# --- !Ups - -CREATE SCHEMA IF NOT EXISTS billing; - -CREATE TABLE billing.patron_subscription ( - id SERIAL PRIMARY KEY, - user_id UUID NOT NULL, - patron_tier VARCHAR(30) NOT NULL CHECK 
(patron_tier IN ('SUPPORTER', 'CONTRIBUTOR', 'SUSTAINER', 'FOUNDING_PATRON')), - status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE' CHECK (status IN ('ACTIVE', 'CANCELLED', 'PAST_DUE', 'EXPIRED')), - payment_provider VARCHAR(20) NOT NULL CHECK (payment_provider IN ('STRIPE', 'PAYPAL')), - provider_subscription_id VARCHAR(255), - provider_customer_id VARCHAR(255), - amount_cents INTEGER NOT NULL, - currency VARCHAR(3) NOT NULL DEFAULT 'USD', - billing_interval VARCHAR(10) NOT NULL CHECK (billing_interval IN ('MONTHLY', 'YEARLY')), - current_period_start TIMESTAMPTZ, - current_period_end TIMESTAMPTZ, - cancelled_at TIMESTAMPTZ, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -CREATE INDEX idx_patron_sub_user ON billing.patron_subscription (user_id); -CREATE INDEX idx_patron_sub_status ON billing.patron_subscription (status); -CREATE INDEX idx_patron_sub_provider ON billing.patron_subscription (payment_provider, provider_subscription_id); - -# --- !Downs - -DROP TABLE IF EXISTS billing.patron_subscription; -DROP SCHEMA IF EXISTS billing; diff --git a/conf/evolutions/default/74.sql b/conf/evolutions/default/74.sql deleted file mode 100644 index b32d4547..00000000 --- a/conf/evolutions/default/74.sql +++ /dev/null @@ -1,106 +0,0 @@ -# --- !Ups - --- Match Discovery Engine tables (IBD-AV-1) - -CREATE TABLE match_suggestion ( - id BIGSERIAL PRIMARY KEY, - target_sample_guid UUID NOT NULL, - suggested_sample_guid UUID NOT NULL, - suggestion_type VARCHAR(30) NOT NULL CHECK (suggestion_type IN ('SHARED_MATCH', 'POPULATION_OVERLAP', 'HAPLOGROUP')), - score DOUBLE PRECISION NOT NULL, - metadata JSONB, - status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE' CHECK (status IN ('ACTIVE', 'DISMISSED', 'EXPIRED', 'CONVERTED')), - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - expires_at TIMESTAMPTZ, - UNIQUE (target_sample_guid, suggested_sample_guid, suggestion_type) -); - -CREATE INDEX idx_match_suggestion_target ON 
match_suggestion(target_sample_guid, status); -CREATE INDEX idx_match_suggestion_expires ON match_suggestion(expires_at) WHERE status = 'ACTIVE'; - -CREATE TABLE population_breakdown_cache ( - id BIGSERIAL PRIMARY KEY, - sample_guid UUID NOT NULL UNIQUE, - breakdown JSONB NOT NULL, - breakdown_hash VARCHAR(64) NOT NULL, - cached_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - source_at_uri VARCHAR(500) -); - -CREATE INDEX idx_pop_cache_sample ON population_breakdown_cache(sample_guid); - -CREATE TABLE population_overlap_score ( - id BIGSERIAL PRIMARY KEY, - sample_guid_1 UUID NOT NULL, - sample_guid_2 UUID NOT NULL, - overlap_score DOUBLE PRECISION NOT NULL, - computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - UNIQUE (sample_guid_1, sample_guid_2), - CHECK (sample_guid_1 < sample_guid_2) -); - -CREATE INDEX idx_pop_overlap_sample1 ON population_overlap_score(sample_guid_1); -CREATE INDEX idx_pop_overlap_sample2 ON population_overlap_score(sample_guid_2); - --- Match Request & Consent Tracking tables (IBD-AV-2) - -CREATE TABLE match_request_tracking ( - id BIGSERIAL PRIMARY KEY, - at_uri VARCHAR(500) NOT NULL UNIQUE, - requester_did VARCHAR(255) NOT NULL, - target_did VARCHAR(255), - from_sample_guid UUID NOT NULL, - to_sample_guid UUID NOT NULL, - request_type VARCHAR(30) NOT NULL DEFAULT 'FULL' CHECK (request_type IN ('AUTOSOMAL', 'Y_CHROMOSOME', 'MT_DNA', 'FULL')), - status VARCHAR(20) NOT NULL DEFAULT 'PENDING' CHECK (status IN ('PENDING', 'ACCEPTED', 'DECLINED', 'EXPIRED', 'WITHDRAWN', 'CANCELLED')), - discovery_reason JSONB, - message TEXT, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - expires_at TIMESTAMPTZ, - completed_at TIMESTAMPTZ -); - -CREATE INDEX idx_match_req_requester ON match_request_tracking(requester_did, status); -CREATE INDEX idx_match_req_target ON match_request_tracking(target_did, status); -CREATE INDEX idx_match_req_to_sample ON match_request_tracking(to_sample_guid, status); -CREATE INDEX 
idx_match_req_from_sample ON match_request_tracking(from_sample_guid, status); - -CREATE TABLE match_consent_tracking ( - id BIGSERIAL PRIMARY KEY, - at_uri VARCHAR(500) NOT NULL UNIQUE, - consenting_did VARCHAR(255) NOT NULL, - sample_guid UUID NOT NULL, - consent_level VARCHAR(20) NOT NULL CHECK (consent_level IN ('FULL', 'ANONYMOUS', 'PROJECT_ONLY')), - allowed_match_types JSONB, - share_contact_info BOOLEAN NOT NULL DEFAULT FALSE, - consented_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - expires_at TIMESTAMPTZ, - revoked_at TIMESTAMPTZ -); - -CREATE INDEX idx_match_consent_did ON match_consent_tracking(consenting_did); -CREATE INDEX idx_match_consent_sample ON match_consent_tracking(sample_guid); - --- Extend ibd_discovery_index for IBD-AV-4 linkage -ALTER TABLE ibd_discovery_index - ADD COLUMN IF NOT EXISTS match_request_at_uri VARCHAR(500), - ADD COLUMN IF NOT EXISTS requester_did VARCHAR(255), - ADD COLUMN IF NOT EXISTS target_did VARCHAR(255); - -CREATE INDEX idx_ibd_request_uri ON ibd_discovery_index(match_request_at_uri); - -# --- !Downs - -DROP INDEX IF EXISTS idx_ibd_request_uri; - -ALTER TABLE ibd_discovery_index - DROP COLUMN IF EXISTS match_request_at_uri, - DROP COLUMN IF EXISTS requester_did, - DROP COLUMN IF EXISTS target_did; - -DROP TABLE IF EXISTS match_consent_tracking; -DROP TABLE IF EXISTS match_request_tracking; -DROP TABLE IF EXISTS population_overlap_score; -DROP TABLE IF EXISTS population_breakdown_cache; -DROP TABLE IF EXISTS match_suggestion; diff --git a/conf/evolutions/default/8.sql b/conf/evolutions/default/8.sql deleted file mode 100644 index f81f54ae..00000000 --- a/conf/evolutions/default/8.sql +++ /dev/null @@ -1,59 +0,0 @@ -# --- !Ups ---- New tables to enable Pan Genome instead of traditional linear references - --- New Table: public.pangenome_alignment_metrics --- Tracks quality metrics for alignment against a pangenome graph or its specific paths/nodes. 
-CREATE TABLE public.pangenome_alignment_metrics -( - id BIGSERIAL PRIMARY KEY, - sequence_file_id BIGINT NOT NULL REFERENCES public.sequence_file (id) ON DELETE CASCADE, - pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - metric_level VARCHAR(50) NOT NULL CHECK (metric_level IN ('GRAPH_OVERALL', 'PATH', 'NODE', 'REGION')), - pangenome_path_id INTEGER REFERENCES public.pangenome_path (id), -- Null if metric_level is GRAPH_OVERALL or NODE - pangenome_node_id INTEGER REFERENCES public.pangenome_node (id), -- Null if metric_level is GRAPH_OVERALL or PATH/REGION - region_start_node_id INTEGER REFERENCES public.pangenome_node (id), -- For 'REGION' level, start of the specific segment - region_end_node_id INTEGER REFERENCES public.pangenome_node (id), -- For 'REGION' level, end of the specific segment - region_name VARCHAR(255), - region_length_bp BIGINT, - - -- Core Coverage Metrics - mean_depth DOUBLE PRECISION, - median_depth DOUBLE PRECISION, - percent_coverage_at_1x DOUBLE PRECISION, - percent_coverage_at_5x DOUBLE PRECISION, - percent_coverage_at_10x DOUBLE PRECISION, - percent_coverage_at_20x DOUBLE PRECISION, - percent_coverage_at_30x DOUBLE PRECISION, - bases_no_coverage BIGINT, - bases_low_quality_mapping BIGINT, - bases_callable BIGINT, - - -- Mapping Quality Metrics - mean_mapping_quality DOUBLE PRECISION, - - -- Metadata and Provenance - metrics_date TIMESTAMP NOT NULL DEFAULT NOW(), - analysis_tool VARCHAR(255) NOT NULL, - analysis_tool_version VARCHAR(50), - notes TEXT, - metadata JSONB -); - --- Indexes for performance -CREATE INDEX idx_pam_sequence_file_id ON public.pangenome_alignment_metrics (sequence_file_id); -CREATE INDEX idx_pam_pangenome_graph_id ON public.pangenome_alignment_metrics (pangenome_graph_id); -CREATE INDEX idx_pam_metric_level ON public.pangenome_alignment_metrics (metric_level); -CREATE INDEX idx_pam_pangenome_path_id ON public.pangenome_alignment_metrics (pangenome_path_id); -CREATE INDEX 
idx_pam_metrics_date ON public.pangenome_alignment_metrics (metrics_date); - -ALTER TABLE public.sequence_file - ADD COLUMN pangenome_graph_id INTEGER REFERENCES public.pangenome_graph(id); - --- UNUSED Table -DROP TABLE public.quality_metrics; - -# --- !Downs - -ALTER TABLE public.sequence_file DROP COLUMN pangenome_graph_id; - -DROP TABLE public.pangenome_alignment_metrics; \ No newline at end of file diff --git a/conf/evolutions/default/9.sql b/conf/evolutions/default/9.sql deleted file mode 100644 index 3a2c53c0..00000000 --- a/conf/evolutions/default/9.sql +++ /dev/null @@ -1,47 +0,0 @@ -# --- !Ups ---- Normalizing the metrics since Slick can't deal with that many columns -DROP TABLE IF EXISTS public.pangenome_alignment_metrics; - --- New Table 1: public.pangenome_alignment_metadata --- Stores general metadata and region info about the alignment metrics -CREATE TABLE public.pangenome_alignment_metadata -( - id BIGSERIAL PRIMARY KEY, - sequence_file_id BIGINT NOT NULL REFERENCES public.sequence_file (id) ON DELETE CASCADE, - pangenome_graph_id INTEGER NOT NULL REFERENCES public.pangenome_graph (id), - metric_level VARCHAR(50) NOT NULL CHECK (metric_level IN ('GRAPH_OVERALL', 'PATH', 'NODE', 'REGION')), - pangenome_path_id INTEGER REFERENCES public.pangenome_path (id), - pangenome_node_id INTEGER REFERENCES public.pangenome_node (id), - region_start_node_id INTEGER REFERENCES public.pangenome_node (id), -- For 'REGION' level, start of the specific segment - region_end_node_id INTEGER REFERENCES public.pangenome_node (id), -- For 'REGION' level, end of the specific segment - region_name VARCHAR(255), - region_length_bp BIGINT, - metrics_date TIMESTAMP NOT NULL DEFAULT NOW(), - analysis_tool VARCHAR(255) NOT NULL, - analysis_tool_version VARCHAR(50), - notes TEXT, - metadata JSONB -); - --- New Table 2: public.pangenome_alignment_coverage --- Stores detailed coverage and quality metrics, linked to pangenome_alignment_metadata -CREATE TABLE 
public.pangenome_alignment_coverage -( - alignment_metadata_id BIGINT PRIMARY KEY REFERENCES public.pangenome_alignment_metadata (id) ON DELETE CASCADE, - mean_depth DOUBLE PRECISION, - median_depth DOUBLE PRECISION, - percent_coverage_at_1x DOUBLE PRECISION, - percent_coverage_at_5x DOUBLE PRECISION, - percent_coverage_at_10x DOUBLE PRECISION, - percent_coverage_at_20x DOUBLE PRECISION, - percent_coverage_at_30x DOUBLE PRECISION, - bases_no_coverage BIGINT, - bases_low_quality_mapping BIGINT, - bases_callable BIGINT, - mean_mapping_quality DOUBLE PRECISION -); - -# --- !Downs - -drop table public.pangenome_alignment_coverage; -drop table public.pangenome_alignment_metadata; \ No newline at end of file diff --git a/conf/evolutions/metadata/1.sql b/conf/evolutions/metadata/1.sql deleted file mode 100644 index 16d7dfba..00000000 --- a/conf/evolutions/metadata/1.sql +++ /dev/null @@ -1,20 +0,0 @@ -# PDS Registrations schema -# --- !Ups - -CREATE TABLE pds_registrations ( - did TEXT PRIMARY KEY, - pds_url TEXT NOT NULL, - handle TEXT NOT NULL, - last_commit_cid TEXT, - last_commit_seq BIGINT DEFAULT 0, - cursor BIGINT NOT NULL DEFAULT 0, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -CREATE INDEX pds_registrations_handle_idx ON pds_registrations (handle); -CREATE INDEX pds_registrations_last_commit_cid_idx ON pds_registrations (last_commit_cid); - -# --- !Downs - -DROP TABLE IF EXISTS pds_registrations; \ No newline at end of file diff --git a/conf/evolutions/metadata/2.sql b/conf/evolutions/metadata/2.sql deleted file mode 100644 index 1259aa32..00000000 --- a/conf/evolutions/metadata/2.sql +++ /dev/null @@ -1,13 +0,0 @@ -# --- !Ups -ALTER TABLE pds_registrations ADD COLUMN leased_by_instance_id TEXT NULL; -ALTER TABLE pds_registrations ADD COLUMN lease_expires_at TIMESTAMPTZ NULL; -ALTER TABLE pds_registrations ADD COLUMN processing_status TEXT NOT NULL DEFAULT 'idle'; - -CREATE INDEX 
pds_registrations_lease_expires_at_idx ON pds_registrations (lease_expires_at); - -# --- !Downs -DROP INDEX pds_registrations_lease_expires_at_idx; - -ALTER TABLE pds_registrations DROP COLUMN processing_status; -ALTER TABLE pds_registrations DROP COLUMN lease_expires_at; -ALTER TABLE pds_registrations DROP COLUMN leased_by_instance_id; diff --git a/conf/evolutions/metadata/3.sql b/conf/evolutions/metadata/3.sql deleted file mode 100644 index 022dce3a..00000000 --- a/conf/evolutions/metadata/3.sql +++ /dev/null @@ -1,62 +0,0 @@ -# PDS fleet management — status tracking, heartbeat, software versions, capabilities - -# --- !Ups - -CREATE TABLE pds_node ( - id SERIAL PRIMARY KEY, - did TEXT NOT NULL UNIQUE, - pds_url TEXT NOT NULL, - handle TEXT, - node_name TEXT, - software_version TEXT, - status TEXT NOT NULL DEFAULT 'UNKNOWN' - CHECK (status IN ('ONLINE', 'OFFLINE', 'BUSY', 'ERROR', 'UNKNOWN')), - capabilities JSONB NOT NULL DEFAULT '{}', - last_heartbeat TIMESTAMPTZ, - last_commit_cid TEXT, - last_commit_rev TEXT, - ip_address TEXT, - os_info TEXT, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -CREATE INDEX idx_pds_node_status ON pds_node(status); -CREATE INDEX idx_pds_node_last_heartbeat ON pds_node(last_heartbeat); -CREATE INDEX idx_pds_node_software_version ON pds_node(software_version); - -CREATE TABLE pds_heartbeat_log ( - id SERIAL PRIMARY KEY, - pds_node_id INTEGER NOT NULL REFERENCES pds_node(id), - status TEXT NOT NULL - CHECK (status IN ('ONLINE', 'OFFLINE', 'BUSY', 'ERROR', 'UNKNOWN')), - software_version TEXT, - load_metrics JSONB, - processing_queue_size INTEGER DEFAULT 0, - error_message TEXT, - recorded_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -CREATE INDEX idx_pds_heartbeat_log_node ON pds_heartbeat_log(pds_node_id); -CREATE INDEX idx_pds_heartbeat_log_recorded_at ON pds_heartbeat_log(recorded_at); - -CREATE TABLE pds_fleet_config ( - id SERIAL PRIMARY KEY, - config_key TEXT NOT NULL 
UNIQUE, - config_value TEXT NOT NULL, - description TEXT, - updated_by TEXT, - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -INSERT INTO pds_fleet_config (config_key, config_value, description) -VALUES - ('target_software_version', '0.1.0', 'Target PDS software version for fleet'), - ('heartbeat_interval_seconds', '300', 'Expected heartbeat interval in seconds'), - ('offline_threshold_seconds', '900', 'Seconds without heartbeat before marking OFFLINE'); - -# --- !Downs - -DROP TABLE IF EXISTS pds_fleet_config; -DROP TABLE IF EXISTS pds_heartbeat_log; -DROP TABLE IF EXISTS pds_node; diff --git a/conf/evolutions/metadata/4.sql b/conf/evolutions/metadata/4.sql deleted file mode 100644 index b0f23e45..00000000 --- a/conf/evolutions/metadata/4.sql +++ /dev/null @@ -1,36 +0,0 @@ -# Submission provenance — audit which PDS proposed specific variants and haplogroup calls - -# --- !Ups - -CREATE TABLE pds_submission ( - id SERIAL PRIMARY KEY, - pds_node_id INTEGER NOT NULL REFERENCES pds_node(id), - submission_type TEXT NOT NULL - CHECK (submission_type IN ('HAPLOGROUP_CALL', 'VARIANT_CALL', 'BRANCH_PROPOSAL', 'PRIVATE_VARIANT', 'STR_PROFILE')), - biosample_id INTEGER, - biosample_guid UUID, - proposed_value TEXT NOT NULL, - confidence_score DOUBLE PRECISION, - algorithm_version TEXT, - software_version TEXT, - payload JSONB, - status TEXT NOT NULL DEFAULT 'PENDING' - CHECK (status IN ('PENDING', 'ACCEPTED', 'REJECTED', 'SUPERSEDED')), - reviewed_by TEXT, - reviewed_at TIMESTAMPTZ, - review_notes TEXT, - at_uri TEXT, - at_cid TEXT, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -); - -CREATE INDEX idx_pds_submission_node ON pds_submission(pds_node_id); -CREATE INDEX idx_pds_submission_type ON pds_submission(submission_type); -CREATE INDEX idx_pds_submission_biosample ON pds_submission(biosample_id); -CREATE INDEX idx_pds_submission_biosample_guid ON pds_submission(biosample_guid); -CREATE INDEX idx_pds_submission_status ON pds_submission(status); -CREATE INDEX 
idx_pds_submission_created ON pds_submission(created_at); - -# --- !Downs - -DROP TABLE IF EXISTS pds_submission; diff --git a/conf/logback.xml b/conf/logback.xml deleted file mode 100644 index 9dc376c8..00000000 --- a/conf/logback.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - ${LOG_PATH}/application.log - - - - ${LOG_PATH}/application.%d{yyyy-MM-dd}.log.gz - - - 30 - - - 3GB - - - - UTF-8 - %d{yyyy-MM-dd HH:mm:ss} %highlight(%-5level) %cyan(%logger{36}) %magenta(%X{pekkoSource}) %msg%n - - - - - - UTF-8 - %d{yyyy-MM-dd HH:mm:ss} %highlight(%-5level) %cyan(%logger{36}) %magenta(%X{pekkoSource}) %msg%n - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/conf/messages b/conf/messages deleted file mode 100644 index 014a19fe..00000000 --- a/conf/messages +++ /dev/null @@ -1,335 +0,0 @@ -# General -app.name = Decoding Us - -# Navigation -nav.home = Home -nav.about = About -nav.ytree = Y-DNA Tree -nav.mtree = MT-DNA Tree -nav.references = References -nav.tools = Research Tools -nav.coverage = Coverage Benchmarks -nav.contact = Contact -nav.reputation = Reputation System -nav.terms = Terms of Use -nav.privacy = Privacy Policy -nav.cookies = Cookie Usage -nav.faq = FAQ -nav.api = API Documentation -nav.profile = Profile -nav.candidates = Review Candidates -nav.messages = Messages -nav.supportAdmin = Support -nav.logout = Logout -nav.login = Login - -# Footer -footer.copyright = Copyright © {0}, Decoding-Us.com - -# Home Page -home.title = Decoding Us -home.welcome = Welcome to Decoding Us - -# Reputation System -reputation.title = Community Reputation System -reputation.link = Reputation System - -# Legal -legal.cookies.title = Cookie Usage -legal.privacy.title = Privacy Policy -legal.terms.title = Terms of Use -legal.faq.title = FAQ - -# Cookie Consent Banner -cookies.banner.message = We use cookies only for authentication. No tracking, no marketing, no third-party sharing. 
-cookies.banner.learnMore = Learn more -cookies.banner.accept = Accept - -# Generic -generic.notAvailable = Not Available -generic.submit = Submit -generic.loading = Loading... -generic.show = Show -generic.hide = Hide -generic.cancel = Cancel - -# Pagination -pagination.pageOf = Page {0} of {1} -pagination.previous = Previous -pagination.next = Next -pagination.first = First -pagination.last = Last -pagination.itemsPerPage = Items per page: - -# Biosample Report -biosample.details = Biosample Details -biosample.header.sampleId = Sample ID -biosample.header.accession = Accession Number -biosample.header.origin = Geographic Origin -biosample.header.timePeriod = Time Period -biosample.header.datingPeriod = Dating Period -biosample.header.sex = Sex -biosample.header.yHaplo = Y-DNA Haplogroup -biosample.header.mtHaplo = mt-DNA Haplogroup -biosample.header.population = Population Group -biosample.analysisMethod = Analysis method: {0} - -# Map -map.title = Biosamples Geographic Distribution -map.loading = Loading samples... -map.totalSamplesPrefix = Total Samples: -map.totalSamples = Total Samples: {0} (this key is for informational purposes for now, should not be used in views) - -# Contact -contact.title = Contact Us -contact.name.label = Name -contact.name.placeholder = Your name -contact.name.help = Please enter your full name -contact.email.label = Email -contact.email.placeholder = your.email@example.com -contact.email.help = We'll never share your email with anyone else -contact.subject.label = Subject -contact.subject.placeholder = Subject of your message -contact.message.label = Message -contact.message.placeholder = Your message -contact.send = Send Message -contact.authenticated.info = You're logged in. Your message will be linked to your account and you can track responses in your message history. 
-contact.viewHistory = View Message History - -# Support - User Messages -support.myMessages.title = My Messages -support.myMessages.heading = My Message History -support.myMessages.empty = You haven't sent any messages yet. -support.newMessage = New Message -support.status.new = New -support.status.read = Read -support.status.replied = Replied -support.status.closed = Closed - -# Support - Admin -support.admin.title = Support Messages -support.admin.heading = Contact Message Management -support.admin.filterStatus = Filter by status: -support.admin.allStatuses = All Statuses -support.admin.noMessages = No messages found. -support.admin.table.status = Status -support.admin.table.from = From -support.admin.table.subject = Subject -support.admin.table.date = Date -support.admin.view = View -support.admin.authenticated = User -support.admin.registeredUser = Registered User -support.admin.anonymous = Anonymous -support.admin.messageDetail.title = Message Details -support.admin.backToList = Back to Messages -support.admin.receivedAt = Received -support.admin.replies = Replies -support.admin.repliedBy = Replied by -support.admin.emailSent = Email sent -support.admin.sendReply = Send Reply -support.admin.replyPlaceholder = Type your reply here... -support.admin.sendEmailCopy = Send email copy to -support.admin.noEmailForAuth = This user is logged in. Replies will appear in their message history. 
-support.admin.submitReply = Send Reply -support.admin.senderInfo = Sender Information -support.admin.type = Type -support.admin.handle = Handle -support.admin.userId = User ID -support.admin.name = Name -support.admin.email = Email -support.admin.actions = Actions -support.admin.markClosed = Mark as Closed -support.admin.reopen = Reopen - -# Errors -error.forbidden.title = Access Denied -error.forbidden.heading = Access Denied -error.notFound.title = Not Found -error.notFound.heading = Page Not Found - -# Coverage -coverage.subtitle = Aggregated coverage statistics grouped by laboratory, test type, and contig. -coverage.selectLab = Select Sequencing Laboratory: -coverage.selectLab.default = -- Select a lab -- -coverage.selectLab.prompt = Please select a laboratory to view benchmark data. -coverage.metrics.title = Explanation of Metrics -coverage.whyMatters = Why This Matters -coverage.technicalDetails = Technical Details - -# Benchmarks -benchmark.header.testType = Test Type -benchmark.header.contig = Contig -benchmark.header.meanReadLen = Mean Read Length -benchmark.header.readLenRange = Read Length Range -benchmark.header.meanInsertSize = Mean Insert Size -benchmark.header.insertSizeRange = Insert Size Range -benchmark.header.meanDepth = Mean Depth -benchmark.header.noCoverage = Bases No Coverage -benchmark.header.lowQuality = Low Quality Mapping -benchmark.header.callable = Callable Bases -benchmark.header.meanMappingQuality = Mean Mapping Quality -benchmark.header.samples = Samples -benchmark.noData = No benchmark data available for this laboratory. - -# Tree -tree.legend.established = Established haplogroup -tree.legend.updated = Updated in the last year -tree.noData = No tree data available -tree.loading = Loading haplogroup tree... 
-tree.jumpTo = Jump to haplogroup: -tree.go = Go -tree.reRoot = Click to re-root -tree.verticalCladogram = Vertical Cladogram -tree.clickToSeeVariants = Click to see {0} defining Variants(s) - -# Sidebar -sidebar.title = Defining Variant(s) for {0} -sidebar.noVariants = No defining Variants found for {0}. -sidebar.type = Type: {0} -sidebar.refLocation = Reference Location(s): - -# References Page -references.title = Publications -references.viewMap = View Map -references.search.placeholder = Search by title, author, DOI... -references.search.help = Search publications by title, author name, journal, or DOI - -# Publication -publication.authors = Authors: -publication.authors.etAl = (See paper for full author list) -publication.journal = Journal: -publication.published = Published: -publication.abstract = Abstract -publication.additionalDetails = Additional Details -publication.classification = Classification: -publication.access = Publication Access: -publication.impact = Impact Metrics: -publication.cited = Cited: {0} -publication.percentile = Percentile: {0}% -publication.genomicStudies = Genomic Studies -publication.sampleCount = Sample Count: {0} -publication.showSamples = Show Samples -publication.hideSamples = Hide Samples -publication.directLink = Direct Link -publication.source.ena = European Nucleotide Archive -publication.source.ncbi = NCBI BioProject -publication.list.notFound = No publications found matching "{0}". Try a different search term. -publication.list.empty = No publications available. 
-publication.list.total = {0} publications -publication.submit.title = Submit Publication -publication.submit.doi.label = DOI -publication.submit.doi.placeholder = Enter DOI or DOI URL (e.g., 10.1234/example or https://doi.org/10.1234/example) -publication.submit.doi.help = Enter either a DOI (10.1234/example) or the full DOI URL (https://doi.org/10.1234/example) -publication.submit.ena.label = Sequence Archive Study Accession (Optional) -publication.submit.ena.placeholder = Enter sequence archive study accession if available (e.g., PRJEB12345, PRJNA123456) -publication.submit.ena.help = If the paper mentions a sequence archive study (e.g., from ENA or SRA), enter its accession here -publication.submit.forceRefresh = Force refresh (Update even if publication already exists) - -# Publication Candidates (New) -publicationCandidates.title = Publication Candidates Review -publicationCandidates.heading = Publication Candidates for Review -publicationCandidates.noCandidates = No pending publication candidates found. -publicationCandidates.table.title = Title -publicationCandidates.table.journal = Journal -publicationCandidates.table.date = Publication Date -publicationCandidates.table.relevance = Relevance Score -publicationCandidates.table.status = Status -publicationCandidates.table.actions = Actions -publicationCandidates.action.accept = Accept -publicationCandidates.action.reject = Reject -publicationCandidates.action.reviewed = Reviewed -publicationCandidates.confirmAccept = Are you sure you want to accept this candidate and import it as a new publication? -publicationCandidates.confirmReject = Are you sure you want to reject this candidate? -publicationCandidates.acceptSuccess = Candidate '{0}' accepted and imported successfully. -publicationCandidates.acceptFailed = Failed to accept candidate or import publication. -publicationCandidates.acceptError = Error accepting candidate: {0} -publicationCandidates.rejectSuccess = Candidate rejected successfully. 
-publicationCandidates.rejectFailed = Failed to reject candidate. -publicationCandidates.rejectError = Error rejecting candidate: {0} -publicationCandidates.promptRejectReason = Please enter a reason for rejecting this candidate. -publicationCandidates.emptyRejectReasonAlert = Rejection reason cannot be empty. - -# Auth -auth.login.title = Login -auth.login.heading = Sign In -auth.login.federation.title = Federated Login -auth.login.federation.description = Sign in with your AT Protocol identity from any compatible provider (Bluesky, self-hosted PDS, or future DecodingUs accounts). -auth.login.handle = Handle or DID -auth.login.handle.placeholder = alice.bsky.social or did:plc:... -auth.login.handle.help = Enter your full handle (e.g., alice.bsky.social) or DID. We'll find your identity provider automatically. -auth.login.password = App Password -auth.login.passwordHelp = Use an App Password from your identity provider. Learn more about App Passwords. -auth.login.submit = Sign In - -# Auth - App Password Help -auth.appPasswordHelp.title = What is an App Password? -auth.appPasswordHelp.heading = What is an App Password? -auth.appPasswordHelp.p1 = An App Password is a unique, one-time password generated by your AT Protocol identity provider (such as Bluesky, a self-hosted PDS, or other compatible services) specifically for third-party applications like this one. It allows you to grant access to an application without sharing your main account password. -auth.appPasswordHelp.p2 = Using App Passwords enhances your security by: -auth.appPasswordHelp.li1 = Allowing you to revoke access for a single application without changing your main password. -auth.appPasswordHelp.li2 = Limiting the permissions an application might have (though for this app, it grants full session access). 
-auth.appPasswordHelp.generateSteps = How to Generate an App Password: -auth.appPasswordHelp.step1 = Open your AT Protocol client or identity provider's settings (e.g., bsky.app for Bluesky, or your self-hosted PDS admin panel). -auth.appPasswordHelp.step2 = Navigate to Settings > App Passwords (or Privacy & Security > App Passwords). -auth.appPasswordHelp.step3 = Click on Add App Password. -auth.appPasswordHelp.step4 = Give it a descriptive name (e.g., "DecodingUs App"). -auth.appPasswordHelp.step5 = Copy the generated password string. This is the only time you will see it. -auth.appPasswordHelp.note = This generated password is what you'll use in the Login form. Treat it like a regular password. - -# Profile -profile.title = User Profile -profile.heading = Your Profile -profile.accountDetails = Account Details -profile.handle = Handle -profile.did = DID -profile.editProfile = Edit Profile -profile.displayName = Display Name -profile.displayNameHelp = This name will be visible to other users in the messaging system. -profile.save = Save Profile - -# Variant Browser -nav.variants = Variant Browser -variants.browser.title = Variant Browser -variants.browser.heading = Variant Browser -variants.browser.description = Search the Y-DNA and mtDNA variant database. Find SNPs by name, rsID, or alias. -variants.browser.variants = Variants -variants.browser.searchPlaceholder = Search by rsID, SNP name, or alias (e.g., M269, rs9786076)... -variants.browser.searchHelp = Enter at least 2 characters to search. Searches variant names, rsIDs, and aliases. -variants.browser.selectVariant = Select a variant to view details -variants.browser.foundMatching = Found {0} variant groups matching "{1}" -variants.browser.showingTotal = Showing {0} total variant groups -variants.browser.pageOf = Page {0} of {1} -variants.browser.noResults = No variants found matching "{0}". -variants.browser.enterSearch = Enter a search term to find variants. 
-variants.browser.col.name = Name / rsID -variants.browser.col.alleles = Anc/Der -variants.browser.col.type = Type -variants.browser.col.builds = Builds -variants.browser.strandDiffers = Strand differs between builds - -# Variant Detail Panel -variants.detail.rsId = rsID -variants.detail.commonName = Common Name -variants.detail.ancestral = Ancestral -variants.detail.derived = Derived -variants.detail.type = Type -variants.detail.status = Naming Status -variants.detail.altNames = Alternative Names -variants.detail.source = Source -variants.detail.refBuilds = Reference Builds -variants.detail.strandDiff = Strand difference (reverse complement) -variants.detail.build = Build -variants.detail.position = Position -variants.detail.alleles = Alleles -variants.detail.motifRepeats = Motif / Repeats -variants.detail.usedBy = Used By Haplogroups -variants.detail.noHaplogroups = This variant is not associated with any haplogroups. -variants.detail.more = more -variants.detail.aliasType.snpNames = SNP Names -variants.detail.aliasType.dbsnp = dbSNP IDs - -# Language -lang.switch = Language -lang.en = English -lang.fr = Français -lang.es = Español diff --git a/conf/messages.es b/conf/messages.es deleted file mode 100644 index bde14d94..00000000 --- a/conf/messages.es +++ /dev/null @@ -1,335 +0,0 @@ -# General -app.name = Decoding Us - -# Navigation -nav.home = Inicio -nav.about = Acerca de -nav.ytree = Árbol Y-ADN -nav.mtree = Árbol ADN-mt -nav.references = Referencias -nav.tools = Herramientas de investigación -nav.coverage = Benchmarks de cobertura -nav.contact = Contacto -nav.reputation = Sistema de reputación -nav.terms = Condiciones de uso -nav.privacy = Política de privacidad -nav.cookies = Uso de cookies -nav.faq = Preguntas frecuentes -nav.api = Documentación de la API -nav.profile = Perfil -nav.candidates = Revisar candidatos -nav.messages = Mensajes -nav.supportAdmin = Soporte -nav.logout = Cerrar sesión -nav.login = Iniciar sesión - -# Footer -footer.copyright = 
Copyright © {0}, Decoding-Us.com - -# Home Page -home.title = Decoding Us -home.welcome = Bienvenido a Decoding Us - -# Reputation System -reputation.title = Sistema de reputación comunitaria -reputation.link = Sistema de reputación - -# Legal -legal.cookies.title = Uso de cookies -legal.privacy.title = Política de privacidad -legal.terms.title = Condiciones de uso -legal.faq.title = Preguntas frecuentes - -# Cookie Consent Banner -cookies.banner.message = Usamos cookies únicamente para la autenticación. Sin rastreo, sin marketing, sin compartir con terceros. -cookies.banner.learnMore = Más información -cookies.banner.accept = Aceptar - -# Generic -generic.notAvailable = No disponible -generic.submit = Enviar -generic.loading = Cargando... -generic.show = Mostrar -generic.hide = Ocultar -generic.cancel = Cancelar - -# Pagination -pagination.pageOf = Página {0} de {1} -pagination.previous = Anterior -pagination.next = Siguiente -pagination.first = Primero -pagination.last = Último -pagination.itemsPerPage = Elementos por página: - -# Biosample Report -biosample.details = Detalles de la muestra -biosample.header.sampleId = Identificador de la muestra -biosample.header.accession = Número de acceso -biosample.header.origin = Origen geográfico -biosample.header.timePeriod = Período -biosample.header.datingPeriod = Período de datación -biosample.header.sex = Sexo -biosample.header.yHaplo = Haplogrupo Y-ADN -biosample.header.mtHaplo = Haplogrupo ADN-mt -biosample.header.population = Grupo de población -biosample.analysisMethod = Método de análisis: {0} - -# Map -map.title = Distribución geográfica de las muestras -map.loading = Cargando muestras... 
-map.totalSamplesPrefix = Total de muestras: -map.totalSamples = Total de muestras: {0} - -# Contact -contact.title = Contáctenos -contact.name.label = Nombre -contact.name.placeholder = Su nombre -contact.name.help = Por favor, introduzca su nombre completo -contact.email.label = Correo electrónico -contact.email.placeholder = su.correo@ejemplo.com -contact.email.help = Nunca compartiremos su correo electrónico con nadie -contact.subject.label = Asunto -contact.subject.placeholder = Asunto de su mensaje -contact.message.label = Mensaje -contact.message.placeholder = Su mensaje -contact.send = Enviar mensaje -contact.authenticated.info = Ha iniciado sesión. Su mensaje se vinculará a su cuenta y podrá seguir las respuestas en su historial. -contact.viewHistory = Ver historial de mensajes - -# Support - User Messages -support.myMessages.title = Mis mensajes -support.myMessages.heading = Historial de mis mensajes -support.myMessages.empty = Aún no ha enviado ningún mensaje. -support.newMessage = Nuevo mensaje -support.status.new = Nuevo -support.status.read = Leído -support.status.replied = Respondido -support.status.closed = Cerrado - -# Support - Admin -support.admin.title = Mensajes de soporte -support.admin.heading = Gestión de mensajes de contacto -support.admin.filterStatus = Filtrar por estado: -support.admin.allStatuses = Todos los estados -support.admin.noMessages = No se encontraron mensajes. 
-support.admin.table.status = Estado -support.admin.table.from = De -support.admin.table.subject = Asunto -support.admin.table.date = Fecha -support.admin.view = Ver -support.admin.authenticated = Usuario -support.admin.registeredUser = Usuario registrado -support.admin.anonymous = Anónimo -support.admin.messageDetail.title = Detalles del mensaje -support.admin.backToList = Volver a los mensajes -support.admin.receivedAt = Recibido -support.admin.replies = Respuestas -support.admin.repliedBy = Respondido por -support.admin.emailSent = Correo enviado -support.admin.sendReply = Enviar respuesta -support.admin.replyPlaceholder = Escriba su respuesta aquí... -support.admin.sendEmailCopy = Enviar copia por correo a -support.admin.noEmailForAuth = Este usuario ha iniciado sesión. Las respuestas aparecerán en su historial de mensajes. -support.admin.submitReply = Enviar respuesta -support.admin.senderInfo = Información del remitente -support.admin.type = Tipo -support.admin.handle = Identificador -support.admin.userId = ID de usuario -support.admin.name = Nombre -support.admin.email = Correo electrónico -support.admin.actions = Acciones -support.admin.markClosed = Marcar como cerrado -support.admin.reopen = Reabrir - -# Errors -error.forbidden.title = Acceso denegado -error.forbidden.heading = Acceso denegado -error.notFound.title = No encontrado -error.notFound.heading = Página no encontrada - -# Coverage -coverage.subtitle = Estadísticas de cobertura agregadas por laboratorio, tipo de prueba y contig. -coverage.selectLab = Seleccionar laboratorio de secuenciación: -coverage.selectLab.default = -- Seleccionar un laboratorio -- -coverage.selectLab.prompt = Por favor, seleccione un laboratorio para ver los datos de benchmark. 
-coverage.metrics.title = Explicación de las métricas -coverage.whyMatters = Por qué es importante -coverage.technicalDetails = Detalles técnicos - -# Benchmarks -benchmark.header.testType = Tipo de prueba -benchmark.header.contig = Contig -benchmark.header.meanReadLen = Longitud media de lectura -benchmark.header.readLenRange = Rango de longitud de lectura -benchmark.header.meanInsertSize = Tamaño medio de inserción -benchmark.header.insertSizeRange = Rango de tamaño de inserción -benchmark.header.meanDepth = Profundidad media -benchmark.header.noCoverage = Bases sin cobertura -benchmark.header.lowQuality = Mapeo de baja calidad -benchmark.header.callable = Bases utilizables -benchmark.header.meanMappingQuality = Calidad media de mapeo -benchmark.header.samples = Muestras -benchmark.noData = No hay datos de benchmark disponibles para este laboratorio. - -# Tree -tree.legend.established = Haplogrupo establecido -tree.legend.updated = Actualizado en el último año -tree.noData = No hay datos del árbol disponibles -tree.loading = Cargando árbol de haplogrupos... -tree.jumpTo = Ir al haplogrupo: -tree.go = Ir -tree.reRoot = Clic para cambiar la raíz -tree.verticalCladogram = Cladograma vertical -tree.clickToSeeVariants = Clic para ver {0} variante(s) definidora(s) - -# Sidebar -sidebar.title = Variante(s) definidora(s) para {0} -sidebar.noVariants = No se encontraron variantes definidoras para {0}. -sidebar.type = Tipo: {0} -sidebar.refLocation = Ubicación(es) de referencia: - -# References Page -references.title = Publicaciones -references.viewMap = Ver mapa -references.search.placeholder = Buscar por título, autor, DOI... 
-references.search.help = Buscar publicaciones por título, nombre de autor, revista o DOI - -# Publication -publication.authors = Autores: -publication.authors.etAl = (Ver el artículo para la lista completa de autores) -publication.journal = Revista: -publication.published = Publicado: -publication.abstract = Resumen -publication.additionalDetails = Detalles adicionales -publication.classification = Clasificación: -publication.access = Acceso a la publicación: -publication.impact = Métricas de impacto: -publication.cited = Citado: {0} -publication.percentile = Percentil: {0}% -publication.genomicStudies = Estudios genómicos -publication.sampleCount = Cantidad de muestras: {0} -publication.showSamples = Mostrar muestras -publication.hideSamples = Ocultar muestras -publication.directLink = Enlace directo -publication.source.ena = Archivo Europeo de Nucleótidos -publication.source.ncbi = BioProyecto NCBI -publication.list.notFound = No se encontraron publicaciones que coincidan con «{0}». Intente con otro término de búsqueda. -publication.list.empty = No hay publicaciones disponibles. 
-publication.list.total = {0} publicaciones -publication.submit.title = Enviar publicación -publication.submit.doi.label = DOI -publication.submit.doi.placeholder = Ingrese el DOI o la URL del DOI (ej.: 10.1234/ejemplo o https://doi.org/10.1234/ejemplo) -publication.submit.doi.help = Ingrese un DOI (10.1234/ejemplo) o la URL completa del DOI (https://doi.org/10.1234/ejemplo) -publication.submit.ena.label = Acceso de estudio de archivo de secuencias (opcional) -publication.submit.ena.placeholder = Ingrese el acceso de estudio de archivo si está disponible (ej.: PRJEB12345, PRJNA123456) -publication.submit.ena.help = Si el artículo menciona un estudio de archivo de secuencias (ej.: ENA o SRA), ingrese su acceso aquí -publication.submit.forceRefresh = Forzar actualización (actualizar aunque la publicación ya exista) - -# Publication Candidates -publicationCandidates.title = Revisión de publicaciones candidatas -publicationCandidates.heading = Publicaciones candidatas para revisar -publicationCandidates.noCandidates = No se encontraron publicaciones candidatas pendientes. -publicationCandidates.table.title = Título -publicationCandidates.table.journal = Revista -publicationCandidates.table.date = Fecha de publicación -publicationCandidates.table.relevance = Puntuación de relevancia -publicationCandidates.table.status = Estado -publicationCandidates.table.actions = Acciones -publicationCandidates.action.accept = Aceptar -publicationCandidates.action.reject = Rechazar -publicationCandidates.action.reviewed = Revisado -publicationCandidates.confirmAccept = ¿Está seguro de que desea aceptar este candidato e importarlo como nueva publicación? -publicationCandidates.confirmReject = ¿Está seguro de que desea rechazar este candidato? -publicationCandidates.acceptSuccess = Candidato «{0}» aceptado e importado con éxito. -publicationCandidates.acceptFailed = Error al aceptar el candidato o importar la publicación. 
-publicationCandidates.acceptError = Error al aceptar el candidato: {0} -publicationCandidates.rejectSuccess = Candidato rechazado con éxito. -publicationCandidates.rejectFailed = Error al rechazar el candidato. -publicationCandidates.rejectError = Error al rechazar el candidato: {0} -publicationCandidates.promptRejectReason = Por favor, ingrese una razón para rechazar este candidato. -publicationCandidates.emptyRejectReasonAlert = La razón del rechazo no puede estar vacía. - -# Auth -auth.login.title = Iniciar sesión -auth.login.heading = Iniciar sesión -auth.login.federation.title = Inicio de sesión federado -auth.login.federation.description = Inicie sesión con su identidad AT Protocol desde cualquier proveedor compatible (Bluesky, PDS auto-alojado o futuras cuentas DecodingUs). -auth.login.handle = Identificador o DID -auth.login.handle.placeholder = alice.bsky.social o did:plc:... -auth.login.handle.help = Ingrese su identificador completo (ej.: alice.bsky.social) o DID. Encontraremos su proveedor de identidad automáticamente. -auth.login.password = Contraseña de aplicación -auth.login.passwordHelp = Use una contraseña de aplicación de su proveedor de identidad. Más información sobre las contraseñas de aplicación. -auth.login.submit = Iniciar sesión - -# Auth - App Password Help -auth.appPasswordHelp.title = ¿Qué es una contraseña de aplicación? -auth.appPasswordHelp.heading = ¿Qué es una contraseña de aplicación? -auth.appPasswordHelp.p1 = Una contraseña de aplicación es una contraseña única generada por su proveedor de identidad AT Protocol (como Bluesky, un PDS auto-alojado u otros servicios compatibles) específicamente para aplicaciones de terceros como esta. Le permite otorgar acceso a una aplicación sin compartir su contraseña principal. -auth.appPasswordHelp.p2 = El uso de contraseñas de aplicación mejora su seguridad al: -auth.appPasswordHelp.li1 = Permitirle revocar el acceso de una sola aplicación sin cambiar su contraseña principal. 
-auth.appPasswordHelp.li2 = Limitar los permisos que una aplicación puede tener (aunque para esta aplicación, otorga acceso completo a la sesión). -auth.appPasswordHelp.generateSteps = Cómo generar una contraseña de aplicación: -auth.appPasswordHelp.step1 = Abra la configuración de su cliente AT Protocol o proveedor de identidad (ej.: bsky.app para Bluesky, o el panel de administración de su PDS auto-alojado). -auth.appPasswordHelp.step2 = Navegue a Configuración > Contraseñas de aplicación (o Privacidad y seguridad > Contraseñas de aplicación). -auth.appPasswordHelp.step3 = Haga clic en Agregar contraseña de aplicación. -auth.appPasswordHelp.step4 = Asígnele un nombre descriptivo (ej.: "App DecodingUs"). -auth.appPasswordHelp.step5 = Copie la contraseña generada. Esta es la única vez que la verá. -auth.appPasswordHelp.note = Esta contraseña generada es la que usará en el formulario de inicio de sesión. Trátela como una contraseña normal. - -# Profile -profile.title = Perfil de usuario -profile.heading = Su perfil -profile.accountDetails = Detalles de la cuenta -profile.handle = Identificador -profile.did = DID -profile.editProfile = Editar perfil -profile.displayName = Nombre para mostrar -profile.displayNameHelp = Este nombre será visible para otros usuarios en el sistema de mensajería. -profile.save = Guardar perfil - -# Variant Browser -nav.variants = Navegador de variantes -variants.browser.title = Navegador de variantes -variants.browser.heading = Navegador de variantes -variants.browser.description = Busque en la base de datos de variantes de Y-ADN y ADN-mt. Encuentre SNP por nombre, rsID o alias. -variants.browser.variants = Variantes -variants.browser.searchPlaceholder = Buscar por rsID, nombre de SNP o alias (ej.: M269, rs9786076)... -variants.browser.searchHelp = Ingrese al menos 2 caracteres para buscar. Busca en nombres de variantes, rsID y alias. 
-variants.browser.selectVariant = Seleccione una variante para ver los detalles -variants.browser.foundMatching = Se encontraron {0} grupos de variantes para «{1}» -variants.browser.showingTotal = Mostrando {0} grupos de variantes en total -variants.browser.pageOf = Página {0} de {1} -variants.browser.noResults = No se encontraron variantes para «{0}». -variants.browser.enterSearch = Ingrese un término de búsqueda para encontrar variantes. -variants.browser.col.name = Nombre / rsID -variants.browser.col.alleles = Anc/Der -variants.browser.col.type = Tipo -variants.browser.col.builds = Builds -variants.browser.strandDiffers = El hebra difiere entre builds - -# Variant Detail Panel -variants.detail.rsId = rsID -variants.detail.commonName = Nombre común -variants.detail.ancestral = Ancestral -variants.detail.derived = Derivado -variants.detail.type = Tipo -variants.detail.status = Estado de denominación -variants.detail.altNames = Nombres alternativos -variants.detail.source = Fuente -variants.detail.refBuilds = Builds de referencia -variants.detail.strandDiff = Diferencia de hebra (complemento inverso) -variants.detail.build = Build -variants.detail.position = Posición -variants.detail.alleles = Alelos -variants.detail.motifRepeats = Motivo / Repeticiones -variants.detail.usedBy = Usado por haplogrupos -variants.detail.noHaplogroups = Esta variante no está asociada con ningún haplogrupo. 
-variants.detail.more = más -variants.detail.aliasType.snpNames = Nombres de SNP -variants.detail.aliasType.dbsnp = Identificadores dbSNP - -# Language -lang.switch = Idioma -lang.en = English -lang.fr = Français -lang.es = Español diff --git a/conf/messages.fr b/conf/messages.fr deleted file mode 100644 index 850b0026..00000000 --- a/conf/messages.fr +++ /dev/null @@ -1,335 +0,0 @@ -# General -app.name = Decoding Us - -# Navigation -nav.home = Accueil -nav.about = À propos -nav.ytree = Arbre Y-ADN -nav.mtree = Arbre ADN-mt -nav.references = Références -nav.tools = Outils de recherche -nav.coverage = Benchmarks de couverture -nav.contact = Contact -nav.reputation = Système de réputation -nav.terms = Conditions d''utilisation -nav.privacy = Politique de confidentialité -nav.cookies = Utilisation des cookies -nav.faq = FAQ -nav.api = Documentation de l''API -nav.profile = Profil -nav.candidates = Examiner les candidats -nav.messages = Messages -nav.supportAdmin = Support -nav.logout = Déconnexion -nav.login = Connexion - -# Footer -footer.copyright = Copyright © {0}, Decoding-Us.com - -# Home Page -home.title = Decoding Us -home.welcome = Bienvenue sur Decoding Us - -# Reputation System -reputation.title = Système de réputation communautaire -reputation.link = Système de réputation - -# Legal -legal.cookies.title = Utilisation des cookies -legal.privacy.title = Politique de confidentialité -legal.terms.title = Conditions d''utilisation -legal.faq.title = FAQ - -# Cookie Consent Banner -cookies.banner.message = Nous utilisons les cookies uniquement pour l''authentification. Pas de suivi, pas de marketing, pas de partage avec des tiers. -cookies.banner.learnMore = En savoir plus -cookies.banner.accept = Accepter - -# Generic -generic.notAvailable = Non disponible -generic.submit = Envoyer -generic.loading = Chargement... 
-generic.show = Afficher -generic.hide = Masquer -generic.cancel = Annuler - -# Pagination -pagination.pageOf = Page {0} sur {1} -pagination.previous = Précédent -pagination.next = Suivant -pagination.first = Premier -pagination.last = Dernier -pagination.itemsPerPage = Éléments par page : - -# Biosample Report -biosample.details = Détails de l''échantillon -biosample.header.sampleId = Identifiant de l''échantillon -biosample.header.accession = Numéro d''accession -biosample.header.origin = Origine géographique -biosample.header.timePeriod = Période -biosample.header.datingPeriod = Période de datation -biosample.header.sex = Sexe -biosample.header.yHaplo = Haplogroupe Y-ADN -biosample.header.mtHaplo = Haplogroupe ADN-mt -biosample.header.population = Groupe de population -biosample.analysisMethod = Méthode d''analyse : {0} - -# Map -map.title = Distribution géographique des échantillons -map.loading = Chargement des échantillons... -map.totalSamplesPrefix = Total des échantillons : -map.totalSamples = Total des échantillons : {0} - -# Contact -contact.title = Nous contacter -contact.name.label = Nom -contact.name.placeholder = Votre nom -contact.name.help = Veuillez entrer votre nom complet -contact.email.label = Courriel -contact.email.placeholder = votre.courriel@exemple.com -contact.email.help = Nous ne partagerons jamais votre courriel avec qui que ce soit -contact.subject.label = Objet -contact.subject.placeholder = Objet de votre message -contact.message.label = Message -contact.message.placeholder = Votre message -contact.send = Envoyer le message -contact.authenticated.info = Vous êtes connecté(e). Votre message sera lié à votre compte et vous pourrez suivre les réponses dans votre historique. -contact.viewHistory = Voir l''historique des messages - -# Support - User Messages -support.myMessages.title = Mes messages -support.myMessages.heading = Historique de mes messages -support.myMessages.empty = Vous n''avez pas encore envoyé de messages. 
-support.newMessage = Nouveau message -support.status.new = Nouveau -support.status.read = Lu -support.status.replied = Répondu -support.status.closed = Fermé - -# Support - Admin -support.admin.title = Messages de support -support.admin.heading = Gestion des messages de contact -support.admin.filterStatus = Filtrer par statut : -support.admin.allStatuses = Tous les statuts -support.admin.noMessages = Aucun message trouvé. -support.admin.table.status = Statut -support.admin.table.from = De -support.admin.table.subject = Objet -support.admin.table.date = Date -support.admin.view = Voir -support.admin.authenticated = Utilisateur -support.admin.registeredUser = Utilisateur enregistré -support.admin.anonymous = Anonyme -support.admin.messageDetail.title = Détails du message -support.admin.backToList = Retour aux messages -support.admin.receivedAt = Reçu le -support.admin.replies = Réponses -support.admin.repliedBy = Répondu par -support.admin.emailSent = Courriel envoyé -support.admin.sendReply = Envoyer une réponse -support.admin.replyPlaceholder = Tapez votre réponse ici... -support.admin.sendEmailCopy = Envoyer une copie par courriel à -support.admin.noEmailForAuth = Cet utilisateur est connecté. Les réponses apparaîtront dans son historique de messages. -support.admin.submitReply = Envoyer la réponse -support.admin.senderInfo = Informations sur l''expéditeur -support.admin.type = Type -support.admin.handle = Identifiant -support.admin.userId = ID utilisateur -support.admin.name = Nom -support.admin.email = Courriel -support.admin.actions = Actions -support.admin.markClosed = Marquer comme fermé -support.admin.reopen = Rouvrir - -# Errors -error.forbidden.title = Accès refusé -error.forbidden.heading = Accès refusé -error.notFound.title = Non trouvé -error.notFound.heading = Page non trouvée - -# Coverage -coverage.subtitle = Statistiques de couverture agrégées par laboratoire, type de test et contig. 
-coverage.selectLab = Sélectionner un laboratoire de séquençage : -coverage.selectLab.default = -- Sélectionner un laboratoire -- -coverage.selectLab.prompt = Veuillez sélectionner un laboratoire pour consulter les données de benchmark. -coverage.metrics.title = Explication des métriques -coverage.whyMatters = Pourquoi c''est important -coverage.technicalDetails = Détails techniques - -# Benchmarks -benchmark.header.testType = Type de test -benchmark.header.contig = Contig -benchmark.header.meanReadLen = Longueur moyenne des lectures -benchmark.header.readLenRange = Plage de longueur des lectures -benchmark.header.meanInsertSize = Taille moyenne d''insertion -benchmark.header.insertSizeRange = Plage de taille d''insertion -benchmark.header.meanDepth = Profondeur moyenne -benchmark.header.noCoverage = Bases sans couverture -benchmark.header.lowQuality = Mapping de faible qualité -benchmark.header.callable = Bases appelables -benchmark.header.meanMappingQuality = Qualité moyenne de mapping -benchmark.header.samples = Échantillons -benchmark.noData = Aucune donnée de benchmark disponible pour ce laboratoire. - -# Tree -tree.legend.established = Haplogroupe établi -tree.legend.updated = Mis à jour au cours de la dernière année -tree.noData = Aucune donnée d''arbre disponible -tree.loading = Chargement de l''arbre des haplogroupes... -tree.jumpTo = Aller à l''haplogroupe : -tree.go = Aller -tree.reRoot = Cliquer pour changer la racine -tree.verticalCladogram = Cladogramme vertical -tree.clickToSeeVariants = Cliquer pour voir {0} variant(s) définissant(s) - -# Sidebar -sidebar.title = Variant(s) définissant(s) pour {0} -sidebar.noVariants = Aucun variant définissant trouvé pour {0}. -sidebar.type = Type : {0} -sidebar.refLocation = Emplacement(s) de référence : - -# References Page -references.title = Publications -references.viewMap = Voir la carte -references.search.placeholder = Rechercher par titre, auteur, DOI... 
-references.search.help = Rechercher des publications par titre, nom d''auteur, revue ou DOI - -# Publication -publication.authors = Auteurs : -publication.authors.etAl = (Voir l''article pour la liste complète des auteurs) -publication.journal = Revue : -publication.published = Publié : -publication.abstract = Résumé -publication.additionalDetails = Détails supplémentaires -publication.classification = Classification : -publication.access = Accès à la publication : -publication.impact = Métriques d''impact : -publication.cited = Cité : {0} -publication.percentile = Percentile : {0}% -publication.genomicStudies = Études génomiques -publication.sampleCount = Nombre d''échantillons : {0} -publication.showSamples = Afficher les échantillons -publication.hideSamples = Masquer les échantillons -publication.directLink = Lien direct -publication.source.ena = Archive européenne des nucléotides -publication.source.ncbi = BioProject NCBI -publication.list.notFound = Aucune publication trouvée correspondant à « {0} ». Essayez un autre terme de recherche. -publication.list.empty = Aucune publication disponible. -publication.list.total = {0} publications -publication.submit.title = Soumettre une publication -publication.submit.doi.label = DOI -publication.submit.doi.placeholder = Entrez le DOI ou l''URL du DOI (ex. : 10.1234/exemple ou https://doi.org/10.1234/exemple) -publication.submit.doi.help = Entrez un DOI (10.1234/exemple) ou l''URL complète du DOI (https://doi.org/10.1234/exemple) -publication.submit.ena.label = Accession d''étude d''archive de séquences (optionnel) -publication.submit.ena.placeholder = Entrez l''accession d''étude d''archive si disponible (ex. : PRJEB12345, PRJNA123456) -publication.submit.ena.help = Si l''article mentionne une étude d''archive de séquences (ex. 
: ENA ou SRA), entrez son accession ici -publication.submit.forceRefresh = Forcer l''actualisation (mettre à jour même si la publication existe déjà) - -# Publication Candidates -publicationCandidates.title = Revue des publications candidates -publicationCandidates.heading = Publications candidates à examiner -publicationCandidates.noCandidates = Aucune publication candidate en attente. -publicationCandidates.table.title = Titre -publicationCandidates.table.journal = Revue -publicationCandidates.table.date = Date de publication -publicationCandidates.table.relevance = Score de pertinence -publicationCandidates.table.status = Statut -publicationCandidates.table.actions = Actions -publicationCandidates.action.accept = Accepter -publicationCandidates.action.reject = Rejeter -publicationCandidates.action.reviewed = Examiné -publicationCandidates.confirmAccept = Êtes-vous sûr(e) de vouloir accepter ce candidat et l''importer comme nouvelle publication ? -publicationCandidates.confirmReject = Êtes-vous sûr(e) de vouloir rejeter ce candidat ? -publicationCandidates.acceptSuccess = Candidat « {0} » accepté et importé avec succès. -publicationCandidates.acceptFailed = Échec de l''acceptation du candidat ou de l''importation de la publication. -publicationCandidates.acceptError = Erreur lors de l''acceptation du candidat : {0} -publicationCandidates.rejectSuccess = Candidat rejeté avec succès. -publicationCandidates.rejectFailed = Échec du rejet du candidat. -publicationCandidates.rejectError = Erreur lors du rejet du candidat : {0} -publicationCandidates.promptRejectReason = Veuillez entrer une raison pour rejeter ce candidat. -publicationCandidates.emptyRejectReasonAlert = La raison du rejet ne peut pas être vide. 
- -# Auth -auth.login.title = Connexion -auth.login.heading = Connexion -auth.login.federation.title = Connexion fédérée -auth.login.federation.description = Connectez-vous avec votre identité AT Protocol depuis n''importe quel fournisseur compatible (Bluesky, PDS auto-hébergé ou futurs comptes DecodingUs). -auth.login.handle = Identifiant ou DID -auth.login.handle.placeholder = alice.bsky.social ou did:plc:... -auth.login.handle.help = Entrez votre identifiant complet (ex. : alice.bsky.social) ou DID. Nous trouverons votre fournisseur d''identité automatiquement. -auth.login.password = Mot de passe d''application -auth.login.passwordHelp = Utilisez un mot de passe d''application de votre fournisseur d''identité. En savoir plus sur les mots de passe d''application. -auth.login.submit = Se connecter - -# Auth - App Password Help -auth.appPasswordHelp.title = Qu''est-ce qu''un mot de passe d''application ? -auth.appPasswordHelp.heading = Qu''est-ce qu''un mot de passe d''application ? -auth.appPasswordHelp.p1 = Un mot de passe d''application est un mot de passe unique généré par votre fournisseur d''identité AT Protocol (comme Bluesky, un PDS auto-hébergé ou d''autres services compatibles) spécifiquement pour les applications tierces comme celle-ci. Il vous permet d''accorder l''accès à une application sans partager votre mot de passe principal. -auth.appPasswordHelp.p2 = L''utilisation de mots de passe d''application renforce votre sécurité en : -auth.appPasswordHelp.li1 = Vous permettant de révoquer l''accès pour une seule application sans changer votre mot de passe principal. -auth.appPasswordHelp.li2 = Limitant les permissions qu''une application peut avoir (bien que pour cette application, elle accorde un accès complet à la session). -auth.appPasswordHelp.generateSteps = Comment générer un mot de passe d''application : -auth.appPasswordHelp.step1 = Ouvrez les paramètres de votre client AT Protocol ou de votre fournisseur d''identité (ex. 
: bsky.app pour Bluesky, ou le panneau d''administration de votre PDS auto-hébergé). -auth.appPasswordHelp.step2 = Accédez à Paramètres > Mots de passe d''application (ou Confidentialité et sécurité > Mots de passe d''application). -auth.appPasswordHelp.step3 = Cliquez sur Ajouter un mot de passe d''application. -auth.appPasswordHelp.step4 = Donnez-lui un nom descriptif (ex. : « Appli DecodingUs »). -auth.appPasswordHelp.step5 = Copiez le mot de passe généré. C''est la seule fois que vous le verrez. -auth.appPasswordHelp.note = Ce mot de passe généré est celui que vous utiliserez dans le formulaire de connexion. Traitez-le comme un mot de passe ordinaire. - -# Profile -profile.title = Profil utilisateur -profile.heading = Votre profil -profile.accountDetails = Détails du compte -profile.handle = Identifiant -profile.did = DID -profile.editProfile = Modifier le profil -profile.displayName = Nom d''affichage -profile.displayNameHelp = Ce nom sera visible par les autres utilisateurs dans le système de messagerie. -profile.save = Enregistrer le profil - -# Variant Browser -nav.variants = Navigateur de variants -variants.browser.title = Navigateur de variants -variants.browser.heading = Navigateur de variants -variants.browser.description = Recherchez dans la base de données des variants Y-ADN et ADN-mt. Trouvez des SNP par nom, rsID ou alias. -variants.browser.variants = Variants -variants.browser.searchPlaceholder = Rechercher par rsID, nom de SNP ou alias (ex. : M269, rs9786076)... -variants.browser.searchHelp = Entrez au moins 2 caractères pour rechercher. Recherche dans les noms, rsID et alias de variants. -variants.browser.selectVariant = Sélectionnez un variant pour voir les détails -variants.browser.foundMatching = {0} groupes de variants trouvés pour « {1} » -variants.browser.showingTotal = Affichage de {0} groupes de variants au total -variants.browser.pageOf = Page {0} sur {1} -variants.browser.noResults = Aucun variant trouvé pour « {0} ». 
-variants.browser.enterSearch = Entrez un terme de recherche pour trouver des variants. -variants.browser.col.name = Nom / rsID -variants.browser.col.alleles = Anc/Dér -variants.browser.col.type = Type -variants.browser.col.builds = Builds -variants.browser.strandDiffers = Le brin diffère entre les builds - -# Variant Detail Panel -variants.detail.rsId = rsID -variants.detail.commonName = Nom commun -variants.detail.ancestral = Ancestral -variants.detail.derived = Dérivé -variants.detail.type = Type -variants.detail.status = Statut de dénomination -variants.detail.altNames = Noms alternatifs -variants.detail.source = Source -variants.detail.refBuilds = Builds de référence -variants.detail.strandDiff = Différence de brin (complément inverse) -variants.detail.build = Build -variants.detail.position = Position -variants.detail.alleles = Allèles -variants.detail.motifRepeats = Motif / Répétitions -variants.detail.usedBy = Utilisé par les haplogroupes -variants.detail.noHaplogroups = Ce variant n''est associé à aucun haplogroupe. 
-variants.detail.more = plus -variants.detail.aliasType.snpNames = Noms de SNP -variants.detail.aliasType.dbsnp = Identifiants dbSNP - -# Language -lang.switch = Langue -lang.en = English -lang.fr = Français -lang.es = Español diff --git a/conf/routes b/conf/routes deleted file mode 100644 index 11a99f4d..00000000 --- a/conf/routes +++ /dev/null @@ -1,386 +0,0 @@ -# Routes -# This file defines all application routes (Higher priority routes first) -# https://www.playframework.com/documentation/latest/ScalaRouting -# ~~~~ - -# Health check for load balancers and container orchestration -GET /health controllers.HomeController.health() - -GET / controllers.HomeController.index() -GET /cookie-usage controllers.HomeController.cookieUsage() -GET /terms controllers.HomeController.terms() -GET /privacy controllers.HomeController.privacy() -GET /faq controllers.HomeController.faq() -GET /reputation controllers.HomeController.reputation() -GET /how-to-submit-tree-data controllers.HomeController.howToSubmitTreeData() - -GET /language/:lang controllers.LanguageController.switchLanguage(lang: String) - -GET /sitemap.xml controllers.HomeController.sitemap() -GET /robots.txt controllers.HomeController.robots() - -GET /ytree controllers.TreeController.ytree(rootHaplogroup: Option[String]) -GET /mtree controllers.TreeController.mtree(rootHaplogroup: Option[String]) -GET /ytree/fragment controllers.TreeController.yTreeFragment(rootHaplogroup: Option[String]) -GET /mtree/fragment controllers.TreeController.mTreeFragment(rootHaplogroup: Option[String]) - -GET /haplogroups/snp-sidebar/placeholder controllers.TreeController.emptySnpDetailSidebarPlaceholder -GET /haplogroups/snp-sidebar/:haplogroupName controllers.TreeController.getSnpDetailSidebar(haplogroupName: String, haplogroupType: models.HaplogroupType) - - -GET /references controllers.PublicationController.index() -GET /references/details controllers.PublicationController.getAllPublicationsWithDetailsHtml(page: Option[Int], 
pageSize: Option[Int], query: Option[String]) -GET /references/details/:publicationId/biosamples controllers.BiosampleReportController.getBiosampleReportHTML(publicationId: Int, page: Option[Int] ?= Some(1)) - -GET /publications/submit controllers.PublicationController.showSubmissionForm() -POST /publications/submit controllers.PublicationController.submitPaper() - -GET /coverage-benchmarks controllers.CoverageController.index() - -# Variant Browser (public read-only) -GET /variants controllers.VariantBrowserController.index(query: Option[String] ?= None, page: Int ?= 1, pageSize: Int ?= 25) -GET /variants/list controllers.VariantBrowserController.listFragment(query: Option[String] ?= None, page: Int ?= 1, pageSize: Int ?= 25) -GET /variants/detail/:id controllers.VariantBrowserController.detailPanel(id: Int) - -# Contact form routes -GET /contact controllers.ContactController.show() -POST /contact controllers.ContactController.submit() -GET /my-messages controllers.ContactController.myMessages() -GET /my-messages/badge controllers.ContactController.userMessageBadge() - -# Support Admin (requires Admin role) -GET /admin/messages controllers.SupportAdminController.listMessages(status: Option[String] ?= None, page: Int ?= 1, pageSize: Int ?= 20) -GET /admin/messages/badge controllers.SupportAdminController.adminMessageBadge() -GET /admin/messages/:id controllers.SupportAdminController.viewMessage(id: java.util.UUID) -POST /admin/messages/:id/reply controllers.SupportAdminController.submitReply(id: java.util.UUID) -GET /admin/messages/:id/status/:status controllers.SupportAdminController.updateStatus(id: java.util.UUID, status: String) - -GET /api/v1/y-tree controllers.TreeController.apiYTree(rootHaplogroup: Option[String]) -GET /api/v1/mt-tree controllers.TreeController.apiMTree(rootHaplogroup: Option[String]) - -# Coverage API -GET /api/v1/coverage/benchmarks controllers.CoverageController.apiBenchmarks() -GET /coverage/labs controllers.CoverageController.labs -GET 
/coverage/benchmarks/lab/:labId controllers.CoverageController.benchmarksByLabWithDetails(labId: Int) - -# Sequencer API -GET /api/v1/sequencer/lab controllers.SequencerController.getLabByInstrumentId(instrument_id: String) -GET /api/v1/sequencer/lab-instruments controllers.SequencerController.getAllLabInstruments -POST /api/v1/sequencer/lab/associate controllers.SequencerController.associateLabWithInstrument() - -# Instrument Proposal Curator API -GET /api/v1/curator/instrument-proposals controllers.InstrumentProposalController.listProposals(status: Option[String]) -GET /api/v1/curator/instrument-proposals/conflicts controllers.InstrumentProposalController.detectConflicts() -GET /api/v1/curator/instrument-proposals/:id controllers.InstrumentProposalController.getProposalDetail(id: Int) -POST /api/v1/curator/instrument-proposals/:id/accept controllers.InstrumentProposalController.acceptProposal(id: Int) -POST /api/v1/curator/instrument-proposals/:id/reject controllers.InstrumentProposalController.rejectProposal(id: Int) - -GET /api/v1/references/details controllers.PublicationController.getAllPublicationsWithDetailsJson() -GET /api/v1/references/details/:publicationId/biosamples controllers.BiosampleReportController.getBiosampleReportJSON(publicationId: Int) - -GET /biosamples/map controllers.BiosampleMapController.mapView() -GET /biosamples/geo-data controllers.BiosampleMapController.geoData() - -# Sample studies endpoint -GET /api/v1/biosample/studies controllers.BiosampleController.getSamplesWithStudies() - -# Biosample API endpoint -PATCH /api/private/biosamples/:id controllers.BiosampleController.updateBiosample(id: Int) -GET /api/private/biosamples/search controllers.BiosampleController.findByAliasOrAccession(query: String) - -POST /api/private/biosamples/publication-link controllers.BiosamplePublicationController.linkBiosampleToPublication - -# Biosample Original Haplogroup endpoints -PUT 
/api/private/biosamples/:biosampleId/publications/:publicationId/haplogroup controllers.BiosampleOriginalHaplogroupController.updateOrCreateHaplogroup(biosampleId: Int, publicationId: Int) -GET /api/private/biosamples/:biosampleId/publications/:publicationId/haplogroup controllers.BiosampleOriginalHaplogroupController.getHaplogroup(biosampleId: Int, publicationId: Int) -DELETE /api/private/biosamples/:biosampleId/publications/:publicationId/haplogroup controllers.BiosampleOriginalHaplogroupController.deleteHaplogroup(biosampleId: Int, publicationId: Int) - - -# PGP Biosample API endpoints -POST /api/private/pgp/biosamples controllers.PgpBiosampleController.create -POST /api/private/pgp/biosamples/:sampleGuid/sequences controllers.BiosampleDataController.addSequenceData(sampleGuid: java.util.UUID) -POST /api/private/pgp/biosamples/:sampleGuid/publication controllers.BiosampleDataController.linkPublication(sampleGuid: java.util.UUID) - -# External Biosample API endpoints -POST /api/private/external/biosamples controllers.ExternalBiosampleController.create -POST /api/private/external/biosamples/:sampleGuid/sequences controllers.BiosampleDataController.addSequenceData(sampleGuid: java.util.UUID) -POST /api/private/external/biosamples/:sampleGuid/publication controllers.BiosampleDataController.linkPublication(sampleGuid: java.util.UUID) - -# Specimen Donor endpoints -POST /api/private/donors/merge controllers.SpecimenDonorController.mergeDonors() - -# Sequencing Lab private admin CRUD (not exposed in Swagger) -GET /api/private/sequencing-labs controllers.SequencingLabAdminController.list -GET /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.get(id: Int) -POST /api/private/sequencing-labs controllers.SequencingLabAdminController.create -PATCH /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.update(id: Int) -DELETE /api/private/sequencing-labs/:id controllers.SequencingLabAdminController.delete(id: Int) - -# Atmosphere 
Firehose -POST /api/firehose/event controllers.FirehoseController.processEvent - -# Publication Discovery -POST /api/private/publication-discovery/run controllers.PublicationDiscoveryController.triggerDiscovery() - -# Genomics Admin -GET /admin/genomics controllers.GenomicsAdminController.dashboard() -POST /api/private/genomics/ybrowse/update controllers.GenomicsAdminController.triggerYBrowseUpdate() -POST /api/private/genomics/hipstr/update controllers.GenomicsAdminController.triggerHipStrUpdate() - -# Private Variant API -POST /api/private/variants/builds controllers.VariantApiController.bulkAddBuilds() -POST /api/private/variants/rsids controllers.VariantApiController.bulkUpdateRsIds() - -# Private Alias Source Management API -GET /api/private/aliases/sources/stats controllers.VariantApiController.getAliasSourceStats() -GET /api/private/aliases/sources/preview controllers.VariantApiController.previewAliasSourceUpdate(aliasPrefix: String, currentSource: String) -POST /api/private/aliases/sources controllers.VariantApiController.bulkUpdateAliasSources() - -# DU Naming Authority API (X-API-Key secured) -POST /api/private/variants/:variantId/du-name controllers.VariantApiController.assignDuName(variantId: Int) -POST /api/private/variants/du-names controllers.VariantApiController.bulkAssignDuNames() -GET /api/private/variants/du-names/next controllers.VariantApiController.previewNextDuName() - -# Authentication -GET /login controllers.AuthController.login -POST /login controllers.AuthController.authenticate -POST /logout controllers.AuthController.logout -GET /app-password-help controllers.AuthController.showAppPasswordHelp() - -# Cookie Consent (GDPR) -GET /cookies/check controllers.CookieConsentController.checkConsent -POST /cookies/accept controllers.CookieConsentController.acceptConsent - -# User Profile -GET /profile controllers.ProfileController.view -POST /profile controllers.ProfileController.update - -# Public Variant API -GET /api/v1/variants 
controllers.VariantPublicApiController.searchVariants(query: Option[String] ?= None, page: Int ?= 1, pageSize: Int ?= 25) -GET /api/v1/variants/export controllers.VariantPublicApiController.downloadExport() -GET /api/v1/variants/export/metadata controllers.VariantPublicApiController.exportMetadata() -GET /api/v1/variants/:variantId controllers.VariantPublicApiController.getVariantById(variantId: Int) -GET /api/v1/haplogroups/:haplogroupName/variants controllers.VariantPublicApiController.getVariantsByHaplogroup(haplogroupName: String) - -# Private Variant Export API (X-API-Key secured) -POST /api/private/variants/export/generate controllers.VariantPublicApiController.triggerExport() - -# Genome Regions API -GET /api/v1/genome-regions controllers.GenomeRegionsApiController.listBuilds() -GET /api/v1/genome-regions/:build controllers.GenomeRegionsApiController.getRegions(build: String) - -# ============================================= -# Genome Regions API Management (X-API-Key secured) -# ============================================= -# Genome Regions -GET /api/v1/manage/genome-regions controllers.GenomeRegionsApiManagementController.listRegions(regionType: Option[String], build: Option[String], page: Int ?= 1, pageSize: Int ?= 25) -GET /api/v1/manage/genome-regions/:id controllers.GenomeRegionsApiManagementController.getRegion(id: Int) -POST /api/v1/manage/genome-regions controllers.GenomeRegionsApiManagementController.createRegion() -PUT /api/v1/manage/genome-regions/:id controllers.GenomeRegionsApiManagementController.updateRegion(id: Int) -DELETE /api/v1/manage/genome-regions/:id controllers.GenomeRegionsApiManagementController.deleteRegion(id: Int) -POST /api/v1/manage/genome-regions/bulk controllers.GenomeRegionsApiManagementController.bulkCreateRegions() -POST /api/v1/manage/genome-regions/bootstrap controllers.GenomicsAdminController.triggerRegionsBootstrap() - -# ============================================= -# Haplogroup Tree Merge API (X-API-Key secured) 
-# ============================================= -POST /api/v1/manage/haplogroups/merge controllers.HaplogroupTreeMergeController.mergeFullTree() -POST /api/v1/manage/haplogroups/merge/subtree controllers.HaplogroupTreeMergeController.mergeSubtree() -POST /api/v1/manage/haplogroups/merge/preview controllers.HaplogroupTreeMergeController.previewMerge() - -# ============================================= -# Tree Versioning API (X-API-Key secured) -# ============================================= -# Change Set Management -GET /api/v1/manage/change-sets controllers.TreeVersioningApiController.listChangeSets(haplogroupType: Option[String], status: Option[String], page: Int ?= 1, pageSize: Int ?= 20) -GET /api/v1/manage/change-sets/:id controllers.TreeVersioningApiController.getChangeSetDetails(id: Int) -POST /api/v1/manage/change-sets/:id/start-review controllers.TreeVersioningApiController.startReview(id: Int) -POST /api/v1/manage/change-sets/:id/apply controllers.TreeVersioningApiController.applyChangeSet(id: Int) -POST /api/v1/manage/change-sets/:id/discard controllers.TreeVersioningApiController.discardChangeSet(id: Int) - -# Change Review -GET /api/v1/manage/change-sets/:id/changes/pending controllers.TreeVersioningApiController.getPendingChanges(id: Int, limit: Int ?= 50) -POST /api/v1/manage/change-sets/:changeSetId/changes/:changeId/review controllers.TreeVersioningApiController.reviewChange(changeSetId: Int, changeId: Int) -POST /api/v1/manage/change-sets/:id/approve-all controllers.TreeVersioningApiController.approveAllPending(id: Int) - -# Change Set Comments -POST /api/v1/manage/change-sets/:id/comments controllers.TreeVersioningApiController.addComment(id: Int) -GET /api/v1/manage/change-sets/:id/comments controllers.TreeVersioningApiController.listComments(id: Int) - -# Tree Diff -GET /api/v1/manage/change-sets/:id/diff controllers.TreeVersioningApiController.getTreeDiff(id: Int) -GET /api/v1/manage/tree-diff/:haplogroupType 
controllers.TreeVersioningApiController.getActiveTreeDiff(haplogroupType: String) - -# Discovery Proposals API -GET /api/v1/discovery/proposals controllers.DiscoveryApiController.listProposals(type: Option[String], status: Option[String]) -GET /api/v1/discovery/proposals/:id controllers.DiscoveryApiController.getProposalDetails(id: Int) -POST /api/v1/discovery/proposals/:id/start-review controllers.DiscoveryApiController.startReview(id: Int) -POST /api/v1/discovery/proposals/:id/accept controllers.DiscoveryApiController.acceptProposal(id: Int) -POST /api/v1/discovery/proposals/:id/reject controllers.DiscoveryApiController.rejectProposal(id: Int) -POST /api/v1/discovery/proposals/:id/promote controllers.DiscoveryApiController.promoteProposal(id: Int) -GET /api/v1/discovery/proposals/:id/audit controllers.DiscoveryApiController.getAuditTrail(id: Int) - -# Curator Workflow -GET /admin/publication-candidates controllers.PublicationCandidateController.listCandidates(page: Int ?= 1, pageSize: Int ?= 20, status: String ?= "pending") -POST /admin/publication-candidates/:id/accept controllers.PublicationCandidateController.accept(id: Int) -POST /admin/publication-candidates/:id/reject controllers.PublicationCandidateController.reject(id: Int) -POST /admin/publication-candidates/:id/defer controllers.PublicationCandidateController.defer(id: Int) -POST /admin/publication-candidates/bulk controllers.PublicationCandidateController.bulkAction() - -# Curator Tools (requires TreeCurator or Admin role) -GET /curator controllers.CuratorController.dashboard - -# Curator - Change Sets (Tree Versioning) -GET /curator/change-sets controllers.TreeVersioningCuratorController.listChangeSets(hgType: Option[String], status: Option[String], pageSize: Int ?= 20) -GET /curator/change-sets/fragment controllers.TreeVersioningCuratorController.changeSetsFragment(hgType: Option[String], status: Option[String], page: Int ?= 1, pageSize: Int ?= 20) -GET /curator/change-sets/:id/panel 
controllers.TreeVersioningCuratorController.changeSetDetailPanel(id: Int) -GET /curator/change-sets/:id/changes/pending controllers.TreeVersioningCuratorController.pendingChangesFragment(id: Int, limit: Int ?= 50) -POST /curator/change-sets/:id/start-review controllers.TreeVersioningCuratorController.startReview(id: Int) -POST /curator/change-sets/:id/apply controllers.TreeVersioningCuratorController.applyChangeSet(id: Int) -POST /curator/change-sets/:id/discard controllers.TreeVersioningCuratorController.discardChangeSet(id: Int) -POST /curator/change-sets/:id/approve-all controllers.TreeVersioningCuratorController.approveAllPending(id: Int) -POST /curator/change-sets/:changeSetId/changes/:changeId/review controllers.TreeVersioningCuratorController.reviewChange(changeSetId: Int, changeId: Int) -GET /curator/change-sets/:id/diff controllers.TreeVersioningCuratorController.diffView(id: Int) -GET /curator/change-sets/:id/diff/fragment controllers.TreeVersioningCuratorController.diffFragment(id: Int) -GET /curator/change-sets/:id/tree-preview controllers.TreeVersioningCuratorController.treePreview(id: Int) -GET /curator/change-sets/:id/ambiguity-report controllers.TreeVersioningCuratorController.ambiguityReport(id: Int) -GET /curator/change-sets/:id/ambiguity-report/download controllers.TreeVersioningCuratorController.downloadAmbiguityReport(id: Int) - -# Curator - Change Set Conflict Resolutions -GET /curator/change-sets/:id/resolutions controllers.TreeVersioningCuratorController.listResolutions(id: Int) -GET /curator/change-sets/:id/deferred controllers.TreeVersioningCuratorController.listDeferredItems(id: Int) -POST /curator/change-sets/:id/resolve/reparent controllers.TreeVersioningCuratorController.resolveReparent(id: Int) -POST /curator/change-sets/:id/resolve/edit-variants controllers.TreeVersioningCuratorController.resolveEditVariants(id: Int) -POST /curator/change-sets/:id/resolve/merge-existing 
controllers.TreeVersioningCuratorController.resolveMergeExisting(id: Int) -POST /curator/change-sets/:id/resolve/defer controllers.TreeVersioningCuratorController.resolveDefer(id: Int) -DELETE /curator/change-sets/:changeSetId/resolutions/:resolutionId controllers.TreeVersioningCuratorController.cancelResolution(changeSetId: Int, resolutionId: Int) - -# Curator - Haplogroups -GET /curator/haplogroups controllers.CuratorController.listHaplogroups(query: Option[String], hgType: Option[String], page: Int ?= 1, pageSize: Int ?= 20) -GET /curator/haplogroups/fragment controllers.CuratorController.haplogroupsFragment(query: Option[String], hgType: Option[String], page: Int ?= 1, pageSize: Int ?= 20) -GET /curator/haplogroups/search.json controllers.CuratorController.searchHaplogroupsJson(query: Option[String], hgType: Option[String]) -GET /curator/haplogroups/new controllers.CuratorController.createHaplogroupForm -POST /curator/haplogroups controllers.CuratorController.createHaplogroup -GET /curator/haplogroups/:id/panel controllers.CuratorController.haplogroupDetailPanel(id: Int) -GET /curator/haplogroups/:id/edit controllers.CuratorController.editHaplogroupForm(id: Int) -POST /curator/haplogroups/:id controllers.CuratorController.updateHaplogroup(id: Int) -DELETE /curator/haplogroups/:id controllers.CuratorController.deleteHaplogroup(id: Int) - -# Curator - Haplogroup Tree Restructuring -GET /curator/haplogroups/:id/split controllers.CuratorController.splitBranchForm(id: Int) -POST /curator/haplogroups/:id/split controllers.CuratorController.splitBranch(id: Int) -GET /curator/haplogroups/:id/merge controllers.CuratorController.mergeConfirmForm(id: Int) -POST /curator/haplogroups/:id/merge controllers.CuratorController.mergeIntoParent(id: Int) -GET /curator/haplogroups/:id/reparent controllers.CuratorController.reparentForm(id: Int) -POST /curator/haplogroups/:id/reparent controllers.CuratorController.reparent(id: Int) - -# Curator - Haplogroup-Variant Associations -GET 
/curator/haplogroups/:id/variants/search controllers.CuratorController.searchVariantsForHaplogroup(id: Int, q: Option[String]) -POST /curator/haplogroups/:hgId/variants/:variantId controllers.CuratorController.addVariantToHaplogroup(hgId: Int, variantId: Int) -DELETE /curator/haplogroups/:hgId/variants/:variantId controllers.CuratorController.removeVariantFromHaplogroup(hgId: Int, variantId: Int) -GET /curator/haplogroup-variants/:hvId/history controllers.CuratorController.haplogroupVariantHistory(hvId: Int) - -# Curator - Variants -GET /curator/variants controllers.CuratorController.listVariants(query: Option[String], page: Int ?= 1, pageSize: Int ?= 20) -GET /curator/variants/fragment controllers.CuratorController.variantsFragment(query: Option[String], page: Int ?= 1, pageSize: Int ?= 20) -GET /curator/variants/new controllers.CuratorController.createVariantForm -POST /curator/variants controllers.CuratorController.createVariant -GET /curator/variants/:id/panel controllers.CuratorController.variantDetailPanel(id: Int) -GET /curator/variants/:id/edit controllers.CuratorController.editVariantForm(id: Int) -POST /curator/variants/:id controllers.CuratorController.updateVariant(id: Int) -DELETE /curator/variants/:id controllers.CuratorController.deleteVariant(id: Int) -# Variant group editing removed - now using VariantV2 with consolidated coordinates - -# Curator - Audit -GET /curator/audit/:entityType/:entityId controllers.CuratorController.auditHistory(entityType: String, entityId: Int) - -# ============================================= -# Curator - Genome Regions (session auth + permissions) -# ============================================= -# Genome Regions -GET /curator/genome-regions controllers.GenomeRegionsCuratorController.listRegions(regionType: Option[String], build: Option[String], page: Int ?= 1, pageSize: Int ?= 25) -GET /curator/genome-regions/fragment controllers.GenomeRegionsCuratorController.regionsFragment(regionType: Option[String], build: 
Option[String], page: Int ?= 1, pageSize: Int ?= 25) -GET /curator/genome-regions/:id/detail controllers.GenomeRegionsCuratorController.regionDetailPanel(id: Int) -GET /curator/genome-regions/new controllers.GenomeRegionsCuratorController.createRegionForm -POST /curator/genome-regions controllers.GenomeRegionsCuratorController.createRegion -GET /curator/genome-regions/:id/edit controllers.GenomeRegionsCuratorController.editRegionForm(id: Int) -POST /curator/genome-regions/:id controllers.GenomeRegionsCuratorController.updateRegion(id: Int) -DELETE /curator/genome-regions/:id controllers.GenomeRegionsCuratorController.deleteRegion(id: Int) - -# ============================================= -# PDS Fleet Management API -# ============================================= -# PDS-authenticated endpoints (signed by edge node private key) -+nocsrf -POST /api/v1/pds/heartbeat controllers.PdsFleetApiController.heartbeat() -+nocsrf -POST /api/v1/pds/submissions controllers.PdsFleetApiController.submitData() - -# Admin fleet management (X-API-Key secured) -GET /api/v1/pds/fleet/summary controllers.PdsFleetApiController.getFleetSummary -GET /api/v1/pds/fleet/nodes controllers.PdsFleetApiController.listNodes(status: Option[String]) -GET /api/v1/pds/fleet/nodes/:did controllers.PdsFleetApiController.getNode(did: String) -DELETE /api/v1/pds/fleet/nodes/:did controllers.PdsFleetApiController.removeNode(did: String) -POST /api/v1/pds/fleet/mark-stale controllers.PdsFleetApiController.markStaleOffline() -GET /api/v1/pds/submissions/pending controllers.PdsFleetApiController.getPendingSubmissions(type: Option[String], limit: Int ?= 100) -POST /api/v1/pds/submissions/:id/accept controllers.PdsFleetApiController.acceptSubmission(id: Int) -POST /api/v1/pds/submissions/:id/reject controllers.PdsFleetApiController.rejectSubmission(id: Int) -GET /api/v1/pds/submissions/summary/:did controllers.PdsFleetApiController.getNodeSubmissionSummary(did: String) - -# 
============================================= -# Patronage API (X-API-Key secured) -# ============================================= -POST /api/v1/patronage/subscriptions controllers.PatronageApiController.createSubscription() -POST /api/v1/patronage/subscriptions/:id/cancel controllers.PatronageApiController.cancelSubscription(id: Int) -GET /api/v1/patronage/subscriptions/user/:userId controllers.PatronageApiController.getUserSubscriptions(userId: java.util.UUID) -GET /api/v1/patronage/subscriptions/active/:userId controllers.PatronageApiController.getSubscription(userId: java.util.UUID) -GET /api/v1/patronage/is-patron/:userId controllers.PatronageApiController.isPatron(userId: java.util.UUID) -GET /api/v1/patronage/summary controllers.PatronageApiController.getPatronSummary -POST /api/v1/patronage/expire-overdue controllers.PatronageApiController.expireOverdue() - -# ============================================= -# IBD Match Discovery API (PDS-authenticated) -# ============================================= -+nocsrf -GET /api/v1/discovery/suggestions controllers.MatchDiscoveryController.getSuggestions(type: Option[String], limit: Int ?= 20) -+nocsrf -POST /api/v1/discovery/suggestions/:id/dismiss controllers.MatchDiscoveryController.dismissSuggestion(id: Long) -+nocsrf -GET /api/v1/discovery/population/:sampleGuid controllers.MatchDiscoveryController.getPopulationBreakdown(sampleGuid: java.util.UUID) -+nocsrf -GET /api/v1/discovery/population/overlap/:guid1/:guid2 controllers.MatchDiscoveryController.getPopulationOverlap(guid1: java.util.UUID, guid2: java.util.UUID) - -# ============================================= -# IBD Match Request & Consent API (PDS-authenticated) -# ============================================= -+nocsrf -POST /api/v1/matches/request controllers.MatchRequestController.createRequest() -+nocsrf -GET /api/v1/matches/requests/pending controllers.MatchRequestController.getPendingRequests() -+nocsrf -GET /api/v1/matches/requests/sent 
controllers.MatchRequestController.getSentRequests() -+nocsrf -POST /api/v1/matches/requests/:uri/cancel controllers.MatchRequestController.cancelRequest(uri: String) -+nocsrf -POST /api/v1/matches/consent controllers.MatchRequestController.submitConsent() -+nocsrf -GET /api/v1/matches/consent/status/:requestUri controllers.MatchRequestController.getConsentStatus(requestUri: String) - -# ============================================= -# IBD Relay API (PDS-authenticated) -# ============================================= -+nocsrf -POST /api/v1/ibd/relay/session controllers.IbdRelayController.createSession() -+nocsrf -GET /api/v1/ibd/relay/session/:sessionId controllers.IbdRelayController.getSessionStatus(sessionId: String) -GET /api/v1/ibd/relay/:sessionId controllers.IbdRelayController.relay(sessionId: String) - -# --- API Routes (Handled by Tapir, including Swagger UI) --- -POST /api/registerPDS controllers.PDSRegistrationController.registerPDS() - -# Delegate all requests starting with /api to the Tapir-based ApiRouter --> /api controllers.ApiRouter - -# Map static resources from the /public folder to the /assets URL path -GET /assets/*file controllers.Assets.versioned(path="/public", file: Asset) --> /webjars webjars.Routes \ No newline at end of file diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml deleted file mode 100644 index 72969df6..00000000 --- a/docker-compose.prod.yml +++ /dev/null @@ -1,43 +0,0 @@ -# ============================================================================= -# DecodingUs Production Overrides -# ============================================================================= -# Usage: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d -# ============================================================================= - -services: - app: - image: ${DOCKER_REGISTRY:-}decodingus:${IMAGE_TAG:-latest} - build: - context: . 
- environment: - # Production database (use RDS or external PostgreSQL) - - SLICK_DBS_DEFAULT_DB_URL=${DATABASE_URL} - - SLICK_DBS_DEFAULT_DB_USER=${DATABASE_USER} - - SLICK_DBS_DEFAULT_DB_PASSWORD=${DATABASE_PASSWORD} - - SLICK_DBS_METADATA_DB_URL=${METADATA_DATABASE_URL:-${DATABASE_URL}} - - SLICK_DBS_METADATA_DB_USER=${DATABASE_USER} - - SLICK_DBS_METADATA_DB_PASSWORD=${DATABASE_PASSWORD} - # Security - - APPLICATION_SECRET=${APPLICATION_SECRET} - - PLAY_HTTP_SECRET_KEY=${APPLICATION_SECRET} - # Production settings - - PLAY_EVOLUTIONS_AUTOCOMMIT=false - - ENABLE_RECAPTCHA=${ENABLE_RECAPTCHA:-true} - - RECAPTCHA_SECRET_KEY=${RECAPTCHA_SECRET_KEY} - - RECAPTCHA_SITE_KEY=${RECAPTCHA_SITE_KEY} - # Contact - - CONTACT_RECIPIENT_EMAIL=${CONTACT_RECIPIENT_EMAIL} - ports: - - "127.0.0.1:9000:9000" # Only expose to localhost (nginx will proxy) - restart: always - logging: - driver: "json-file" - options: - max-size: "10m" - max-file: "5" - - # In production, typically use RDS instead of containerized PostgreSQL - # Comment out or remove the db service when using external database - db: - profiles: - - with-db # Only start if explicitly requested: docker compose --profile with-db up diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 5310260c..00000000 --- a/docker-compose.yml +++ /dev/null @@ -1,72 +0,0 @@ -# ============================================================================= -# DecodingUs Docker Compose Configuration -# ============================================================================= -# Development: docker compose up -# Production: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d -# ============================================================================= - -services: - # --------------------------------------------------------------------------- - # Application - # --------------------------------------------------------------------------- - app: - build: - context: . 
- dockerfile: Dockerfile - container_name: decodingus-app - ports: - - "9000:9000" - environment: - # Database connection (override in production) - - SLICK_DBS_DEFAULT_DB_URL=jdbc:postgresql://db:5432/decodingus_db - - SLICK_DBS_DEFAULT_DB_USER=decodingus_user - - SLICK_DBS_DEFAULT_DB_PASSWORD=decodingus_password - - SLICK_DBS_METADATA_DB_URL=jdbc:postgresql://db:5432/decodingus_metadata - - SLICK_DBS_METADATA_DB_USER=decodingus_user - - SLICK_DBS_METADATA_DB_PASSWORD=decodingus_password - # Play configuration - - APPLICATION_SECRET=${APPLICATION_SECRET:-changeme} - - PLAY_HTTP_SECRET_KEY=${APPLICATION_SECRET:-changeme} - depends_on: - db: - condition: service_healthy - networks: - - decodingus-network - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/health"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 60s - - # --------------------------------------------------------------------------- - # PostgreSQL Database - # --------------------------------------------------------------------------- - db: - image: postgis/postgis:16-3.4-alpine - container_name: decodingus-db - environment: - - POSTGRES_USER=decodingus_user - - POSTGRES_PASSWORD=decodingus_password - - POSTGRES_DB=decodingus_db - volumes: - - postgres_data:/var/lib/postgresql/data - - ./docker/init-db.sql:/docker-entrypoint-initdb.d/init-db.sql:ro - ports: - - "5432:5432" - networks: - - decodingus-network - restart: unless-stopped - healthcheck: - test: ["CMD-SHELL", "pg_isready -U decodingus_user -d decodingus_db"] - interval: 10s - timeout: 5s - retries: 5 - -networks: - decodingus-network: - driver: bridge - -volumes: - postgres_data: diff --git a/docker/init-db.sql b/docker/init-db.sql deleted file mode 100644 index 85137291..00000000 --- a/docker/init-db.sql +++ /dev/null @@ -1,28 +0,0 @@ --- ============================================================================= --- DecodingUs Database Initialization --- 
============================================================================= --- This script runs once when the PostgreSQL container is first created. --- It sets up the required databases and extensions. --- ============================================================================= - --- Create the metadata database (main database is created by POSTGRES_DB env var) -CREATE DATABASE decodingus_metadata; - --- Connect to main database and enable extensions -\c decodingus_db - --- PostGIS for geospatial data -CREATE EXTENSION IF NOT EXISTS postgis; - --- LTree for hierarchical data (haplogroup trees) -CREATE EXTENSION IF NOT EXISTS ltree; - --- pg_trgm for fuzzy text search -CREATE EXTENSION IF NOT EXISTS pg_trgm; - --- Connect to metadata database and enable extensions -\c decodingus_metadata - -CREATE EXTENSION IF NOT EXISTS postgis; -CREATE EXTENSION IF NOT EXISTS ltree; -CREATE EXTENSION IF NOT EXISTS pg_trgm; diff --git a/documents/API_Improvement_Report.md b/documents/API_Improvement_Report.md deleted file mode 100644 index bc37496d..00000000 --- a/documents/API_Improvement_Report.md +++ /dev/null @@ -1,60 +0,0 @@ -# API Improvement Report: Decoding Us as an App Layer for Edge Nodes - -This report outlines the strategy for evolving the Decoding Us API. It distinguishes between the existing **Research/Curator APIs** (used for academic data maintenance) and the required **Edge/Atmosphere APIs** (needed for the distributed citizen science network). - -## 1. Current State: The Research/Curator API - -The application currently contains a set of "Private APIs" (implemented as standard Play Controllers) primarily used for: -* **Academic Data Integration:** Importing and managing data from research papers. -* **Public Repository Sync:** Maintaining reference data from sources like the 1000 Genomes Project (1KG). -* **Biosample Management:** Creation of samples by trusted administrators or researchers. - -**Status:** -* **Authentication:** Relies on `ApiKeyFilter`. 
This is generally **adequate** for this specific use case, as these endpoints are intended for a limited set of trusted internal users or automated service accounts managing reference data. -* **Definition:** Implemented purely as Play Actions. While functional, the lack of Tapir definitions makes it harder to generate client SDKs for internal tools. - -## 2. Future State: The Edge/Atmosphere API - -To function as an "App Layer in the Atmosphere" for thousands of distributed Personal Data Servers (PDS), a distinct set of APIs is required. These APIs operate in a fundamentally different trust domain (untrusted/semi-trusted distributed nodes) compared to the Curator APIs. - -### Critical Improvements Required - -#### A. Formalize Edge-Facing APIs (Tapir) -* **Goal:** Define the interface for the distributed Edge application. -* **Action:** Create new Tapir definitions for all Edge interactions. This allows the Edge application (likely written in a different language/context) to use auto-generated clients, ensuring robust communication. -* **Scope:** - * `EdgeOpsEndpoints.scala`: Heartbeats, Config, Updates. - * `IngestionEndpoints.scala`: Submission of results (calls, segments). - -#### B. Implement DID-Based Authentication (The "Atmosphere" Layer) -* **Context:** The current `ApiKeyFilter` is insufficient for the Edge layer. We cannot issue and manage static secrets for thousands of citizen scientists. -* **Action:** Implement a decentralized auth flow. - * **Registration:** `POST /api/v1/edge/register` - Edge Node exchanges its DID and Public Key. - * **Verification:** Middleware that verifies requests signed by the PDS's private key against the registered DID. -* **Distinction:** This auth mechanism specifically protects the *Edge* endpoints, while the *Curator* endpoints can potentially remain on the simpler API Key system (or migrate later). - -#### C. Operational Management (Fleet Control) -* **Gap:** The current system has no visibility into the "Fleet". 
-* **Action:** Implement "Atmosphere" control endpoints. - * **Heartbeat:** Nodes report `online`, `idle`, `processing`. - * **Configuration:** Server pushes distinct configurations (e.g., "Focus analysis on Haplogroup R-M269"). - * **Manifest:** Server publishes the "Target State" for the Edge software version. - -#### D. Granular Data Ingestion -* **Gap:** Current ingestion is "Sample Creation" focused. Edge nodes need to submit *results* for existing samples. -* **Action:** Create specialized endpoints for result submission. - * `POST /api/v1/ingest/haplogroup-call`: "I found this variant." - * `POST /api/v1/ingest/ibd-segments`: "I found this match (hash)." - * `POST /api/v1/ingest/stats`: "I processed 5GB of data." - -## Summary of Recommendations - -| Feature Area | Curator API (Existing) | Edge/Atmosphere API (New) | -| :--- | :--- | :--- | -| **Primary User** | Internal Researchers / Scripts | Citizen Scientist PDS Nodes | -| **Trust Model** | Trusted (Internal) | Untrusted/Semi-Trusted (Distributed) | -| **Auth Method** | API Key (Current is OK) | **DID + Request Signing (Required)** | -| **Definition** | Play Controllers | **Tapir Endpoints (Required)** | -| **Action** | Maintain / Doc Improvements | **Build New Layer** | - -**Immediate Next Step:** Begin implementing `EdgeOpsEndpoints.scala` and the DID-based authentication middleware to establish the secure channel for the new layer. \ No newline at end of file diff --git a/documents/BGS-Firehouse-Sync.mermaid b/documents/BGS-Firehouse-Sync.mermaid deleted file mode 100644 index 93f297e1..00000000 --- a/documents/BGS-Firehouse-Sync.mermaid +++ /dev/null @@ -1,25 +0,0 @@ -sequenceDiagram - participant AS as App Server (Play View) - participant PS1 as Participant PDS 1 - participant PS2 as Participant PDS 2 - participant DBR as Internal DID Registry - participant IMB as Internal Message Bus (Kafka/Akka) - - title Custom BGS/Firehose Sync Flow - - AS->>DBR: 1. 
Get List of Active DIDs and Sync Cursors - - loop Sync all DIDs in Registry - AS->>PS1: 2a. Sync Request: com.atproto.sync.getLatestCommit(did:...) - PS1-->>AS: 3a. Commit Response (Root CID) - - AS->>PS1: 4a. Fetch Blocks: com.atproto.sync.getRepo(did:..., since: Cursor) - PS1-->>AS: 5a. Response: CAR file containing new records/diff - - AS->>AS: 6. Verify, Decode, Filter for "app.citizen.report" - - AS->>IMB: 7. Publish Event: Decoded Citizen Report - AS->>DBR: 8. Update Cursor (New last synced sequence number) - end - - AS->>AS: 9. App View Consumes IMB Topic (The Custom Firehose) \ No newline at end of file diff --git a/documents/BGS_Integration_Plan.md b/documents/BGS_Integration_Plan.md deleted file mode 100644 index 52e5864e..00000000 --- a/documents/BGS_Integration_Plan.md +++ /dev/null @@ -1,352 +0,0 @@ -# BGS / Firehose Integration Plan - -## Status: Transitioning to Atmosphere Lexicon Events - -This document outlines the transition of the BGS integration from a monolithic REST API to a more granular, event-driven model based on the Atmosphere Lexicon. While the initial REST API (`/api/external-biosamples`, `/api/projects`) remains functional for backward compatibility, new integrations should prefer the generic `/api/firehose/event` endpoint. - ---- - -## Architecture Overview - -For the MVP and early phases, we utilize a **Secure REST API** pattern. The BGS server (or Edge App) acts as an authenticated API client. - -### Generic Atmosphere Event API (Recommended for New Integrations) - -This endpoint provides a unified entry point for all Atmosphere Lexicon records (Biosample, Sequence Run, Alignment, Project, etc.). The client sends a JSON payload representing a `FirehoseEvent` type, which is then dispatched to the appropriate handler. 
- -* **Integration Point:** `POST /api/firehose/event` -* **Controller:** `app/controllers/CitizenBiosampleController.scala` (specifically `processEvent` action) -* **Handler:** `app/services/firehose/AtmosphereEventHandler.scala` -* **Data Models:** `app/models/atmosphere/*Record.scala` and `app/services/firehose/*Event.scala` -* **Security:** API Key authentication via `X-API-Key` header (`ApiSecurityAction`) - -### Legacy (Phase 1) Monolithic APIs (For Backward Compatibility) - -These endpoints handle `ExternalBiosampleRequest` which is a monolithic structure that embeds all related data. This will eventually be deprecated in favor of the granular Atmosphere events. - -#### Citizen Biosample API - -* **Integration Point:** `POST /api/external-biosamples` -* **Controller:** `app/controllers/CitizenBiosampleController.scala` -* **Service:** `app/services/CitizenBiosampleService.scala` -* **Data Model:** `app/models/api/ExternalBiosampleRequest.scala` -* **Security:** API Key authentication via `X-API-Key` header (`ApiSecurityAction`) - -#### Full CRUD Operations (Legacy) - -| Operation | Endpoint | Description | -|-----------|----------|-------------| -| **Create** | `POST /api/external-biosamples` | Create new citizen biosample with donor resolution | -| **Update** | `PUT /api/external-biosamples/{atUri}` | Update existing biosample (optimistic locking via `atCid`) | -| **Delete** | `DELETE /api/external-biosamples/{atUri}` | Soft delete biosample | - -#### Project API (Legacy) - -| Operation | Endpoint | Description | -|-----------|----------|-------------| -| **Create** | `POST /api/projects` | Create new research project | -| **Update** | `PUT /api/projects/{atUri}` | Update project (optimistic locking) | -| **Delete** | `DELETE /api/projects/{atUri}` | Soft delete project | - ---- - -## Data Model: Atmosphere Lexicon Granular Records - -The Edge App now generates and sends granular records defined by the `com.decodingus.atmosphere` Lexicon. 
The previous monolithic `ExternalBiosampleRequest` is now broken down into distinct, inter-referenced records. - -### Key Concepts - -1. **PDS Owner (citizenDid):** The researcher/genealogist running the Edge App. Owns the AT Protocol records. -2. **Granular Records:** Each significant entity (Biosample, Sequence Run, Alignment, Project) is its own top-level record with a unique `atUri`. -3. **Referential Integrity:** Records link to each other using `atUri` references (e.g., `SequenceRunRecord.biosampleRef` points to a `BiosampleRecord`). - -### Linkage Keys - -* `atUri`: The canonical AT Protocol identifier (`at://did:plc:xxx/collection/rkey`) - uniquely identifies *any* record. -* `atCid`: Content Identifier for optimistic locking / version tracking. -* `citizenDid`: Identifies the PDS owner, extracted from `atUri` or provided explicitly. -* `donorIdentifier`: Identifies the specific biological source (person) within that PDS owner's collection. - -### SpecimenDonor Resolution Logic - -Implemented in `CitizenBiosampleService.resolveOrCreateDonor()`: - -1. Extract `citizenDid` from `atUri` (format: `at://did:plc:xxx/...`) -2. Look up `SpecimenDonor` by `(citizenDid, donorIdentifier)` pair -3. If found: Link biosample to existing donor (aggregates multiple datasets) -4. If not found: Create new `SpecimenDonor` with `donorType = Citizen` - - - ---- - -## Data Payload Specification (Atmosphere Lexicon Events) - -Clients should send JSON payloads corresponding to `FirehoseEvent` wrappers around the Atmosphere Lexicon records. The `action` field (`Create`, `Update`, `Delete`) dictates the operation. - -### 1. 
`BiosampleEvent` (for `com.decodingus.atmosphere.biosample`) - -**Example `Create` Payload:** - -```json -{ - "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx", - "atCid": "bafyreihp47vj6t24z4k3f2f5vj4b5t3g2d5c3v2h5j4k3l2m5n6o4p3q2r", - "action": "Create", - "payload": { - "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx", - "meta": { - "version": 1, - "createdAt": "2025-12-07T10:00:00Z" - }, - "sampleAccession": "BGS-SAMPLE-001", - "donorIdentifier": "Subject-Alice-1", - "citizenDid": "did:plc:alice123", - "description": "Blood sample from Alice's WGS", - "centerName": "Home Lab BGS Node", - "sex": "Female", - "haplogroups": { - "yDna": { - "haplogroupName": "H1", - "score": 0.99 - }, - "mtDna": { - "haplogroupName": "K1a10", - "score": 0.98 - } - }, - "sequenceRunRefs": [], - "genotypeRefs": [], - "populationBreakdownRef": null, - "strProfileRef": null - } -} -``` - -### 2. `SequenceRunEvent` (for `com.decodingus.atmosphere.sequencerun`) - -**Example `Create` Payload:** - -```json -{ - "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.sequencerun/abc123xyz", - "atCid": "bafyreiaabcdefghijklmnopqrstuvwxyz0123456789", - "action": "Create", - "payload": { - "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.sequencerun/abc123xyz", - "meta": { - "version": 1, - "createdAt": "2025-12-07T11:00:00Z" - }, - "biosampleRef": "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx", - "platformName": "ILLUMINA", - "instrumentModel": "NovaSeq 6000", - "instrumentId": "SN0001", - "testType": "WGS", - "libraryLayout": "PAIRED", - "totalReads": 850000000, - "readLength": 150, - "meanInsertSize": 350.5, - "runDate": "2025-10-15T09:00:00Z", - "files": [ - { - "fileName": "alice_wgs.fastq.gz", - "fileSizeBytes": 50000000000, - "fileFormat": "FASTQ", - "checksum": "sha256-...", - "checksumAlgorithm": "SHA-256", - "location": "/data/alice/alice_wgs.fastq.gz" - } - ], - "alignmentRefs": [] - } -} 
-``` - -### 3. `AlignmentEvent` (for `com.decodingus.atmosphere.alignment`) - -**Example `Create` Payload:** - -```json -{ - "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.alignment/def456uvw", - "atCid": "bafyreic1d2e3f4g5h6i7j8k9l0m1n2o3p4q5r6s7t8u9v0w", - "action": "Create", - "payload": { - "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.alignment/def456uvw", - "meta": { - "version": 1, - "createdAt": "2025-12-07T12:00:00Z" - }, - "sequenceRunRef": "at://did:plc:alice123/com.decodingus.atmosphere.sequencerun/abc123xyz", - "biosampleRef": "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx", - "referenceBuild": "GRCh38", - "aligner": "BWA-MEM 0.7.17", - "variantCaller": "GATK HaplotypeCaller 4.2", - "files": [ - { - "fileName": "alice_wgs.cram", - "fileSizeBytes": 20000000000, - "fileFormat": "CRAM", - "checksum": "sha256-...", - "checksumAlgorithm": "SHA-256", - "location": "/data/alice/alice_wgs.cram" - } - ], - "metrics": { - "genomeTerritory": 3000000000, - "meanCoverage": 35.5, - "medianCoverage": 30.0, - "sdCoverage": 10.2, - "pctExcDupe": 0.05, - "pctExcMapq": 0.01, - "pct10x": 0.95, - "pct20x": 0.90, - "pct30x": 0.85, - "hetSnpSensitivity": 0.99 - } - } -} -``` - -### 4. 
`AtmosphereProjectEvent` (for `com.decodingus.atmosphere.project`) - -**Example `Create` Payload:** - -```json -{ - "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.project/my-family-project", - "atCid": "bafyreidf8w9x7y6z5a4b3c2d1e0f9g8h7i6j5k4l3m2n1o0p9q8r7s6t5u4v3w2", - "action": "Create", - "payload": { - "atUri": "at://did:plc:alice123/com.decodingus.atmosphere.project/my-family-project", - "meta": { - "version": 1, - "createdAt": "2025-12-07T13:00:00Z" - }, - "projectName": "Alice's Family Tree Project", - "description": "Research project on the maternal lineage of the Alice family.", - "administrator": "did:plc:alice123", - "memberRefs": [ - "at://did:plc:alice123/com.decodingus.atmosphere.biosample/3jui7q2lx", - "at://did:plc:alice123/com.decodingus.atmosphere.biosample/other-sample" - ] - } -} -``` - -### Key Fields (Atmosphere Events) - -| Field | Required | Description | -|-------|----------|-------------| -| `atUri` | Yes | AT Protocol URI - canonical identifier for the specific record | -| `atCid` | Yes | Content Identifier for optimistic locking / version tracking | -| `action` | Yes | Operation type (`Create`, `Update`, `Delete`) | -| `payload` | Yes (for Create/Update) | The specific Lexicon record (e.g., `BiosampleRecord`, `SequenceRunRecord`) | -| `payload.meta.createdAt` | Yes | Timestamp of record creation | -| `payload.biosampleRef` | Yes (for child records) | AT URI of the parent biosample | -| `payload.sequenceRunRef` | Yes (for Alignment) | AT URI of the parent sequence run | - ---- - -## PDS Registration - -Before syncing data, PDS instances must be registered: - -**Endpoint:** `POST /api/registerPDS` - -```json -{ - "did": "did:plc:abc123", - "handle": "researcher.bsky.social", - "pdsUrl": "https://pds.example.com", - "rToken": "auth-token-from-edge-app" -} -``` - -The registration process: -1. Verifies PDS is reachable via `com.atproto.sync.getLatestCommit` -2. Stores DID, PDS URL, and initial sync cursor -3. 
Enables the Rust sync cluster to poll for updates - -### PDS Lease Management - -For parallel sync processing, the `pds_registrations` table includes: -- `leased_by_instance_id`: Which sync worker owns this PDS -- `lease_expires_at`: Lease expiration for failover -- `processing_status`: idle | processing | error - ---- - -## Database Schema - -### Tables - -| Table | Purpose | -|-------|---------| -| `citizen_biosample` | Citizen/Atmosphere biosample records | -| `specimen_donor` | Physical persons (donors) - linked via `specimen_donor_id` FK | -| `project` | Research projects grouping biosamples | -| `sequence_library` | Sequence run records | -| `sequence_file` | Sequence file metadata | -| `alignment_metadata` | Alignment metadata and metrics | -| `pds_registrations` | Registered PDS instances for sync | -| `publication_citizen_biosample` | Links biosamples to publications | -| `citizen_biosample_original_haplogroup` | Publication-reported haplogroups | - -### Key Columns on `citizen_biosample` (and other Atmosphere-enabled tables) - -| Column | Type | Purpose | -|--------|------|---------| -| `at_uri` | VARCHAR | AT Protocol canonical identifier | -| `at_cid` | VARCHAR | Version for optimistic locking | -| `specimen_donor_id` | INT FK | Link to physical donor | -| `deleted` | BOOLEAN | Soft delete flag | -| `y_haplogroup` | JSONB | Full HaplogroupResult with scoring | -| `mt_haplogroup` | JSONB | Full HaplogroupResult with scoring | - ---- - -## Integration Roadmap - -### Phase 1 (Legacy): Direct REST API - -* **Mechanism:** Synchronous HTTP POST -* **Flow:** `Edge App` → `CitizenBiosampleController` (`/api/external-biosamples`, `/api/projects`) → `CitizenBiosampleService` → `CitizenBiosampleEventHandler` → `DB` -* **Status:** Functional for existing integrations. Use the new `/api/firehose/event` for all new Atmosphere Lexicon-based events. 
- -### Phase 2: Asynchronous Event Ingestion (Kafka) - -* **Mechanism:** Message Queue -* **Flow:** `Edge App` → `Kafka Topic` → `DecodingUs Kafka Consumer` → `AtmosphereEventHandler` → `DB` -* **Change:** Edge App uses Kafka Producer; DecodingUs adds Kafka Consumer service. Processes raw Atmosphere Lexicon events. -* **Benefits:** Decoupled; handles traffic bursts; high resilience. - -### Phase 3: Decentralized AppView (AT Protocol Firehose) - -* **Mechanism:** AT Protocol Firehose subscription -* **Flow:** `Edge App` → `User's PDS` → `AT Proto Relay` → `DecodingUs Firehose Consumer` → `AtmosphereEventHandler` → `DB` -* **Change:** Edge App writes directly to PDS using `com.decodingus.atmosphere.*` Lexicon records; DecodingUs becomes a passive indexer. -* **Benefits:** True user data ownership; interoperability with AT Protocol ecosystem. - ---- - -## Deployment Checklist - -### For New Atmosphere Integrations (using `/api/firehose/event`) - -1. **API Key:** Configure in AWS Secrets Manager (prod) or `application.conf` (dev) -2. **Database:** Ensure all relevant evolutions (`25.sql` and prior) have been applied to update table schemas (e.g., `at_uri`, `at_cid` on `sequence_library`, new fields on `alignment_metadata`). -3. **Edge App Config:** Set DecodingUs API URL and API key. Configure the Edge App to construct and send `FirehoseEvent` JSON payloads as per the Atmosphere Lexicon. -4. **Test:** POST example `FirehoseEvent` payloads (e.g., `BiosampleEvent`, `SequenceRunEvent`, `AlignmentEvent`, `AtmosphereProjectEvent`) to `/api/firehose/event`. -5. **Verify:** Check `citizen_biosample`, `specimen_donor`, `sequence_library`, `sequence_file`, `alignment_metadata`, and `project` tables for correctly ingested and linked data. 
- -### Swagger UI - -API documentation available at: `/api/docs` - -Documented endpoints now include: -- **Generic Atmosphere Event Processor:** `POST /api/firehose/event` -- Legacy Citizen Biosamples (Create, Update, Delete) -- Legacy Projects (Create, Update, Delete) -- References, Haplogroups, Coverage, Sequencer APIs diff --git a/documents/Coverage_Priority_Report.md b/documents/Coverage_Priority_Report.md deleted file mode 100644 index d9d1a1ef..00000000 --- a/documents/Coverage_Priority_Report.md +++ /dev/null @@ -1,58 +0,0 @@ -# Code Coverage Analysis & Prioritization - -## Current Status -* **Overall Statement Coverage:** ~5.72% -* **Overall Branch Coverage:** ~3.44% -* **Tested Areas:** - * `HomeController` (Partial, rendering only) - * `PgpBiosampleService` (Creation logic) - * `AccessionNumberGenerator` (ID generation logic) - * `AccessionNumberGenerator` (Infrastructure logic via decoupling) - -## Analysis of Gaps - -The application has significant gaps in its testing strategy. Most controllers and services are completely untested. - -### High Criticality (Core Business Logic) -These areas handle data integrity, ingestion, and the core value proposition (Trees, Biosamples). - -1. **`BiosampleDataService.scala`** - * **Role:** Orchestrates linking publications and adding raw sequence data to biosamples. - * **Risk:** High. Failures here mean data loss or corruption during ingestion. Complex nested `Future` chains. - * **Status:** 0% Coverage. - -2. **`HaplogroupTreeService.scala` & `TreeImporter.scala`** - * **Role:** Manages the phylogenetic trees (Y-DNA/mtDNA). - * **Risk:** High. The tree is the central data structure of the application. - * **Status:** 0% Coverage. - -3. **`BiosampleUpdateService.scala`** - * **Role:** Handling modifications to existing records. - * **Risk:** Medium-High. Potential for unauthorized or incorrect data overwrites. - * **Status:** 0% Coverage. - -### Medium Criticality (Controllers & Display) -1. 
**`ExternalBiosampleController.scala`** - * **Role:** Entry point for creating non-PGP biosamples. - * **Risk:** Medium. Similar to `PgpBiosampleController` but less restrictive. - * **Status:** 0% Coverage. - -2. **`BiosampleController.scala`** - * **Role:** Retrieval and viewing of samples. - * **Risk:** Low-Medium (Read-only mostly). - * **Status:** 0% Coverage. - -## Prioritized Action Plan - -We recommend addressing coverage in the following order to maximize stability and reliability: - -| Priority | Component | Rationale | -| :--- | :--- | :--- | -| **1** | **`BiosampleDataService`** | Complex data orchestration (Library -> File -> Checksum -> Location) is prone to bugs. | -| **2** | **`ExternalBiosampleService`** | Completes the coverage for "Ingestion" workflows (pairing with PGP service). | -| **3** | **`BiosampleUpdateService`** | Ensures data modification safety. | -| **4** | **`HaplogroupTreeService`** | Core domain logic, though often static/read-heavy. | -| **5** | **`ExternalBiosampleController`** | API surface testing. | - -## Tech Debt Note -* `BiosampleDataService` relies heavily on multiple repositories. Following the pattern used in `BiosampleAccessionGenerator`, we should strictly mock these repositories rather than trying to use an in-memory DB, to keep tests fast and focused on the orchestration logic. diff --git a/documents/Database_Schema_Review.md b/documents/Database_Schema_Review.md deleted file mode 100644 index 15907bf1..00000000 --- a/documents/Database_Schema_Review.md +++ /dev/null @@ -1,59 +0,0 @@ -# Database Schema Review: Alignment with Application Goals - -This document reviews the current database schema (`app/models/dal/`) against the application's stated goals of becoming a "App Layer in the Atmosphere" for citizen science genetic research. - -## Summary of Findings - -The database schema is remarkably mature and well-aligned with the project's complex domain requirements (Pangenome, Haplogroups, IBD). 
It already includes sophisticated structures for: -* **Decentralized Identity:** Native support for DIDs in user and donor tables. -* **Reputation:** A built-in system for tracking user contributions. -* **Privacy-Preserving Discovery:** Specialized tables for IBD matches that verify PDS attestations without exposing raw data. - -However, specific gaps exist regarding the **operational management** of the Edge Node fleet and the **auditability** of specific data submissions. - -## Detailed Alignment Analysis - -### 1. Goal: Collaborative Haplogroup Tree Resolution -* **Status:** **Strong Support** -* **Evidence:** `HaplogroupsTable`, `HaplogroupRelationshipsTable`, and `HaplogroupVariantMetadataTable` provide a complete graph structure for storing the tree. -* **Gap:** **Submission Provenance.** While the *result* (the tree) is stored, there is no obvious table (e.g., `HaplogroupCallSubmissions`) to track *which* Edge Node proposed a specific variant or branch change before it was accepted. Tracking this is crucial for the "Collaborative" aspect and resolving conflicts. - -### 2. Goal: Privacy-Preserving Genetic Relative Discovery (IBD) -* **Status:** **Excellent Support** -* **Evidence:** - * `IbdDiscoveryIndicesTable`: Stores the *existence* of a match and its strength (cM) without storing the raw segment data, perfectly aligning with the privacy goal. - * `IbdPdsAttestationsTable`: Links matches to `attesting_pds_guid` and includes an `attestation_signature`. This is a critical feature for a distributed trust model, allowing PDS owners to cryptographically sign off on matches. - -### 3. Goal: Edge Computing Participation (Citizen Science) -* **Status:** **Mixed** -* **Evidence (Positive):** - * `UserPdsInfoTable`: Explicitly links Users to a `pds_url` and `did`. - * `ReputationEventsTable` & `UserReputationScoresTable`: A complete gamification/credit system is already defined in the schema, allowing the system to reward users for contributions. 
-* **Evidence (Negative):** - * **Missing Operational State:** There is no table to track the *live status* of Edge Nodes. If a user has 5 computers running the Edge software, the database has no way to know which are Online, Offline, or their current load. - * **Missing Device Registry:** `UserPdsInfo` links a *User* to a PDS. It does not clearly support a User having *multiple* distinct compute nodes (devices) with different capabilities. - -### 4. Goal: Secure Data Interaction (AT Protocol) -* **Status:** **Good Support** -* **Evidence:** - * `SequenceAtpLocationTable`: Directly maps sequence files to AT Protocol concepts (`repo_did`, `record_cid`), enabling the "App Layer" to reference data stored in the distributed network. - * `SpecimenDonorsTable`: Contains `citizen_biosample_did`, facilitating the link between physical samples and their digital twins on the AT Protocol. - -## Recommendations - -To fully support the "App Layer" vision, we recommend the following schema additions: - -1. **Edge Node Registry Table:** - * Create `EdgeNodesTable` (or `UserComputeNodes`) to track individual devices associated with a user/PDS. - * Columns: `node_id (UUID)`, `user_id`, `last_heartbeat (Timestamp)`, `status (Online/Offline/Busy)`, `software_version`. - -2. **Submission Audit Tables:** - * Create `HaplogroupSubmissionsTable` to log incoming calls from Edge Nodes before they are merged into the main `BiosampleHaplogroupsTable`. - * Columns: `submission_id`, `biosample_id`, `edge_node_id`, `proposed_haplogroup`, `confidence_score`, `algorithm_version`, `submission_timestamp`. - -3. **Job Assignment Table (Optional):** - * If the server intends to *dispatch* work to Edge Nodes (rather than just accepting results), a `ComputeJobsTable` will be needed to track which node was assigned which task. - -## Conclusion - -The schema requires only minor additions to support the operational aspects of the "Edge Node" fleet. 
The core scientific and identity data models are robust and ready for production. diff --git a/documents/Internationalization_Guide.md b/documents/Internationalization_Guide.md deleted file mode 100644 index 44a7dd49..00000000 --- a/documents/Internationalization_Guide.md +++ /dev/null @@ -1,116 +0,0 @@ -# Internationalization (I18n) Guide for Decoding Us - -## Overview -Currently, the application has English text embedded directly into Twirl templates. To support multiple languages, we should adopt the **Standard Play Framework I18n Pattern**. This approach is robust, performant, and natively supported by the framework without requiring external libraries. - -## The Design Pattern - -The core concept is to separate **Content** (text) from **Structure** (HTML/Twirl). - -### 1. Architecture -* **Message Files:** Text is stored in `conf/messages` (default/English), `conf/messages.fr` (French), `conf/messages.es` (Spanish), etc. -* **Key-Value Pairs:** Each line in these files maps a unique key to a translated string. - * `home.title = Welcome to Decoding Us` -* **Twirl Templates:** Instead of hardcoded text, templates use the `Messages` object to look up strings by key. - * `

<h1>@messages("home.title")</h1>

` -* **Context Propagation:** Controllers inject `MessagesControllerComponents` and mix in `I18nSupport` to automatically detect the user's preferred language (via `Accept-Language` header or cookies) and pass the correct `Messages` provider to the view. - -## Implementation Steps - -### Step 1: Create Message Files -Create the `conf/messages` file for the default language (English). - -**File:** `conf/messages` -```properties -# General -app.name = Decoding Us -site.title = Decoding Us - Citizen Science Genetics - -# Navigation -nav.home = Home -nav.about = About -nav.contact = Contact - -# Home Page -home.welcome = Welcome to Decoding Us -home.intro = Decoding Us will be a next-generation platform for citizen science... -home.goals.title = The system shall be architected with these goals: -``` - -### Step 2: Update Configuration -Enable the languages in `conf/application.conf`. - -```hocon -play.i18n { - # The list of supported languages - langs = [ "en", "fr", "es" ] -} -``` - -### Step 3: Refactor Controllers -Update controllers to provide `Messages` support. This is often done by injecting `MessagesControllerComponents`. - -**Example:** -```scala -import play.api.mvc._ -import play.api.i18n._ -import javax.inject.Inject - -class HomeController @Inject()(cc: MessagesControllerComponents) extends AbstractController(cc) with I18nSupport { - def index = Action { implicit request => - // 'request' implicitly contains the messages context due to I18nSupport - Ok(views.html.index()) - } -} -``` - -### Step 4: Refactor Views -Update Twirl templates to accept an implicit `Messages` provider and use it. - -**File:** `app/views/index.scala.html` -```scala -@()(implicit messages: Messages) - -@main(messages("site.title")) { -
-    <h1>@messages("home.welcome")</h1>
-    <p>@messages("home.intro")</p>
-
-} -``` - -**File:** `app/views/main.scala.html` (Layout) -```scala -@(title: String)(content: Html)(implicit messages: Messages) - - - - - @title - - - - @_navbar() - @content - - -``` - -## Handling Dynamic Content -For text that includes dynamic values (e.g., "Hello, John"), use placeholders in the message file. - -`conf/messages`: -```properties -greeting = Hello, {0}! -``` - -Twirl: -```scala -@messages("greeting", userName) -``` - -## Advantages -1. **Standardization:** Any Play developer will instantly understand this structure. -2. **Performance:** Message lookups are extremely fast and compiled. -3. **Type Safety:** While the keys are strings, the integration with Twirl is robust. -4. **Flexibility:** Adding a new language just requires adding a new `messages.xx` file. diff --git a/documents/Internationalization_Guide_Blocks.md b/documents/Internationalization_Guide_Blocks.md deleted file mode 100644 index 6d4a4fc8..00000000 --- a/documents/Internationalization_Guide_Blocks.md +++ /dev/null @@ -1,88 +0,0 @@ -# Internationalization (I18n) Guide: Block-Based Content Strategy - -## Overview -While the standard key-value pair approach (Property Files) is excellent for UI labels and short text, it is cumbersome and unmaintainable for long-form content like "About Us" pages, blog posts, or extensive privacy policies. - -For these cases, we recommend a **Block-Based Content Strategy** that treats long-form content as structural dependencies rather than simple strings. - -## Recommended Pattern: Localized Partial Views - -Instead of putting entire paragraphs into a `messages` file, we create separate Twirl templates (partials) for the content blocks of each language. - -### 1. Architecture - -* **Structure:** Maintain the main page structure (layout, headers, footers) in a master template. -* **Content Blocks:** Create a directory structure for localized content fragments. 
- * `app/views/content/en/aboutBody.scala.html` - * `app/views/content/es/aboutBody.scala.html` -* **Dispatcher:** Use a helper (or the controller) to dynamically select the correct partial based on the user's language. - -### 2. Implementation - -#### File Structure -``` -app/ - views/ - about.scala.html (Master structure) - content/ - en/ - aboutText.scala.html (English paragraphs) - es/ - aboutText.scala.html (Spanish paragraphs) -``` - -#### The Content Partial (English) -**`app/views/content/en/aboutText.scala.html`** -```html -

<p>Decoding Us will be a next-generation platform for citizen science focused on empowering individuals...</p>

-

The system shall be architected with these goals:

-
    -
  • Federated Design:...
  • -
-``` - -#### The Master View (Dispatcher) -**`app/views/about.scala.html`** -```scala -@()(implicit messages: Messages) - -@main(messages("nav.about")) { -
-    <h1>@messages("nav.about")</h1>
- - @messages.lang.code match { - case "es" => { @views.html.content.es.aboutText() } - case "fr" => { @views.html.content.fr.aboutText() } - case _ => { @views.html.content.en.aboutText() } - } -
-} -``` - -### 3. Alternative: Markdown-Based Content - -For even easier editing (especially for non-developers), you can store long-form content as **Markdown** files and render them at runtime. - -* **Storage:** `conf/content/about/en.md`, `conf/content/about/es.md`. -* **Loader:** A simple service reads the file based on the requested language. -* **Renderer:** Use a library like `flexmark-java` to convert Markdown to HTML in the controller, then pass the `Html` object to the view. - -**Controller Example:** -```scala -def about = Action { implicit request => - val lang = messagesApi.preferred(request).lang.code - val markdownContent = contentLoader.load("about", lang) // returns "## About Us..." - val htmlContent = MarkdownRenderer.render(markdownContent) - Ok(views.html.about(htmlContent)) -} -``` - -## Summary Recommendation - -| Use Case | Recommended Pattern | -| :--- | :--- | -| **UI Labels, Buttons, Short Titles** | **Standard `messages` file** (Key-Value). | -| **Static Long-Form (About, Terms)** | **Localized Partial Views** (Twirl). Best for compile-time safety. | -| **Dynamic/Frequent Long-Form (Blog)** | **Markdown Files**. Best for ease of editing and CMS-like behavior. | - -For the MVP "About" page, **Localized Partial Views** offers the best balance of type safety and maintainability without introducing new dependencies. diff --git a/documents/atmosphere/02-Core-Records.md b/documents/atmosphere/02-Core-Records.md index 4903b7d8..98a9f408 100644 --- a/documents/atmosphere/02-Core-Records.md +++ b/documents/atmosphere/02-Core-Records.md @@ -365,6 +365,87 @@ This record defines a research project that aggregates multiple biosamples withi --- +## 6. Instrument Observation Record + +A citizen's explicit claim that a sequencer instrument id (from `@RG` headers) +belongs to a particular laboratory, carrying a confidence level. 
The AppView +mirrors these into `fed.instrument_observation` and the **consensus engine** +(`du_db::sequencer::recompute_consensus`) folds them — weighted by `confidence` and +recency — alongside the implicit `centerName` claims on biosamples, producing +curator proposals that, when accepted, set the instrument→lab tie the public +`/api/v1/sequencer/lab` lookup resolves. This is the only citizen-driven input to +lab inference; everything else is read-only lookup. See +`documents/planning/sequencer-lab-inference-system.md`. + +**NSID:** `com.decodingus.atmosphere.instrumentObservation` + +```json +{ + "lexicon": 1, + "id": "com.decodingus.atmosphere.instrumentObservation", + "defs": { + "main": { + "type": "record", + "description": "An observation of a sequencer instrument and its associated laboratory, extracted from BAM/CRAM read headers.", + "key": "tid", + "record": { + "type": "object", + "required": ["instrumentId", "labName", "biosampleRef", "observedAt"], + "properties": { + "instrumentId": { + "type": "string", + "description": "The instrument ID extracted from the @RG header (e.g., 'A00123').", + "minLength": 1, + "maxLength": 255 + }, + "labName": { + "type": "string", + "description": "The name of the sequencing laboratory (as known by the user or inferred).", + "minLength": 1, + "maxLength": 255 + }, + "biosampleRef": { + "type": "string", + "description": "AT URI of the biosample this observation was extracted from." + }, + "platform": { + "type": "string", + "description": "Sequencing platform.", + "knownValues": ["ILLUMINA", "PACBIO", "ONT", "MGI", "ELEMENT", "ULTIMA"] + }, + "instrumentModel": { + "type": "string", + "description": "Inferred or known instrument model (e.g., 'NovaSeq 6000')." + }, + "flowcellId": { + "type": "string", + "description": "Flowcell identifier if extractable from read headers." + }, + "runDate": { + "type": "string", + "format": "datetime", + "description": "Date of the sequencing run if extractable." 
+ }, + "confidence": { + "type": "string", + "description": "Confidence level of the lab association (weights: KNOWN 1.0, INFERRED 0.7, GUESSED 0.3).", + "knownValues": ["KNOWN", "INFERRED", "GUESSED"], + "default": "INFERRED" + }, + "observedAt": { + "type": "string", + "format": "datetime", + "description": "When this observation was recorded (drives the recency term of the consensus score)." + } + } + } + } + } +} +``` + +--- + ## Mapping to `decodingus` Backend To fully leverage these records, `decodingus` will evolve its internal data model: diff --git a/documents/atmosphere/07-Discovery-Records.md b/documents/atmosphere/07-Discovery-Records.md index 23a51c2e..8433f657 100644 --- a/documents/atmosphere/07-Discovery-Records.md +++ b/documents/atmosphere/07-Discovery-Records.md @@ -104,8 +104,81 @@ This record allows citizens to contribute instrument-lab observations from their --- +## Private Variant Record + +This record lets a citizen publish the **private variants** their analysis found beyond +their assigned terminal haplogroup — the mutations that may define a new branch. The +DecodingUs AppView mirrors them into `fed.private_variant` and the **discovery consensus +engine** (`du_db::discovery`) pools them across submitters by variant-set similarity +(Jaccard) into proposed branches for curator review. One record per (biosample, DNA arm). + +**Privacy:** like the `biosample`/`strProfile` summary records, this is citizen-opt-in, +keyed by biosample ref (no donor PII); variants are anonymized to coordinates/known names. 
+ +**NSID:** `com.decodingus.atmosphere.privateVariant` + +```json +{ + "lexicon": 1, + "id": "com.decodingus.atmosphere.privateVariant", + "defs": { + "main": { + "type": "record", + "description": "The private variants a sample carries beyond its assigned terminal haplogroup — candidate defining mutations for a new branch.", + "key": "tid", + "record": { + "type": "object", + "required": ["meta", "biosampleRef", "dnaType", "terminalHaplogroup", "variants"], + "properties": { + "meta": { "type": "ref", "ref": "com.decodingus.atmosphere.defs#recordMeta" }, + "biosampleRef": { + "type": "string", + "description": "AT URI of the biosample these private variants were extracted from." + }, + "sequenceRunRef": { + "type": "string", + "description": "AT URI of the specific sequence run (optional, for precision)." + }, + "dnaType": { + "type": "string", + "description": "Which tree the variants extend.", + "knownValues": ["Y_DNA", "MT_DNA"] + }, + "terminalHaplogroup": { + "type": "string", + "description": "The terminal haplogroup the sample was assigned (e.g., 'R-M269'); the private variants sit below it." + }, + "variants": { + "type": "array", + "description": "The private (mismatching) variant calls beyond the terminal.", + "items": { + "type": "object", + "required": ["contig", "position", "ancestral", "derived"], + "properties": { + "name": { "type": "string", "description": "Known name if any (e.g., 'FT123456'); omit for novel variants." }, + "contig": { "type": "string", "description": "Reference contig (e.g., 'chrY')." }, + "position": { "type": "integer", "description": "GRCh38 position." }, + "ancestral": { "type": "string", "description": "Ancestral allele." }, + "derived": { "type": "string", "description": "Derived allele." }, + "rsId": { "type": "string", "description": "dbSNP rsID if known." } + } + } + } + } + } + } + } +} +``` + +--- + ## Backend Mapping * **`InstrumentObservation`:** Maps to `instrument_observation` table for lab inference consensus. 
+* **`PrivateVariant`:** Mirrored to `fed.private_variant`; the discovery consensus engine + (`du_db::discovery`) materializes it into `tree.biosample_private_variant` and pools it + into `tree.proposed_branch`. See + [haplogroup-discovery-system.md](../haplogroup-discovery-system.md) (D6). See [sequencer-lab-inference-system.md](../sequencer-lab-inference-system.md) for implementation planning. diff --git a/documents/curator-guide-tree-versioning.md b/documents/curator-guide-tree-versioning.md index f331a5e7..dab2f95f 100644 --- a/documents/curator-guide-tree-versioning.md +++ b/documents/curator-guide-tree-versioning.md @@ -1,503 +1,260 @@ # Curator Guide: Tree Versioning System -This guide explains how to use the Tree Versioning System to review, validate, and apply bulk changes to the haplogroup tree. +This guide explains how to review, validate, and apply bulk changes to the +haplogroup tree, and how to resolve the placements the merge/graft couldn't make +confidently. + +The Rust AppView splits this across **two curator screens**: + +- **Change Sets** (`/curator/change-sets`) — the lifecycle: review the diff, + approve/reject changes, then apply or discard. +- **Merge Reviews** (`/curator/reviews`) — resolve the ambiguous or blocked + placements the SNP-graft/merge staged for a human (the `wip_*` items). + +Both are reached from the **Curator dashboard** (`/curator`) and are gated by the +**Curator** role (`Admin`, `TreeCurator`, or `Curator`) — there are no finer-grained +per-action permissions. --- ## Overview -When large external tree sources (like ISOGG or ytree.net) are merged into the system, the changes don't go directly to production. Instead, they're captured in a **Change Set** that you can review before applying. +When large external tree sources (ISOGG, decoding-us, FTDNA) are merged or +grafted in, the changes don't go straight to production. They're captured in a +**Change Set** you review before applying. 
This gives you: - Time to review changes at your own pace -- Ability to see what will change before it affects users -- Tools to handle ambiguous placements +- A diff of exactly what will change before it affects users +- A separate worklist for ambiguous placements that need a human decision - An audit trail of all changes --- -## Accessing the Change Sets Dashboard +## Change Sets screen (`/curator/change-sets`) -1. Navigate to **Curator > Change Sets** from the main menu -2. Or go directly to `/curator/change-sets` - -**Required Permission:** `tree.version.view` - ---- +A two-panel (master-detail) HTMX screen. -## Understanding the Dashboard +### Left panel — the change-set list -The dashboard shows a master-detail layout: +Each change set shows its name, source, DNA type (Y/mt), status, change count, +and who created it. Filter by status with the dropdown. -### Left Panel: Change Set List +### Right panel — the detail/diff -Each change set shows: -- **Name**: Descriptive name (e.g., "isogg-2025-12") -- **Source**: Where the data came from (ISOGG, ytree.net, etc.) 
-- **Type**: Y-DNA or mtDNA -- **Status**: Current state (see below) -- **Changes**: Total count and pending items -- **Created**: When and by whom +Selecting a change set loads its panel (`/curator/change-sets/:id/panel`) with: +- Summary stats: **added / removed / modified / reparented** +- The **diff** rows (type, node name, before→after detail) — rendered inline +- The per-change list with each change's status +- Comments, and the status-appropriate lifecycle actions -### Right Panel: Change Set Details +### Statuses -Click a change set to see: -- Full statistics (nodes created, updated, reparented) -- Ambiguity warnings -- Available actions based on status -- Comments from other curators - ---- - -## Change Set Statuses - -| Status | Meaning | Your Action | +| Status | Meaning | Your action | |--------|---------|-------------| -| **Draft** | Merge in progress | Wait for completion | -| **Ready for Review** | Merge complete, needs review | Start review | -| **Under Review** | Being actively reviewed | Continue reviewing | -| **Applied** | Changes are in production | None (read-only) | -| **Discarded** | Changes were abandoned | None (read-only) | - ---- - -## Filtering Change Sets - -Use the dropdown filters at the top: -- **Type Filter**: Show only Y-DNA or mtDNA change sets -- **Status Filter**: Show only sets in a specific status - ---- - -## Reviewing a Change Set - -### Step 1: Start the Review - -1. Click a change set with "Ready for Review" status -2. Click **Start Review** in the detail panel -3. 
Status changes to "Under Review" - -### Step 2: View the Full Diff - -Click **View Full Diff** to see all changes in the set: - -- **Green rows**: New nodes being created -- **Yellow rows**: Existing nodes being updated -- **Blue rows**: Nodes being reparented (moved in tree) - -The diff view shows: -- What changed (name, variants, parent) -- Before and after values for updates -- Confidence scores for ambiguous placements - -### Step 2b: View Tree Preview (ASCII) - -For a structural overview of proposed changes, click the **Tree Preview** button in the change set detail panel (next to "View Diff"). The preview opens in a new browser tab. - -> **Direct URL:** You can also access it at `/curator/change-sets/{id}/tree-preview` if needed. - -This returns a plain-text ASCII tree showing affected subtrees with markers: - -``` -=== Tree Preview: Y merge from ISOGG === -Type: Y | Status: DRAFT -New nodes: 8322 | Reparents: 306 | Variant additions: 76673 - -Legend: [+] = new node, [→] = reparented, [~] = modified -================================================== - -Y -├── [+] A00-T (V60, V168, +3 more) -│ ├── [→] A00 -│ └── [+] A0-T -│ ├── [→] A0 -│ └── [→] A1 -└── BT - ---- Nodes reparented to new WIP nodes --- - A00: Y → A00-T [+] - A0: Y → A0-T [+] - ---- Variant additions to existing nodes --- - E1a-Y947: (M4671, CTS9320, +38 more) -``` - -**Legend:** -| Marker | Meaning | -|--------|---------| -| `[+]` | New node to be created | -| `[→]` | Existing node being reparented | -| `[~]` | Existing node with variant additions | - -The preview shows: -- New nodes in their proposed tree position -- Existing siblings for context -- Up to 5 variant names per node -- Summary of reparent operations -- Summary of variant additions to existing nodes - -#### Tips for Navigating Large Previews - -For large merges with thousands of nodes, the ASCII preview can be dense. Here are some practical tips: - -1. 
**Use browser search (Ctrl+F / Cmd+F)** - Search for specific haplogroup names or variant names to jump directly to areas of interest - -2. **Focus on the summary sections** - Scroll to the bottom for: - - "Nodes reparented to new WIP nodes" — shows all reparent operations in a compact list - - "Variant additions to existing nodes" — lists all existing nodes receiving new variants - -3. **Look for markers first** - Search for `[+]` to find all new nodes, or `[→]` to find all reparented nodes - -4. **Copy to a text editor** - For very large previews, copy the text to an editor with better navigation (code folding, outline view, etc.) - -5. **Cross-reference with the Diff view** - Use the Tree Preview to understand structure, then switch to the Diff view for detailed change-by-change review - -> **Future Enhancement:** A graphical side-by-side tree comparison view is planned for a future release, which will provide a more visual way to review structural changes. - -### Step 3: Handle Ambiguities - -If the change set has ambiguities: +| **DRAFT** | Merge/graft still materializing | Wait, then Start Review | +| **READY_FOR_REVIEW** | Materialized, awaiting review | Start Review (or Apply) | +| **UNDER_REVIEW** | Being actively reviewed | Approve/reject changes, then Apply | +| **APPLIED** | Live in production | None (read-only) | +| **DISCARDED** | Abandoned | None (read-only) | -1. Look for the yellow warning banner showing the count -2. Click **View Report** to see the ambiguity report -3. For each ambiguity, decide: - - **Accept the placement** (approve the change) - - **Reject the placement** (skip this change) - - **Manually fix** the data before applying +Per-change status runs `PENDING → APPROVED`/`REJECTED → APPLIED`. 
-Ambiguities occur when: -- Multiple possible parent placements exist -- The algorithm chose based on heuristics -- A confidence score below threshold was assigned +### Lifecycle actions -#### Understanding Confidence Scores +| Action | Endpoint | Available when | +|--------|----------|----------------| +| **Start Review** | `POST /curator/change-sets/:id/start-review` | DRAFT or READY_FOR_REVIEW → UNDER_REVIEW | +| **Review one change** (approve/reject) | `POST /curator/change-sets/:id/changes/:change_id/review` | UNDER_REVIEW | +| **Approve All Pending** | `POST /curator/change-sets/:id/approve-all` | UNDER_REVIEW | +| **Apply** | `POST /curator/change-sets/:id/apply` | READY_FOR_REVIEW or UNDER_REVIEW | +| **Discard** | `POST /curator/change-sets/:id/discard` | any non-APPLIED state | +| **Comment** | `POST /curator/change-sets/:id/comments` | any | -Each ambiguous placement includes a confidence score from 0.0 to 1.0. Use this guide to prioritize your review: - -| Score Range | Risk Level | Recommended Action | -|-------------|------------|-------------------| -| **0.80 – 1.00** | Low | Generally safe to approve. Algorithm had strong SNP overlap. Spot-check a few. | -| **0.50 – 0.79** | Medium | Review the placement. Check if the shared SNPs make sense for this branch. | -| **0.20 – 0.49** | High | Manual verification required. Compare source data against known phylogeny. | -| **0.00 – 0.19** | Very High | Likely incorrect placement. Consider skipping or manually researching. 
| - -**What affects confidence:** -- **SNP overlap** — More shared defining variants = higher confidence -- **Conflicting variants** — Variants that contradict the placement lower confidence -- **Tree depth** — Deeper placements with fewer distinguishing SNPs may have lower scores -- **Source quality** — Some sources have more complete variant data than others - -### Step 4: Review Individual Changes - -In the "Under Review" status, you'll see pending changes: - -For each change you can: -- **Approve**: Mark as validated -- **Skip**: Exclude from this promotion (stays in set but won't apply) - -Use **Approve All Pending** to quickly approve remaining changes after you've reviewed the ambiguities. +**Apply** promotes only the **APPROVED** changes to the live (temporal) tree and +marks the set APPLIED; it's idempotent (re-applying an APPLIED set is a no-op). +**Discard** requires a reason. --- -## Reviewing Large Change Sets +## Reviewing a change set -When dealing with thousands of changes (common for major source updates like ISOGG), a systematic approach is essential. +1. **Start the review** — select a READY_FOR_REVIEW set and click Start Review + (status → UNDER_REVIEW). +2. **Read the diff** in the detail panel: + - added (new nodes), removed, modified (e.g. variant edits), reparented (moved). +3. **Resolve any flagged placements** in the **Merge Reviews** screen (below) — the + merge/graft routes anything it couldn't place confidently there, rather than + guessing. +4. **Approve individual changes**, or **Approve All Pending** once you've vetted the + flagged items. +5. **Apply** when satisfied. -### Current Workflow +### Confidence scores -1. **Triage by confidence** — Start with the Ambiguity Report, sorted by lowest confidence first -2. **Use Tree Preview** — Get a structural overview before diving into details -3. **Spot-check by branch** — Use browser search (Ctrl+F) in the Tree Preview or Diff view to find specific clades -4. 
**Approve in bulk** — After reviewing ambiguities, use "Approve All Pending" for remaining items +Flagged placements carry an anchor strength (0–100%). Prioritize accordingly: -### Current Limitations +| Strength | Risk | Recommended action | +|----------|------|--------------------| +| **80–100%** | Low | Generally safe; spot-check a few | +| **50–79%** | Medium | Check that the shared SNPs make sense for the branch | +| **20–49%** | High | Manual verification against known phylogeny | +| **0–19%** | Very high | Likely wrong; defer or research before accepting | -The following features are not yet available but are on the roadmap: - -| Desired Feature | Current Workaround | -|-----------------|-------------------| -| Filter diff by branch (e.g., "only R1b") | Use Ctrl+F in Tree Preview or Diff view | -| Bulk approve by subclade | Review ambiguities, then "Approve All Pending" | -| Assign branches to expert curators | Coordinate manually; add comments noting who reviewed what | -| Export diff to spreadsheet | Copy Tree Preview text to external tools | - -### Recommended Review Strategy for 5,000+ Changes - -1. **Don't review every change** — Focus on ambiguities and structural changes (reparents) -2. **Trust high-confidence placements** — Scores above 0.80 rarely need individual review -3. **Divide by expertise** — If multiple curators are available, coordinate by major branch: - - "I'll review everything under R1b" - - "You take the E-M96 subtree" -4. **Use comments** — Add comments to the change set noting what you reviewed -5. **Time-box your review** — Set a limit (e.g., 2 hours) then assess if more review is needed - -> **Feature Requests:** If you need filtering by branch or bulk approval by subclade, please submit a feature request. These are high-priority UX improvements under consideration. +What affects it: SNP overlap (more shared defining variants = stronger), conflicting +variants, tree depth, and source completeness.
--- -## Conflict Resolution - -Beyond simply approving or skipping changes, you can now create **resolutions** to correct merge algorithm decisions before applying to production. +## Merge Reviews screen (`/curator/reviews`) -### Resolution Types +This is where you resolve the items the SNP-graft/merge staged for a human — the +`tree.wip_*` rows: SNP-graft Phase-4 flags, name collisions, and graft-blocked +branches. -| Type | Description | Use Case | -|------|-------------|----------| -| **REPARENT** | Change the parent of a node | Algorithm placed node under wrong parent | -| **EDIT_VARIANTS** | Add or remove variant associations | Missing or incorrect SNP assignments | -| **MERGE_EXISTING** | Map WIP node to existing production node | Duplicate detection — don't create, link instead | -| **DEFER** | Move to manual review queue | Needs expert review or more research | +### Left panel — the worklist -### Creating Resolutions +The staged items, filterable by **status** and **category**. Each row shows the +source, node name, category, best anchor, and any resolution already chosen. -Resolutions are created via the API. Each resolution targets either a WIP haplogroup (new node) or a WIP reparent (move operation). 
+### Right panel — one item's context + resolution form -#### REPARENT Resolution +Selecting an item loads its panel (`/curator/reviews/:wip_id/panel`) with the full +decision context: +- The reason it was flagged and its **category** +- **Best anchor** + **anchor strength %**, and the candidate anchor nodes (with hit + counts) +- Defining-SNP counts (and how many are known to the foundation) +- The source parent and its status; whether it's backbone +- The **tentative parent** and a preview of where it would land (that parent's + current children) +- Open / resolved / deferred counts for the parent change set -When the algorithm placed a node under the wrong parent: +### Resolving an item -```bash -# Via curl (example) -curl -X POST /curator/change-sets/123/resolve/reparent \ - -d "wipHaplogroupId=456" \ - -d "newParentId=789" \ - -d "notes=Source data shows this should be under R-M269" -``` +`POST /curator/reviews/:wip_id/resolve` with an `action`, an optional `target` +(a **node name**, resolved server-side — not a numeric id), and `notes`: -Parameters: -- `wipHaplogroupId` or `wipReparentId` — What to resolve (one required) -- `newParentId` or `newParentPlaceholderId` — New parent (one required) -- `notes` — Explanation for audit trail +| Action | `target` | Use case | +|--------|----------|----------| +| **REPARENT** | new parent's name (**required**) | Confirm the suggested anchor, or choose a different parent | +| **MERGE_EXISTING** | existing node's name (**required**) | The staged node duplicates a production node — link instead of creating | +| **DEFER** | — | Needs more research; excluded from Apply until resolved | -#### EDIT_VARIANTS Resolution +An unknown `target` name is rejected with a notice (no node is created from a typo). +Decisions are written to `wip_resolution` and attributed to you. 
-When variant associations need correction: +### Applying resolutions -```bash -curl -X POST /curator/change-sets/123/resolve/edit-variants \ - -d "wipHaplogroupId=456" \ - -d 'variantsToAdd=[101, 102, 103]' \ - -d 'variantsToRemove=[50]' \ - -d "notes=Adding missing defining SNPs per ISOGG" -``` +`POST /curator/reviews/:wip_id/apply` bumps the parent change set to UNDER_REVIEW +and runs the **same tested change-set apply engine**, enacting your resolutions. It +reports created / variant-edits / skipped counts. Deferred items are skipped and +remain in the worklist. -Parameters: -- `wipHaplogroupId` or `wipReparentId` — What to resolve -- `variantsToAdd` — JSON array of variant IDs to add -- `variantsToRemove` — JSON array of variant IDs to remove -- `notes` — Explanation - -#### MERGE_EXISTING Resolution - -When a WIP node duplicates an existing production node: - -```bash -curl -X POST /curator/change-sets/123/resolve/merge-existing \ - -d "wipHaplogroupId=456" \ - -d "mergeTargetId=200" \ - -d "notes=R-M269 already exists as ID 200" -``` - -This prevents creating a duplicate — the WIP node's relationships will be redirected to the existing production node. - -#### DEFER Resolution - -When an item needs expert review before deciding: +--- -```bash -curl -X POST /curator/change-sets/123/resolve/defer \ - -d "wipHaplogroupId=456" \ - -d "priority=HIGH" \ - -d "reason=Disputed placement - needs phylogeny expert review" \ - -d "notes=See ISOGG discussion thread #4521" -``` +## Reviewing large change sets -Priority levels: `LOW`, `NORMAL`, `HIGH`, `CRITICAL` +Major source updates produce thousands of changes. A systematic pass: -Deferred items are excluded from Apply until resolved. +1. **Triage by anchor strength** — handle the lowest-confidence flagged items first + in Merge Reviews. +2. **Read the diff** for structural changes (reparents) before bulk-approving. +3. **Search** — use the browser find (Ctrl/Cmd-F) to jump to specific clades in the + diff. +4. 
**Approve in bulk** — after resolving the flagged items, use **Approve All + Pending** for the rest. -### Viewing Resolutions +Recommended strategy for 5,000+ changes: +- Don't review every change — focus on flagged items and reparents. +- Trust high-strength placements (>80%). +- Divide by expertise across curators (e.g. "I'll take R1b, you take E-M96"); use + change-set **comments** to note who reviewed what. +- Time-box the review, then reassess. -**All resolutions for a change set:** -``` -GET /curator/change-sets/123/resolutions -``` +--- -**Deferred items only:** -``` -GET /curator/change-sets/123/deferred -``` +## Applying to production -### Cancelling a Resolution +When you're satisfied: +1. Ensure flagged items are resolved (or deferred) in Merge Reviews. +2. Click **Apply** on the change set. +3. Confirm. -If you created a resolution by mistake: +The approved changes are applied to the live tree, the status becomes **APPLIED**, +and an audit record (`promoted_by`/`promoted_at`) is written. -```bash -curl -X DELETE /curator/change-sets/123/resolutions/999 -``` +## Discarding a change set -This sets the resolution status to `CANCELLED`, effectively removing it. +If the changes shouldn't be applied, click **Discard** and enter a reason. Common +reasons: data-quality issues, superseded by a newer merge, or a test merge. -### Resolution Workflow +--- -1. **During review**, identify problematic placements via the Ambiguity Report or Tree Preview -2. **Create resolutions** for items needing correction -3. **View resolutions** to verify all corrections are in place -4. **Apply** — the system applies your resolutions during promotion: - - REPARENT: Uses your specified parent instead of the original - - EDIT_VARIANTS: Adds/removes variants after node creation - - MERGE_EXISTING: Skips node creation, remaps relationships - - DEFER: Skips the item entirely (remains in WIP) -5. 
**Resolution status** is updated to `APPLIED` after successful processing +## Machine / scripted access (management API) -### API Summary +The interactive screens above are for curators. A separate **management API** +(under `/manage/*`, X-API-Key) drives the same lifecycle for automation — e.g. the +tree-init/graft tooling: | Endpoint | Method | Description | |----------|--------|-------------| -| `/curator/change-sets/:id/resolutions` | GET | List all resolutions | -| `/curator/change-sets/:id/deferred` | GET | List deferred items | -| `/curator/change-sets/:id/resolve/reparent` | POST | Create REPARENT resolution | -| `/curator/change-sets/:id/resolve/edit-variants` | POST | Create EDIT_VARIANTS resolution | -| `/curator/change-sets/:id/resolve/merge-existing` | POST | Create MERGE_EXISTING resolution | -| `/curator/change-sets/:id/resolve/defer` | POST | Create DEFER resolution | -| `/curator/change-sets/:csId/resolutions/:rId` | DELETE | Cancel a resolution | - -**Required Permission:** `tree.version.review` +| `/manage/haplogroups/merge` · `/merge/preview` | POST | Run / preview a merge | +| `/manage/change-sets` | GET / POST | List / create change sets | +| `/manage/change-sets/:id` | GET | Change-set detail | +| `/manage/change-sets/:id/changes` | POST | Add a change | +| `/manage/change-sets/:id/diff` | GET | Full diff (JSON) | +| `/manage/change-sets/:id/{start-review,approve-all,apply,discard}` | POST | Lifecycle | +| `/manage/change-sets/:id/changes/:change_id/review` | POST | Review one change | --- -## Applying to Production - -When you're satisfied with the review: +## Best practices -1. Ensure all high-priority items have been reviewed -2. Click **Apply to Production** -3. 
Confirm in the dialog - -**What happens:** -- All approved changes are applied to the live tree -- Users will see the updated tree structure -- Change set status becomes "Applied" -- An audit record is created - -**Required Permission:** `tree.version.promote` +- **Before reviewing:** check the source (trusted authority?) and the scale. +- **During review:** start with the lowest-confidence flagged items; read the diff + for unexpected reparents. +- **Before applying:** have another curator spot-check large sets; apply outside + peak usage. --- -## Discarding a Change Set - -If the changes should not be applied: +## Workflow example -1. Click **Discard** (red button) -2. Enter a reason (required, minimum 10 characters) -3. Click **Confirm Discard** +**Scenario:** an ISOGG update with thousands of new nodes. -**Common reasons to discard:** -- Data quality issues discovered -- Superseded by a newer merge -- Test merge that was never intended for production - -**Required Permission:** `tree.version.discard` - ---- - -## Best Practices - -### Before Reviewing - -- Check when the change set was created -- Review the source - is this a trusted authority? -- Note the scale - more changes = more careful review needed - -### During Review - -- Start with the ambiguity report -- Focus on low-confidence placements first -- Use the diff view to understand structural changes -- Look for unexpected reparenting operations - -### Before Applying - -- Have another curator spot-check large change sets -- Verify the merge statistics look reasonable -- Consider the timing (avoid applying during peak usage) - ---- - -## Workflow Example - -**Scenario:** ISOGG monthly update with 7,537 new nodes - -1. Receive notification that change set "isogg-2025-12" is ready -2. Navigate to Change Sets dashboard -3. Click the new change set to see details: - - 7,537 nodes created - - 2,695 nodes updated - - 684 ambiguities detected -4. Click "Start Review" -5. 
View the tree preview at `/curator/change-sets/{id}/tree-preview` to understand the structural changes -6. Click "View Report" to handle the 684 ambiguities -7. Review each ambiguity: - - Most are low-risk automatic placements (approve) - - Some need manual verification (check in tree view) - - A few should be skipped (data quality issues) -8. **Create resolutions** for items needing correction: - - Use REPARENT to fix incorrect parent placements - - Use EDIT_VARIANTS to add missing SNPs - - Use MERGE_EXISTING for duplicate nodes - - Use DEFER for items needing expert research -9. Return to detail panel -10. Click "View Full Diff" to spot-check changes -11. Check `/curator/change-sets/{id}/resolutions` to verify all corrections are in place -12. Click "Approve All Pending" for remaining items -13. Click "Apply to Production" -14. Confirm in the dialog -15. Done! Check the tree explorer to verify +1. A change set appears as **READY_FOR_REVIEW** in `/curator/change-sets`. +2. Open it; read the summary (e.g. *N created, M reparented*) and the diff. +3. Click **Start Review** (→ UNDER_REVIEW). +4. Switch to `/curator/reviews` and work the flagged worklist: + - **REPARENT** to fix or confirm a parent, + - **MERGE_EXISTING** for duplicates, + - **DEFER** items needing research. +5. **Apply** the resolutions from a review item (enacts them via the change-set + apply engine). +6. Back in `/curator/change-sets`, spot-check the diff and **Approve All Pending**. +7. **Apply** the change set → **APPLIED**. +8. Verify in the tree explorer (`/ytree` / `/mtree`). 
--- ## Troubleshooting -| Symptom | Possible Cause | Solution | +| Symptom | Possible cause | Solution | |---------|----------------|----------| -| "No change sets found" | Filters hiding results | Reset filters to "All Types" and "All Statuses" | -| "No change sets found" | Missing permissions | Request `tree.version.view` from Admin | -| "No change sets found" | No recent merges | Check with data team if merges are scheduled | -| "Apply" button disabled | Change set already applied | Check status — if "Applied", no action needed | -| "Apply" button disabled | Missing permissions | Request `tree.version.promote` from Admin | -| "Apply" button disabled | Unresolved ambiguities | View Ambiguity Report and resolve all items | -| "Discard" button not visible | Missing permissions | Request `tree.version.discard` from Admin | -| Changes not showing in production | Browser cache | Open in private/incognito window or clear cache | -| Changes not showing in production | Page not refreshed | Refresh the tree explorer page | -| Changes not showing in production | Not yet applied | Verify change set status is "Applied" | -| Tree structure looks wrong | Viewing cached data | Hard refresh (Ctrl+Shift+R / Cmd+Shift+R) | -| Tree structure looks wrong | Merge had errors | Check merge logs and ambiguity report | -| Ambiguity count seems high | Large structural changes | Normal for major source updates — review systematically | -| Resolution not applied | Status still "PENDING" | Check if Apply was run; resolutions apply during promotion | -| Resolution API returns 400 | Missing required fields | Ensure wipHaplogroupId or wipReparentId is provided | -| Deferred items still visible | Change set re-applied | Deferred items remain in WIP until manually resolved | -| Cannot create resolution | Missing permissions | Request `tree.version.review` from Admin | - ---- - -## Permissions Summary - -| Action | Permission Required | -|--------|---------------------| -| View change sets | 
`tree.version.view` | -| View resolutions | `tree.version.view` | -| Review changes | `tree.version.review` | -| Create resolutions | `tree.version.review` | -| Cancel resolutions | `tree.version.review` | -| Apply to production | `tree.version.promote` | -| Discard change set | `tree.version.discard` | - -Contact an administrator if you need additional permissions. +| "No change sets found" | Status filter hiding results | Reset the status filter | +| Can't reach the screen | Not a curator | Need the `Curator` / `TreeCurator` / `Admin` role | +| **Apply** unavailable | Already APPLIED | No action needed | +| **Apply** unavailable | Wrong status | Apply needs READY_FOR_REVIEW or UNDER_REVIEW (Start Review first) | +| Resolve returns a notice | Unknown target node name | Use an existing node's exact name | +| REPARENT/MERGE rejected | No `target` given | REPARENT and MERGE_EXISTING both require a target node name | +| Deferred items still listed | By design | Deferred items stay in the worklist until resolved | +| Changes not showing in production | Browser cache / not applied | Hard-refresh; confirm status is APPLIED | --- -## Related Documentation +## Related documentation -- [Tree Versioning System (Technical)](planning/tree-versioning-system.md) - Architecture and implementation details -- [Conflict Resolution System (Technical)](planning/conflict-resolution-system.md) - Resolution types and data model -- [Haplogroup Discovery System](planning/haplogroup-discovery-system.md) - How user observations propose new branches +- [Tree Versioning System (technical)](planning/tree-versioning-system.md) — architecture and data model. +- [Haplogroup Discovery System](planning/haplogroup-discovery-system.md) — how observations propose new branches. +- [`rust/README.md`](../rust/README.md) — the curator suite, merge/SNP-graft, and the management API in context. 
diff --git a/documents/decoding-us-Y-curator-review.json b/documents/decoding-us-Y-curator-review.json new file mode 100644 index 00000000..84c96fda --- /dev/null +++ b/documents/decoding-us-Y-curator-review.json @@ -0,0 +1,6963 @@ +{ + "source": "decoding-us", + "dna": "Y_DNA", + "summary": { + "weak_plurality": 125, + "parent_inconsistent": 171, + "name_collision": 2, + "graft_blocked": 130, + "total": 298 + }, + "items": [ + { + "node": "Y", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "C2b1a2b2b~", + "anchor_strength": 0.16666666666666666, + "candidates": [ + { + "node": "C2b1a2b2b~", + "hits": 3 + }, + { + "node": "G2a2b2a1a1c1a1a2a1a1a2b~", + "hits": 3 + }, + { + "node": "I1", + "hits": 3 + }, + { + "node": "O1b1a1a1a1a1b1a1", + "hits": 3 + }, + { + "node": "O1b1a1a1a1b1a1a1a1a1", + "hits": 3 + } + ], + "defining_snp_count": 1131, + "snps_known_to_foundation": 18, + "source_parent": null, + "source_parent_status": "(root)", + "is_backbone": true + }, + { + "node": "A0-T", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "A00", + "anchor_strength": 0.9090909090909091, + "candidates": [ + { + "node": "A00", + "hits": 30 + }, + { + "node": "J2b", + "hits": 3 + } + ], + "defining_snp_count": 1308, + "snps_known_to_foundation": 33, + "source_parent": "Y", + "source_parent_status": "flag_weak", + "is_backbone": true + }, + { + "node": "A0", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "A0", + "anchor_strength": 0.75, + "candidates": [ + { + "node": "A0", + "hits": 84 + }, + { + "node": "D1", + "hits": 3 + }, + { + 
"node": "E1b1b1b2a1a1a1a1b1~", + "hits": 3 + }, + { + "node": "G", + "hits": 3 + }, + { + "node": "I1", + "hits": 3 + } + ], + "defining_snp_count": 2329, + "snps_known_to_foundation": 109, + "source_parent": "A0-T", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": true + }, + { + "node": "A0-FTA5785", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I2a", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "I2a", + "hits": 3 + }, + { + "node": "I2a1b1a1a1a2~", + "hits": 3 + }, + { + "node": "Q1b1a1a1", + "hits": 3 + } + ], + "defining_snp_count": 1284, + "snps_known_to_foundation": 9, + "source_parent": "A0", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "A0-FTA5788", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1a1a1b1a1a1b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1a1a1b1a1a1b~", + "hits": 3 + } + ], + "defining_snp_count": 297, + "snps_known_to_foundation": 3, + "source_parent": "A0-FTA5785", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "A1", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "A00", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "A00", + "hits": 3 + }, + { + "node": "A1", + "hits": 3 + }, + { + "node": "C1a2", + "hits": 3 + } + ], + "defining_snp_count": 573, + "snps_known_to_foundation": 9, + "source_parent": "A0-T", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": true + }, + { + "node": "A1a", + "category": "weak_plurality", + "reason": "No 
single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "A00", + "anchor_strength": 0.10714285714285714, + "candidates": [ + { + "node": "A00", + "hits": 3 + }, + { + "node": "I1a10b2b2~", + "hits": 3 + }, + { + "node": "I1a2a1a1a2b1b~", + "hits": 3 + }, + { + "node": "I2a1a2a1b", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1b1a1a1a~", + "hits": 3 + } + ], + "defining_snp_count": 3076, + "snps_known_to_foundation": 28, + "source_parent": "A1", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "A1b", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "A1b", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "A1b", + "hits": 3 + } + ], + "defining_snp_count": 129, + "snps_known_to_foundation": 3, + "source_parent": "A1", + "source_parent_status": "flag_weak", + "is_backbone": true + }, + { + "node": "A1b1-M10831", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "A1b1a1", + "anchor_strength": 0.3917525773195876, + "candidates": [ + { + "node": "A1b1a1", + "hits": 38 + }, + { + "node": "A1b1a", + "hits": 12 + }, + { + "node": "A1b1a1a2a1a", + "hits": 9 + }, + { + "node": "A1b1a1a2b~", + "hits": 6 + }, + { + "node": "G2a2a1a5", + "hits": 3 + } + ], + "defining_snp_count": 2775, + "snps_known_to_foundation": 97, + "source_parent": "A1b1", + "source_parent_status": "matched→A1b1 (100%)", + "is_backbone": false + }, + { + "node": "A1b1-M9429", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "A1b1b2", + "anchor_strength": 0.2608695652173913, + "candidates": 
[ + { + "node": "A1b1b2", + "hits": 12 + }, + { + "node": "A1b1b2b2~", + "hits": 7 + }, + { + "node": "A1b1b2b", + "hits": 5 + }, + { + "node": "A1b1b", + "hits": 3 + }, + { + "node": "I1a1b1a4a2f1a1a7b~", + "hits": 3 + } + ], + "defining_snp_count": 582, + "snps_known_to_foundation": 43, + "source_parent": "A1b1", + "source_parent_status": "matched→A1b1 (100%)", + "is_backbone": false + }, + { + "node": "A1b1-M9427", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "A1b1b2", + "anchor_strength": 0.45, + "candidates": [ + { + "node": "A1b1b2", + "hits": 9 + }, + { + "node": "A1b1b2b2~", + "hits": 5 + }, + { + "node": "E1b1a1a1a1c1b2a3a1~", + "hits": 3 + }, + { + "node": "H1a2a3~", + "hits": 3 + } + ], + "defining_snp_count": 527, + "snps_known_to_foundation": 20, + "source_parent": "A1b1-M9429", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "A1b1-M9431", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "A1b1b2a", + "anchor_strength": 0.16279069767441862, + "candidates": [ + { + "node": "A1b1b2a", + "hits": 7 + }, + { + "node": "B2b1a2a~", + "hits": 3 + }, + { + "node": "E1b1a1a1a1c1a1a3d6a", + "hits": 3 + }, + { + "node": "E1b1b1", + "hits": 3 + }, + { + "node": "E1b1b1b2a1b1~", + "hits": 3 + } + ], + "defining_snp_count": 1063, + "snps_known_to_foundation": 43, + "source_parent": "A1b1-M9427", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "A1b1-M9428", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "H2", + "anchor_strength": 0.3, + "candidates": [ + { + "node": "H2", + "hits": 3 + }, + { + "node": "Q1b2b1b2~", + "hits": 3 + }, + { 
+ "node": "B2b3~", + "hits": 2 + }, + { + "node": "I2a2a1a1a2", + "hits": 2 + } + ], + "defining_snp_count": 231, + "snps_known_to_foundation": 10, + "source_parent": "A1b1-M9431", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "A1b1-V193", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "A1b1b2b", + "anchor_strength": 0.42028985507246375, + "candidates": [ + { + "node": "A1b1b2b", + "hits": 29 + }, + { + "node": "A1b1b2b2~", + "hits": 12 + }, + { + "node": "A1b1b2b3~", + "hits": 3 + }, + { + "node": "C1a1", + "hits": 3 + }, + { + "node": "E1a1a2b", + "hits": 3 + } + ], + "defining_snp_count": 1104, + "snps_known_to_foundation": 66, + "source_parent": "A1b1-M9427", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "A1b1-FGC40000", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a1c1a1a2a1a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a2b2a1a1c1a1a2a1a1a", + "hits": 3 + } + ], + "defining_snp_count": 102, + "snps_known_to_foundation": 3, + "source_parent": "A1b1-V5912", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "B-M8677", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "B2", + "anchor_strength": 0.9810924369747899, + "candidates": [ + { + "node": "B2", + "hits": 467 + }, + { + "node": "E1b1a1a1a1c1a1a3a1d1b1b1a5~", + "hits": 3 + }, + { + "node": "H1a1a1b", + "hits": 3 + }, + { + "node": "J2b2a", + "hits": 3 + } + ], + "defining_snp_count": 694, + "snps_known_to_foundation": 473, + 
"source_parent": "B-M8675", + "source_parent_status": "matched→B~ (100%)", + "is_backbone": false + }, + { + "node": "B-M6529", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "B2b1b1a", + "anchor_strength": 0.14285714285714285, + "candidates": [ + { + "node": "B2b1b1a", + "hits": 3 + }, + { + "node": "H1a2a3~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a4a3a1b1b1~", + "hits": 3 + }, + { + "node": "O1b1a1a1b1a2", + "hits": 3 + }, + { + "node": "O2a2b1a1a1a1a1a1a1", + "hits": 3 + } + ], + "defining_snp_count": 1010, + "snps_known_to_foundation": 18, + "source_parent": "B-M7104", + "source_parent_status": "matched→B2b1b~ (96%)", + "is_backbone": false + }, + { + "node": "B-M6843", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "B2b1b1b~", + "anchor_strength": 0.375, + "candidates": [ + { + "node": "B2b1b1b~", + "hits": 9 + }, + { + "node": "C1b1a2b", + "hits": 3 + }, + { + "node": "C2a1b", + "hits": 3 + }, + { + "node": "E1b1b1a1a1b1a2a1~", + "hits": 3 + }, + { + "node": "N1a1a1a1a1a1a1a7b1~", + "hits": 3 + } + ], + "defining_snp_count": 909, + "snps_known_to_foundation": 24, + "source_parent": "B-M7104", + "source_parent_status": "matched→B2b1b~ (96%)", + "is_backbone": false + }, + { + "node": "B-Z5058", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "B3", + "anchor_strength": 0.9916247906197655, + "candidates": [ + { + "node": "B3", + "hits": 1184 + }, + { + "node": "C", + "hits": 3 + }, + { + "node": "I1a10b2~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1e3e3~", + "hits": 3 + }, + { + "node": "H3a2b", + "hits": 1 + } + ], + 
"defining_snp_count": 1708, + "snps_known_to_foundation": 1188, + "source_parent": "B-M8675", + "source_parent_status": "matched→B~ (100%)", + "is_backbone": false + }, + { + "node": "B-Z22657", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "B3b~", + "anchor_strength": 0.2857142857142857, + "candidates": [ + { + "node": "B3b~", + "hits": 6 + }, + { + "node": "G2a1b2a", + "hits": 3 + }, + { + "node": "I1c1b1~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b1~", + "hits": 3 + }, + { + "node": "N1a1a1a1a1a2a1a2e~", + "hits": 3 + } + ], + "defining_snp_count": 455, + "snps_known_to_foundation": 21, + "source_parent": "B-Z5058", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "B-BY14692", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "D1a2b", + "anchor_strength": 0.13636363636363635, + "candidates": [ + { + "node": "D1a2b", + "hits": 3 + }, + { + "node": "E1b1a1a1a1a4~", + "hits": 3 + }, + { + "node": "I2a1a1a1a1a3a~", + "hits": 3 + }, + { + "node": "O2a2a1a2a1", + "hits": 3 + }, + { + "node": "Q2a1a1a1a", + "hits": 3 + } + ], + "defining_snp_count": 1815, + "snps_known_to_foundation": 19, + "source_parent": "B", + "source_parent_status": "matched→B~ (96%)", + "is_backbone": false + }, + { + "node": "B-V1019", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a1g1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "R1b1a1b1a1a1g1", + "hits": 2 + }, + { + "node": "I1a2a1a1d1a3~", + "hits": 1 + }, + { + "node": "I1a3b1a1", + "hits": 1 + } + ], + "defining_snp_count": 99, + "snps_known_to_foundation": 3, + "source_parent": "B-BY14692", + 
"source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "C-F5621", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "C2a", + "anchor_strength": 0.4230769230769231, + "candidates": [ + { + "node": "C2a", + "hits": 165 + }, + { + "node": "C2a1a2a", + "hits": 150 + }, + { + "node": "C2a1a2", + "hits": 30 + }, + { + "node": "C2a1a", + "hits": 14 + }, + { + "node": "C2a1a2a1a1", + "hits": 7 + } + ], + "defining_snp_count": 744, + "snps_known_to_foundation": 384, + "source_parent": "C-CTS93", + "source_parent_status": "matched→C2 (98%)", + "is_backbone": false + }, + { + "node": "C-F3836", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "C2b1a1a", + "anchor_strength": 0.4375, + "candidates": [ + { + "node": "C2b1a1a", + "hits": 21 + }, + { + "node": "C2b1a1a1", + "hits": 15 + }, + { + "node": "C2b1a1", + "hits": 12 + } + ], + "defining_snp_count": 84, + "snps_known_to_foundation": 48, + "source_parent": "C-Z1300", + "source_parent_status": "matched→C2b1a (100%)", + "is_backbone": false + }, + { + "node": "C-F14880", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "C1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1a1", + "hits": 1 + } + ], + "defining_snp_count": 174, + "snps_known_to_foundation": 1, + "source_parent": "C-A5925", + "source_parent_status": "matched→C2b1a1a1b~ (100%)", + "is_backbone": false + }, + { + "node": "F", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C", + "anchor_strength": 0.5, 
+ "candidates": [ + { + "node": "C", + "hits": 3 + }, + { + "node": "F", + "hits": 3 + } + ], + "defining_snp_count": 440, + "snps_known_to_foundation": 6, + "source_parent": "CF", + "source_parent_status": "matched→CF (100%)", + "is_backbone": true + }, + { + "node": "F1", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a2a2a4b1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I1a2a2a4b1~", + "hits": 3 + } + ], + "defining_snp_count": 87, + "snps_known_to_foundation": 3, + "source_parent": "F", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "GHIJK", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "GHIJK", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "GHIJK", + "hits": 6 + } + ], + "defining_snp_count": 9, + "snps_known_to_foundation": 6, + "source_parent": "F", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": true + }, + { + "node": "G-FT344950", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G2a1a1a1a1a1a1a2a1", + "anchor_strength": 0.22727272727272727, + "candidates": [ + { + "node": "G2a1a1a1a1a1a1a2a1", + "hits": 20 + }, + { + "node": "G2a1a1a1a1", + "hits": 17 + }, + { + "node": "G2a1a1a1a1a1a", + "hits": 9 + }, + { + "node": "G2a1a1a1a", + "hits": 8 + }, + { + "node": "G2a1a1a1", + "hits": 6 + } + ], + "defining_snp_count": 174, + "snps_known_to_foundation": 88, + "source_parent": "G-Z6616", + "source_parent_status": "matched→G2a1a1a (97%)", + "is_backbone": false + }, + { + "node": "G-Z3065", + "category": 
"parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "G2a2b2a1a", + "hits": 3 + }, + { + "node": "G2a2b2a1a1", + "hits": 3 + } + ], + "defining_snp_count": 9, + "snps_known_to_foundation": 6, + "source_parent": "G-PF3331", + "source_parent_status": "matched→G2a2b2a1 (100%)", + "is_backbone": false + }, + { + "node": "G-Z41649", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G2a2b2a1a1b1a1a2b1b1a", + "anchor_strength": 0.391304347826087, + "candidates": [ + { + "node": "G2a2b2a1a1b1a1a2b1b1a", + "hits": 9 + }, + { + "node": "G2a2b2a1a1b1a1a2b1b1a2~", + "hits": 6 + }, + { + "node": "G2a2b2a1a1b1a1a2b1", + "hits": 3 + }, + { + "node": "G2a2b2a1a1b1a1a2b1b", + "hits": 3 + }, + { + "node": "G2a2b2a1a1b1a1a2b", + "hits": 2 + } + ], + "defining_snp_count": 132, + "snps_known_to_foundation": 23, + "source_parent": "G-CTS35", + "source_parent_status": "matched→G2a2b2a1a1b1a1a2 (100%)", + "is_backbone": false + }, + { + "node": "G-S12047", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G2a2b2a1a1b1a1a1", + "anchor_strength": 0.47058823529411764, + "candidates": [ + { + "node": "G2a2b2a1a1b1a1a1", + "hits": 8 + }, + { + "node": "G2a2b2a1a1b1a1a1a", + "hits": 6 + }, + { + "node": "G2a2b2a1a1b1a1a1a1", + "hits": 3 + } + ], + "defining_snp_count": 27, + "snps_known_to_foundation": 17, + "source_parent": "G-CTS2230", + "source_parent_status": "matched→G2a2b2a1a1b1a1a (100%)", + "is_backbone": false + }, + { + "node": "G-Y38189", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of 
the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G2a2b2a1a1a1a1a1a1a1a1", + "anchor_strength": 0.28205128205128205, + "candidates": [ + { + "node": "G2a2b2a1a1a1a1a1a1a1a1", + "hits": 11 + }, + { + "node": "G2a2b2a1a1a1a1a1a1a1a", + "hits": 9 + }, + { + "node": "G2a2b2a1a1a1a1a1a1a1a1a", + "hits": 6 + }, + { + "node": "G2a2b2a1a1a1a1a1a1a", + "hits": 5 + }, + { + "node": "G2a2b2a1a1a1a1a1a1a1", + "hits": 5 + } + ], + "defining_snp_count": 60, + "snps_known_to_foundation": 37, + "source_parent": "G-Z6158", + "source_parent_status": "matched→G2a2b2a1a1a1a (72%)", + "is_backbone": false + }, + { + "node": "G-Z27232", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G2a2b2a1a1c1a1a2a1", + "anchor_strength": 0.42857142857142855, + "candidates": [ + { + "node": "G2a2b2a1a1c1a1a2a1", + "hits": 6 + }, + { + "node": "G2a2b2a1a1c1a1a2a1a", + "hits": 5 + }, + { + "node": "E1b1a1a1a1c1b2a1a", + "hits": 3 + } + ], + "defining_snp_count": 12, + "snps_known_to_foundation": 11, + "source_parent": "G-Z3292", + "source_parent_status": "matched→G2a2b2a1a1c1a1a2 (100%)", + "is_backbone": false + }, + { + "node": "H-PH24", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "H3b2", + "anchor_strength": 0.3, + "candidates": [ + { + "node": "H3b2", + "hits": 9 + }, + { + "node": "H3b2a~", + "hits": 9 + }, + { + "node": "E1b1a1", + "hits": 3 + }, + { + "node": "E1b1b1a1b2a4c1a1a1a3~", + "hits": 3 + }, + { + "node": "Q1b1a1a1e1b1a~", + "hits": 3 + } + ], + "defining_snp_count": 521, + "snps_known_to_foundation": 30, + "source_parent": "H-Z13871", + "source_parent_status": "matched→H3b (100%)", + "is_backbone": false + }, + { + "node": "H-Z34945", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below 
the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C2a1a1b1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "C2a1a1b1", + "hits": 3 + }, + { + "node": "H3b2a~", + "hits": 3 + } + ], + "defining_snp_count": 105, + "snps_known_to_foundation": 6, + "source_parent": "H-PH24", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "H-Y27295", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G2a1a1a1a1a1a1a1a2a2~", + "anchor_strength": 0.42857142857142855, + "candidates": [ + { + "node": "G2a1a1a1a1a1a1a1a2a2~", + "hits": 3 + }, + { + "node": "H1a2a1~", + "hits": 3 + }, + { + "node": "R1b1a1a2", + "hits": 1 + } + ], + "defining_snp_count": 423, + "snps_known_to_foundation": 7, + "source_parent": "H-Z13996", + "source_parent_status": "matched→H1a2a (100%)", + "is_backbone": false + }, + { + "node": "H-Y25630", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "C1a2a", + "anchor_strength": 0.25, + "candidates": [ + { + "node": "C1a2a", + "hits": 3 + }, + { + "node": "H3a1", + "hits": 3 + }, + { + "node": "I1a2b4b1~", + "hits": 3 + }, + { + "node": "L1a1b3c~", + "hits": 3 + } + ], + "defining_snp_count": 526, + "snps_known_to_foundation": 12, + "source_parent": "H-Z13966", + "source_parent_status": "matched→H1a2a (98%)", + "is_backbone": false + }, + { + "node": "H-Z34660", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "H1a1a4b2a1", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "H1a1a4b2a1", + "hits": 6 + }, + { + "node": "E1b1a1", + "hits": 3 + }, + { + 
"node": "H1a1a4b2a1b~", + "hits": 3 + }, + { + "node": "I1", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a3b~", + "hits": 3 + } + ], + "defining_snp_count": 147, + "snps_known_to_foundation": 18, + "source_parent": "H-Z14448", + "source_parent_status": "matched→H1a1a4b2a (100%)", + "is_backbone": false + }, + { + "node": "H-FTA9381", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C2a", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "C2a", + "hits": 3 + }, + { + "node": "G2a2b2a1a1a1a1a1a1a5b~", + "hits": 3 + } + ], + "defining_snp_count": 189, + "snps_known_to_foundation": 6, + "source_parent": "H-FT282446", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "H-FT327094", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G1a1a2b1c", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "G1a1a2b1c", + "hits": 3 + }, + { + "node": "O1b1a1a1a1b1b1", + "hits": 3 + } + ], + "defining_snp_count": 203, + "snps_known_to_foundation": 6, + "source_parent": "H-FT313923", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "L-Z20500", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "L1a1b3a1a1", + "anchor_strength": 0.6666666666666666, + "candidates": [ + { + "node": "L1a1b3a1a1", + "hits": 2 + }, + { + "node": "L1a1b3a1a1a~", + "hits": 1 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "L-Z5933", + "source_parent_status": "matched→L1a1b3a1 (100%)", + "is_backbone": false + }, + { + "node": "L-FT178620", + "category": 
"weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "D1a1a1a1a1b", + "anchor_strength": 0.25, + "candidates": [ + { + "node": "D1a1a1a1a1b", + "hits": 3 + }, + { + "node": "G2a2b2a1a1b1a1a2c", + "hits": 3 + }, + { + "node": "I2a1a1a", + "hits": 3 + }, + { + "node": "I2a1a1a3~", + "hits": 3 + } + ], + "defining_snp_count": 327, + "snps_known_to_foundation": 9, + "source_parent": "L-Z20336", + "source_parent_status": "matched→L1a1 (95%)", + "is_backbone": false + }, + { + "node": "T-Z709", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "T1a1a1b", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "T1a1a1b", + "hits": 3 + }, + { + "node": "T1a1a1b2", + "hits": 3 + }, + { + "node": "T1a1a1b2b", + "hits": 3 + } + ], + "defining_snp_count": 18, + "snps_known_to_foundation": 9, + "source_parent": "T-FGC3954", + "source_parent_status": "matched→T1a1 (57%)", + "is_backbone": false + }, + { + "node": "T-CTS934", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "T1a1a1b2b2b1a", + "anchor_strength": 0.41379310344827586, + "candidates": [ + { + "node": "T1a1a1b2b2b1a", + "hits": 12 + }, + { + "node": "T1a1a1b2b2", + "hits": 11 + }, + { + "node": "Q1b1a1a1h1", + "hits": 3 + }, + { + "node": "T1a1a1b2b2b1", + "hits": 3 + } + ], + "defining_snp_count": 69, + "snps_known_to_foundation": 29, + "source_parent": "T-Z709", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "T-B251", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + 
"best_anchor": "Q1b1a1a2b2a1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "Q1b1a1a2b2a1~", + "hits": 3 + } + ], + "defining_snp_count": 132, + "snps_known_to_foundation": 3, + "source_parent": "T-CTS934", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "T-M11045", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1a1a1c1b2a~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "E1b1b1a1a1c1b2a~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c2b2a1b6b", + "hits": 3 + } + ], + "defining_snp_count": 108, + "snps_known_to_foundation": 6, + "source_parent": "T-CTS6280", + "source_parent_status": "matched→T1a1a1b2b2b1a1a2 (100%)", + "is_backbone": false + }, + { + "node": "T-Y5289", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a1b1a1a1a1c3a2~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "G2a2b2a1a1b1a1a1a1c3a2~", + "hits": 3 + }, + { + "node": "T1a1a1b2b2b1a1a1", + "hits": 3 + } + ], + "defining_snp_count": 15, + "snps_known_to_foundation": 6, + "source_parent": "T-A22205", + "source_parent_status": "matched→T1a1a1b2b2b1a1a (100%)", + "is_backbone": false + }, + { + "node": "T-CTS629", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "T1a1a1b2b2b1a1a1c", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "T1a1a1b2b2b1a1a1c", + "hits": 3 + } + ], + "defining_snp_count": 96, + "snps_known_to_foundation": 3, + "source_parent": "T-Y5289", + "source_parent_status": "flag_parent_inconsistent", + 
"is_backbone": false + }, + { + "node": "T-FGC29101", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "T1a2a~", + "anchor_strength": 0.4186046511627907, + "candidates": [ + { + "node": "T1a2a~", + "hits": 36 + }, + { + "node": "T1a2b", + "hits": 33 + }, + { + "node": "T1a2b1", + "hits": 6 + }, + { + "node": "C2a1a2a", + "hits": 3 + }, + { + "node": "Q2b2a1", + "hits": 3 + } + ], + "defining_snp_count": 186, + "snps_known_to_foundation": 86, + "source_parent": "T", + "source_parent_status": "matched→T (57%)", + "is_backbone": false + }, + { + "node": "K2a", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "NO", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "NO", + "hits": 18 + } + ], + "defining_snp_count": 21, + "snps_known_to_foundation": 18, + "source_parent": "K-M526", + "source_parent_status": "novel", + "is_backbone": true + }, + { + "node": "O-FT319264", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "H3b", + "anchor_strength": 0.6666666666666666, + "candidates": [ + { + "node": "H3b", + "hits": 6 + }, + { + "node": "E1b1a1a1a1c1a1a3d6a", + "hits": 3 + } + ], + "defining_snp_count": 58, + "snps_known_to_foundation": 9, + "source_parent": "O-Z23867", + "source_parent_status": "matched→O1b1a1a1a1b1b (100%)", + "is_backbone": false + }, + { + "node": "O-ACT740", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1a1b2a4c1~", + 
"anchor_strength": 0.5, + "candidates": [ + { + "node": "E1b1b1a1b2a4c1~", + "hits": 3 + }, + { + "node": "O2a2a", + "hits": 3 + } + ], + "defining_snp_count": 215, + "snps_known_to_foundation": 6, + "source_parent": "O-P201", + "source_parent_status": "matched→O2a2 (100%)", + "is_backbone": false + }, + { + "node": "O-MF654042", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "O2a2a1a", + "anchor_strength": 0.7, + "candidates": [ + { + "node": "O2a2a1a", + "hits": 21 + }, + { + "node": "O2a2a1", + "hits": 9 + } + ], + "defining_snp_count": 249, + "snps_known_to_foundation": 30, + "source_parent": "O-ACT740", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "O-Z25268", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1a1a1a1c1a1a3d6a", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "E1b1a1a1a1c1a1a3d6a", + "hits": 1 + }, + { + "node": "G2a2a1a2a1", + "hits": 1 + }, + { + "node": "R1b1a1b1a1a1c2b2a1b1a1b2b", + "hits": 1 + } + ], + "defining_snp_count": 207, + "snps_known_to_foundation": 3, + "source_parent": "O-Z25253", + "source_parent_status": "matched→O2a2a1a2a1b (100%)", + "is_backbone": false + }, + { + "node": "O-FT321875", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "O2a2a1a2a1a1", + "anchor_strength": 0.5384615384615384, + "candidates": [ + { + "node": "O2a2a1a2a1a1", + "hits": 7 + }, + { + "node": "I2", + "hits": 3 + }, + { + "node": "O2a2a1a2a1a", + "hits": 3 + } + ], + "defining_snp_count": 69, + "snps_known_to_foundation": 13, + 
"source_parent": "O-Z25253", + "source_parent_status": "matched→O2a2a1a2a1b (100%)", + "is_backbone": false + }, + { + "node": "O-M2775", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2c1a2b2b1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a2b2b1a", + "hits": 3 + } + ], + "defining_snp_count": 16, + "snps_known_to_foundation": 3, + "source_parent": "O-FT323782", + "source_parent_status": "matched→O2a2a1a2a1a1a2 (73%)", + "is_backbone": false + }, + { + "node": "O-F6280", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "O2a2b1a2a1a1b1b2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "O2a2b1a2a1a1b1b2", + "hits": 1 + } + ], + "defining_snp_count": 4, + "snps_known_to_foundation": 1, + "source_parent": "O-CTS3763", + "source_parent_status": "matched→O2a2b1a2a1a1b1b (100%)", + "is_backbone": false + }, + { + "node": "O-PF3228", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a2b1c1~", + "anchor_strength": 0.75, + "candidates": [ + { + "node": "I1a2b1c1~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a5", + "hits": 1 + } + ], + "defining_snp_count": 255, + "snps_known_to_foundation": 4, + "source_parent": "O-Z25925", + "source_parent_status": "matched→O2a2b1a1a1a4a1 (100%)", + "is_backbone": false + }, + { + "node": "O-F14479", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "O2a2b1a1a1a4a2a1a1", + "anchor_strength": 1.0, + "candidates": [ 
+ { + "node": "O2a2b1a1a1a4a2a1a1", + "hits": 1 + } + ], + "defining_snp_count": 5, + "snps_known_to_foundation": 1, + "source_parent": "O-CP086569.2:29543615 G->A", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "O-Z31492", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "O2a2b1a1a1a3a1", + "anchor_strength": 0.42857142857142855, + "candidates": [ + { + "node": "O2a2b1a1a1a3a1", + "hits": 9 + }, + { + "node": "O2a2b1a1a1a3a1a", + "hits": 9 + }, + { + "node": "G1a1a1b2a~", + "hits": 3 + } + ], + "defining_snp_count": 198, + "snps_known_to_foundation": 21, + "source_parent": "O-F14249", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "O-F14203", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "D2*", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "D2*", + "hits": 2 + } + ], + "defining_snp_count": 10, + "snps_known_to_foundation": 2, + "source_parent": "O-Z31492", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "O-F2", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "O2a1b1a1a1", + "anchor_strength": 0.4117647058823529, + "candidates": [ + { + "node": "O2a1b1a1a1", + "hits": 49 + }, + { + "node": "O2a1b1a1a1a", + "hits": 21 + }, + { + "node": "O2a1b1a1a", + "hits": 18 + }, + { + "node": "O2a1b1a1a1a1", + "hits": 16 + }, + { + "node": "O2a1b1a1", + "hits": 12 + } + ], + "defining_snp_count": 224, + "snps_known_to_foundation": 116, + "source_parent": "O-CP086569.2:5919079 T->C", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "O-M5420", + "category": "parent_inconsistent", + 
"reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "E1b1", + "hits": 3 + }, + { + "node": "O2a1b1a1a1a1e", + "hits": 3 + } + ], + "defining_snp_count": 5, + "snps_known_to_foundation": 3, + "source_parent": "O-F2", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "O-A4899", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "A00", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "A00", + "hits": 3 + } + ], + "defining_snp_count": 11, + "snps_known_to_foundation": 3, + "source_parent": "O-F17", + "source_parent_status": "matched→O2a1b1a1a1a1a1a1 (91%)", + "is_backbone": false + }, + { + "node": "O-MF190873", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G", + "hits": 1 + } + ], + "defining_snp_count": 10, + "snps_known_to_foundation": 1, + "source_parent": "O-FT38156", + "source_parent_status": "matched→O2a1b (92%)", + "is_backbone": false + }, + { + "node": "N", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "N~", + "anchor_strength": 0.4516765285996055, + "candidates": [ + { + "node": "N~", + "hits": 229 + }, + { + "node": "N", + "hits": 197 + }, + { + "node": "N1", + "hits": 75 + }, + { + "node": "N1a1a1a1a1a2a", + "hits": 3 + }, + { + "node": "R1a1a1b2a1a2c2d2a~", + "hits": 3 + } + ], + "defining_snp_count": 1212, + "snps_known_to_foundation": 501, + "source_parent": 
"NO", + "source_parent_status": "matched→NO1 (84%)", + "is_backbone": true + }, + { + "node": "N-Z19801", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "N1a1a1a1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "N1a1a1a1", + "hits": 24 + }, + { + "node": "N1a1a1a1a", + "hits": 24 + } + ], + "defining_snp_count": 65, + "snps_known_to_foundation": 33, + "source_parent": "N-Z4745", + "source_parent_status": "matched→N1a1a1a1a (75%)", + "is_backbone": false + }, + { + "node": "N-Z1922", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "N1a1a1a1a2a1", + "anchor_strength": 0.38095238095238093, + "candidates": [ + { + "node": "N1a1a1a1a2a1", + "hits": 8 + }, + { + "node": "N1a1a1a1a2", + "hits": 7 + }, + { + "node": "N1a1a1a1a2a~", + "hits": 6 + } + ], + "defining_snp_count": 45, + "snps_known_to_foundation": 21, + "source_parent": "N-CTS27", + "source_parent_status": "matched→N1a1a1a1a (100%)", + "is_backbone": false + }, + { + "node": "N-Y262049", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a1c2b2a1b6b", + "anchor_strength": 0.6666666666666666, + "candidates": [ + { + "node": "R1b1a1b1a1a1c2b2a1b6b", + "hits": 6 + }, + { + "node": "N1a1a1a1a2a1a1a1a1a1a1", + "hits": 3 + } + ], + "defining_snp_count": 22, + "snps_known_to_foundation": 9, + "source_parent": "N-Z19826", + "source_parent_status": "matched→N1a1a1a1a2a1a1a1a1a1a (100%)", + "is_backbone": false + }, + { + "node": "N-Z19831", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade 
differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "N1a1a1a1a2a1a1a1a1a1a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "N1a1a1a1a2a1a1a1a1a1a1a", + "hits": 7 + } + ], + "defining_snp_count": 17, + "snps_known_to_foundation": 7, + "source_parent": "N-Y262049", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "N-Z8029", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a1b1a1c1~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "I1a1b1a1c1~", + "hits": 3 + }, + { + "node": "N1b1a", + "hits": 3 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "N-ACT2487", + "source_parent_status": "matched→N1b1 (100%)", + "is_backbone": false + }, + { + "node": "N-Z19706", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "N1b1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "N1b1a1", + "hits": 6 + } + ], + "defining_snp_count": 20, + "snps_known_to_foundation": 6, + "source_parent": "N-Z8029", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "N-PF3228", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4d2a2a5", + "hits": 1 + }, + { + "node": "N1a1a1a1a2a1a", + "hits": 1 + } + ], + "defining_snp_count": 25, + "snps_known_to_foundation": 2, + "source_parent": "N-FT324649", + "source_parent_status": "novel", + "is_backbone": 
false + }, + { + "node": "S-Z41931", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "D1a2a", + "anchor_strength": 0.25, + "candidates": [ + { + "node": "D1a2a", + "hits": 3 + }, + { + "node": "G2a2b2b1a1a1a1a1", + "hits": 3 + }, + { + "node": "R", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a1d1a", + "hits": 3 + } + ], + "defining_snp_count": 1212, + "snps_known_to_foundation": 12, + "source_parent": "S", + "source_parent_status": "matched→S (100%)", + "is_backbone": false + }, + { + "node": "P", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I2a2a1a1a2", + "anchor_strength": 0.375, + "candidates": [ + { + "node": "I2a2a1a1a2", + "hits": 3 + }, + { + "node": "P", + "hits": 3 + }, + { + "node": "R1b1a1b", + "hits": 2 + } + ], + "defining_snp_count": 267, + "snps_known_to_foundation": 8, + "source_parent": "K2b", + "source_parent_status": "novel", + "is_backbone": true + }, + { + "node": "Q-FT310416", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "Q2b1a1", + "anchor_strength": 0.475, + "candidates": [ + { + "node": "Q2b1a1", + "hits": 19 + }, + { + "node": "Q2b1", + "hits": 12 + }, + { + "node": "Q2b1a", + "hits": 6 + }, + { + "node": "Q2b1a1a~", + "hits": 3 + } + ], + "defining_snp_count": 170, + "snps_known_to_foundation": 40, + "source_parent": "Q-YP748", + "source_parent_status": "matched→Q2b (98%)", + "is_backbone": false + }, + { + "node": "Q-BZ3056", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "Q2a", + "anchor_strength": 0.33916083916083917, + "candidates": [ + { 
+ "node": "Q2a", + "hits": 97 + }, + { + "node": "Q2a1a", + "hits": 59 + }, + { + "node": "Q2a1", + "hits": 46 + }, + { + "node": "Q2a1a3", + "hits": 39 + }, + { + "node": "Q2a1a3a1a~", + "hits": 36 + } + ], + "defining_snp_count": 404, + "snps_known_to_foundation": 283, + "source_parent": "Q-L612", + "source_parent_status": "matched→Q2 (96%)", + "is_backbone": false + }, + { + "node": "Q-F4531", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "Q1a2", + "anchor_strength": 0.36829268292682926, + "candidates": [ + { + "node": "Q1a2", + "hits": 151 + }, + { + "node": "Q1a2a2a~", + "hits": 120 + }, + { + "node": "Q1a2a", + "hits": 72 + }, + { + "node": "Q1a2a2~", + "hits": 55 + }, + { + "node": "Q1a2a2a1~", + "hits": 6 + } + ], + "defining_snp_count": 712, + "snps_known_to_foundation": 407, + "source_parent": "Q-Y663", + "source_parent_status": "matched→Q1a (100%)", + "is_backbone": false + }, + { + "node": "Q-FT6742", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "C2", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "C2", + "hits": 3 + }, + { + "node": "Q1a1a2", + "hits": 3 + }, + { + "node": "Q1a1a2a~", + "hits": 3 + } + ], + "defining_snp_count": 162, + "snps_known_to_foundation": 9, + "source_parent": "Q-Y683", + "source_parent_status": "matched→Q1a1a (98%)", + "is_backbone": false + }, + { + "node": "Q-Z32422", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1b1a2a", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "C1b1a2a", + "hits": 3 + }, + { + "node": "Q1b1a1a1h", + "hits": 3 + } + ], + "defining_snp_count": 3, + 
"snps_known_to_foundation": 3, + "source_parent": "Q-M826", + "source_parent_status": "matched→Q1b1a1a (69%)", + "is_backbone": false + }, + { + "node": "Q-B35", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q1b1a1a1h1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "Q1b1a1a1h1", + "hits": 88 + } + ], + "defining_snp_count": 219, + "snps_known_to_foundation": 88, + "source_parent": "Q-Z32422", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "Q-FT333378", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "Q1b1a1a1j1b~", + "anchor_strength": 0.38461538461538464, + "candidates": [ + { + "node": "Q1b1a1a1j1b~", + "hits": 15 + }, + { + "node": "Q1b1a1a1k1b~", + "hits": 15 + }, + { + "node": "B2b1a2~", + "hits": 3 + }, + { + "node": "H1a2b1b", + "hits": 3 + }, + { + "node": "Q1a2a2b~", + "hits": 3 + } + ], + "defining_snp_count": 267, + "snps_known_to_foundation": 24, + "source_parent": "Q-Z19429", + "source_parent_status": "matched→Q1b1a1a1k1 (75%)", + "is_backbone": false + }, + { + "node": "Q-CTS193", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "Q1b1a1a1e1b~", + "anchor_strength": 0.4117647058823529, + "candidates": [ + { + "node": "Q1b1a1a1e1b~", + "hits": 21 + }, + { + "node": "Q1b1a1a1e1b1a~", + "hits": 15 + }, + { + "node": "Q1b1a1a1e1b1~", + "hits": 9 + }, + { + "node": "B3", + "hits": 3 + }, + { + "node": "J2a", + "hits": 3 + } + ], + "defining_snp_count": 304, + "snps_known_to_foundation": 48, + "source_parent": "Q-M825", + "source_parent_status": "matched→Q1b1a1a1e1 (100%)", + "is_backbone": 
false + }, + { + "node": "Q-YP4716", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "N1a1a1a1a1a", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "N1a1a1a1a1a", + "hits": 3 + }, + { + "node": "Q1b1a1a1e1a", + "hits": 3 + }, + { + "node": "Q1b1a1a1m", + "hits": 3 + } + ], + "defining_snp_count": 26, + "snps_known_to_foundation": 9, + "source_parent": "Q-FGC8093", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "Q-Y28017", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q1b1a1a1m2~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "Q1b1a1a1m2~", + "hits": 15 + } + ], + "defining_snp_count": 30, + "snps_known_to_foundation": 15, + "source_parent": "Q-YP4716", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R2-FGC46630", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "A00", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "A00", + "hits": 3 + }, + { + "node": "R2a2b2a", + "hits": 3 + } + ], + "defining_snp_count": 160, + "snps_known_to_foundation": 6, + "source_parent": "R2-FGC51793", + "source_parent_status": "matched→R2a2b2 (100%)", + "is_backbone": false + }, + { + "node": "R2-Z29192", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1a1a", + "hits": 3 + } + ], + "defining_snp_count": 
111, + "snps_known_to_foundation": 3, + "source_parent": "R2-Y1332", + "source_parent_status": "matched→R2a2b1b2b3 (100%)", + "is_backbone": false + }, + { + "node": "R2-Z29227", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J2a2~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J2a2~", + "hits": 3 + } + ], + "defining_snp_count": 44, + "snps_known_to_foundation": 3, + "source_parent": "R2-V2434", + "source_parent_status": "matched→R2a2b1b2b3b (100%)", + "is_backbone": false + }, + { + "node": "R2-FGC18155", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1a2a1a1b3b", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "I2a1a2a1a1b3b", + "hits": 3 + }, + { + "node": "R2a2b1b2b3b2a1b", + "hits": 3 + } + ], + "defining_snp_count": 150, + "snps_known_to_foundation": 3, + "source_parent": "R2-V2947", + "source_parent_status": "matched→R2a2b1b2b3b2a1 (100%)", + "is_backbone": false + }, + { + "node": "R2-A27695", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J2b", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J2b", + "hits": 3 + } + ], + "defining_snp_count": 196, + "snps_known_to_foundation": 3, + "source_parent": "R2-V4569", + "source_parent_status": "matched→R2a2b1b2b (100%)", + "is_backbone": false + }, + { + "node": "R2-S10301", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": 
"G2a2a1a2a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a2a1a2a1a", + "hits": 3 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "R2-Z29284", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R2-FGC17618", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q1b1b~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "Q1b1b~", + "hits": 3 + }, + { + "node": "R2a2b1b1", + "hits": 3 + } + ], + "defining_snp_count": 75, + "snps_known_to_foundation": 6, + "source_parent": "R2-V1180", + "source_parent_status": "matched→R2a2b1 (100%)", + "is_backbone": false + }, + { + "node": "R2-Y61448", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1a1a1b2~", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "E1b1b1a1a1b2~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1e1a2~", + "hits": 3 + }, + { + "node": "R1a1a1b1a3a1a2d~", + "hits": 3 + } + ], + "defining_snp_count": 159, + "snps_known_to_foundation": 9, + "source_parent": "R2-FGC17618", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R2-FGC17629", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "C1b2b", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "C1b2b", + "hits": 3 + }, + { + "node": "N1a1a1a1a", + "hits": 3 + }, + { + "node": "R2a2b1b1a", + "hits": 3 + } + ], + "defining_snp_count": 156, + "snps_known_to_foundation": 6, + "source_parent": "R2-FGC17618", + "source_parent_status": "flag_parent_inconsistent", + 
"is_backbone": false + }, + { + "node": "R2-FGC57535", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R2a2b1b1a2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R2a2b1b1a2", + "hits": 3 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "R2-FGC17629", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R2-FGC17661", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R2a2b1b1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R2a2b1b1a1", + "hits": 3 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "R2-FGC17629", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R2-Y17972", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1a1b2", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "I2a1a1b2", + "hits": 3 + }, + { + "node": "R2a2a1a1", + "hits": 3 + } + ], + "defining_snp_count": 57, + "snps_known_to_foundation": 6, + "source_parent": "R2-FGC13192", + "source_parent_status": "matched→R2a2a1 (100%)", + "is_backbone": false + }, + { + "node": "R2-FGC13185", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "A00", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "A00", + "hits": 3 + }, + { + "node": "E1a2b1a1a1", + "hits": 3 + } + ], + 
"defining_snp_count": 138, + "snps_known_to_foundation": 6, + "source_parent": "R2-Y17972", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R2-FGC13210", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R2a2a1a1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R2a2a1a1a1", + "hits": 6 + } + ], + "defining_snp_count": 12, + "snps_known_to_foundation": 6, + "source_parent": "R2-FGC13185", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R2-FGC61415", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "O2a2b1a1a1d2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "O2a2b1a1a1d2", + "hits": 3 + } + ], + "defining_snp_count": 69, + "snps_known_to_foundation": 3, + "source_parent": "R2-FGC13203", + "source_parent_status": "matched→R2a2a (100%)", + "is_backbone": false + }, + { + "node": "R1a-PF6234", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1a1", + "anchor_strength": 0.36101083032490977, + "candidates": [ + { + "node": "R1a1", + "hits": 100 + }, + { + "node": "R1a1a", + "hits": 90 + }, + { + "node": "R1a1a1", + "hits": 76 + }, + { + "node": "I1a2a1a1a3a2~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4c4a1a~", + "hits": 3 + } + ], + "defining_snp_count": 341, + "snps_known_to_foundation": 266, + "source_parent": "R1a", + "source_parent_status": "matched→R1a (97%)", + "is_backbone": false + }, + { + "node": "R1a-S2846", + "category": "weak_plurality", + "reason": "No single foundation node holds a 
majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1a1a1a1b1a~", + "anchor_strength": 0.47368421052631576, + "candidates": [ + { + "node": "R1a1a1a1b1a~", + "hits": 27 + }, + { + "node": "R1a1a1a1", + "hits": 9 + }, + { + "node": "R1a1a1a~", + "hits": 9 + }, + { + "node": "R1a1a1a1b1~", + "hits": 6 + }, + { + "node": "R1a1a1a1b~", + "hits": 6 + } + ], + "defining_snp_count": 80, + "snps_known_to_foundation": 57, + "source_parent": "R1a-PF6234", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R1a-FGC21102", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1b1~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "J1b1~", + "hits": 3 + }, + { + "node": "R1a1a1a1b1a2~", + "hits": 3 + } + ], + "defining_snp_count": 45, + "snps_known_to_foundation": 3, + "source_parent": "R1a-S2846", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R1a-Y60196", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a1a1b1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a2b2a1a1a1b1", + "hits": 3 + } + ], + "defining_snp_count": 114, + "snps_known_to_foundation": 3, + "source_parent": "R1a-Y878", + "source_parent_status": "matched→R1a1a1b2a1a2c1d2~ (100%)", + "is_backbone": false + }, + { + "node": "R1a-Z31469", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G2a1a2a2~", + "anchor_strength": 0.25, + "candidates": [ + { + "node": "G2a1a2a2~", + "hits": 3 + }, + { + "node": "I2a2b", + "hits": 3 + }, + { + 
"node": "R1a1a1b1a2a3a1b~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c2b2b1a1a1d1", + "hits": 3 + } + ], + "defining_snp_count": 81, + "snps_known_to_foundation": 12, + "source_parent": "R1a-Y878", + "source_parent_status": "matched→R1a1a1b2a1a2c1d2~ (100%)", + "is_backbone": false + }, + { + "node": "R1a-A24429", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1a1", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "E1b1b1a1", + "hits": 3 + }, + { + "node": "R1a1a1b2a1a2c2d5a~", + "hits": 3 + }, + { + "node": "R1a1a1b2a1a2c2d5~", + "hits": 3 + } + ], + "defining_snp_count": 81, + "snps_known_to_foundation": 9, + "source_parent": "R1a-Y944", + "source_parent_status": "matched→R1a1a1b2a1a2c2d~ (100%)", + "is_backbone": false + }, + { + "node": "R1a-FGC7401", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1a1a1b2a1a2b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1a1a1b2a1a2b~", + "hits": 9 + } + ], + "defining_snp_count": 9, + "snps_known_to_foundation": 9, + "source_parent": "R1a-FGC7398", + "source_parent_status": "matched→R1a1a1b2a1a2a~ (100%)", + "is_backbone": false + }, + { + "node": "R1a-FTA9496", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b2a1a6d1a1b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1b2a1a6d1a1b~", + "hits": 3 + } + ], + "defining_snp_count": 96, + "snps_known_to_foundation": 3, + "source_parent": "R1a-Y929", + "source_parent_status": "matched→R1a1a1b2a1a1a1f1~ (100%)", + "is_backbone": false + }, + { + "node": 
"R1a-FT310821", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E", + "hits": 3 + } + ], + "defining_snp_count": 48, + "snps_known_to_foundation": 3, + "source_parent": "R1a-Y20746", + "source_parent_status": "matched→R1a1a1b2a2a1c~ (100%)", + "is_backbone": false + }, + { + "node": "R1a-M12427", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1a1a1a1c1a1a3e1~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "E1b1a1a1a1c1a1a3e1~", + "hits": 3 + }, + { + "node": "Q1b1b~", + "hits": 3 + } + ], + "defining_snp_count": 66, + "snps_known_to_foundation": 6, + "source_parent": "R1a-M12441", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1a-Y144479", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1a1a1b1a2b3a3a2h1b1b~", + "anchor_strength": 0.625, + "candidates": [ + { + "node": "R1a1a1b1a2b3a3a2h1b1b~", + "hits": 15 + }, + { + "node": "I1a2b1c1~", + "hits": 3 + }, + { + "node": "R1a1a1b1a2b3a3a2g2c1~", + "hits": 3 + }, + { + "node": "R1a1a1b1a2b3a3a2g2c~", + "hits": 3 + } + ], + "defining_snp_count": 75, + "snps_known_to_foundation": 21, + "source_parent": "R1a-FGC10352", + "source_parent_status": "matched→R1a1a1b1a2b3a3a2g2~ (100%)", + "is_backbone": false + }, + { + "node": "R1a-FGC11896", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": 
"R1a1a1b1a3a1a1a1a1b2~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1a1a1b1a3a1a1a1a1b2~", + "hits": 2 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 2, + "source_parent": "R1a-FGC11888", + "source_parent_status": "matched→R1a1a1b1a3a1a1a (50%)", + "is_backbone": false + }, + { + "node": "R1a-FGC55633", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1a1a1b1a3a1a3c~", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "R1a1a1b1a3a1a3c~", + "hits": 6 + }, + { + "node": "R1a1a1b1a3a1a3~", + "hits": 6 + }, + { + "node": "R1a1a1b1a3a1a3a", + "hits": 3 + }, + { + "node": "R1a1a1b1a3a1a3c1~", + "hits": 3 + } + ], + "defining_snp_count": 21, + "snps_known_to_foundation": 18, + "source_parent": "R1a-CTS3438", + "source_parent_status": "matched→R1a1a1b1a3a1a (100%)", + "is_backbone": false + }, + { + "node": "R1a-FGC33255", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1a1a1b1a3a2b", + "anchor_strength": 0.42857142857142855, + "candidates": [ + { + "node": "R1a1a1b1a3a2b", + "hits": 27 + }, + { + "node": "R1a1a1b1a3a2b2b~", + "hits": 15 + }, + { + "node": "R1a1a1b1a3a2b2b1b~", + "hits": 6 + }, + { + "node": "R1a1a1b1a3a2b2~", + "hits": 6 + }, + { + "node": "J2b2a1a1a1a1a2a1~", + "hits": 3 + } + ], + "defining_snp_count": 66, + "snps_known_to_foundation": 54, + "source_parent": "R1a-S5084", + "source_parent_status": "matched→R1a1a1b1a3a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-BY19023", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a1c2f~", + "anchor_strength": 0.75, + 
"candidates": [ + { + "node": "R1b1a1b1a1a1c2f~", + "hits": 6 + }, + { + "node": "R1b1a1b1a1a1c2d1", + "hits": 2 + } + ], + "defining_snp_count": 96, + "snps_known_to_foundation": 8, + "source_parent": "R1b-FGC8512", + "source_parent_status": "matched→R1b1a1b1a1a1c2d (100%)", + "is_backbone": false + }, + { + "node": "R1b-A5587", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1a2a1b1b~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "E1a2a1b1b~", + "hits": 3 + }, + { + "node": "J1a2a1a2c1a", + "hits": 3 + } + ], + "defining_snp_count": 72, + "snps_known_to_foundation": 6, + "source_parent": "R1b-S1731", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2b1a (88%)", + "is_backbone": false + }, + { + "node": "R1b-CTS832", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2c1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a", + "hits": 3 + } + ], + "defining_snp_count": 33, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S11136", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2b1a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-FGC51313", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1b1a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1b1a1a", + "hits": 3 + } + ], + "defining_snp_count": 48, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Y21408", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-A17378", + "category": "parent_inconsistent", + 
"reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C2b1a2a2b", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C2b1a2a2b", + "hits": 3 + } + ], + "defining_snp_count": 21, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Z5054", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2b1a1a1e3 (100%)", + "is_backbone": false + }, + { + "node": "R1b-FT156503", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1a1a1b1a2b3a1b2a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1a1a1b1a2b3a1b2a~", + "hits": 3 + } + ], + "defining_snp_count": 42, + "snps_known_to_foundation": 3, + "source_parent": "R1b-A17378", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R1b-CTS349", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1a3~", + "anchor_strength": 0.42857142857142855, + "candidates": [ + { + "node": "E1a3~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c2b2b1a3~", + "hits": 3 + }, + { + "node": "C1a1", + "hits": 1 + } + ], + "defining_snp_count": 65, + "snps_known_to_foundation": 7, + "source_parent": "R1b-Z80", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2b1a1a1b2b (50%)", + "is_backbone": false + }, + { + "node": "R1b-FGC84309", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1b2a2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I2a1b2a2a", + "hits": 3 + } + ], + "defining_snp_count": 51, + 
"snps_known_to_foundation": 3, + "source_parent": "R1b-S10353", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2a1b2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-S5235", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q1a1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "Q1a1", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c2b2a1b1a4b2a2", + "hits": 3 + } + ], + "defining_snp_count": 12, + "snps_known_to_foundation": 6, + "source_parent": "R1b-S5231", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2a1b1a4b2 (50%)", + "is_backbone": false + }, + { + "node": "R1b-S5627", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a1c2b2a1b1a4b2a2c", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a1c2b2a1b1a4b2a2c", + "hits": 3 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S5235", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R1b-BY25300", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1a1a1a1a2~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I2a1a1a1a1a2~", + "hits": 3 + } + ], + "defining_snp_count": 41, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S20321", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2a1b1a4b1a1 (100%)", + "is_backbone": false + }, + { + "node": "R1b-CTS604", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse 
or scattered placement).", + "best_anchor": "B2b1a2b2a~", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "B2b1a2b2a~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c2b2a1b1a1a2a", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c2b2a1b1a1a2a1", + "hits": 3 + } + ], + "defining_snp_count": 60, + "snps_known_to_foundation": 9, + "source_parent": "R1b-Z8175", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2a1b1a1a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-FGC61369", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "H3b1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "H3b1", + "hits": 3 + } + ], + "defining_snp_count": 81, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S271", + "source_parent_status": "matched→R1b1a1b1a1a1c2b2a (100%)", + "is_backbone": false + }, + { + "node": "R1b-FT289143", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q2a1c2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "Q2a1c2", + "hits": 3 + } + ], + "defining_snp_count": 51, + "snps_known_to_foundation": 3, + "source_parent": "R1b-L48", + "source_parent_status": "matched→R1b1a1b1a1a1c2b (100%)", + "is_backbone": false + }, + { + "node": "R1b-FGC30517", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a1c2a1d2a2", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "R1b1a1b1a1a1c2a1d2a2", + "hits": 6 + }, + { + "node": "R1b1a1b1a1a1c2a1d2a3b1", + "hits": 6 + }, + { + "node": "R1b1a1b1a1a1c2a1d2a3", + "hits": 3 + }, + { + "node": 
"R1b1a1b1a1a1c2a1d2a3b", + "hits": 3 + } + ], + "defining_snp_count": 27, + "snps_known_to_foundation": 18, + "source_parent": "R1b-S9787", + "source_parent_status": "matched→R1b1a1b1a1a1c2a1d2 (50%)", + "is_backbone": false + }, + { + "node": "R1b-FGC13326", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a1c1a2b", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a1c1a2b", + "hits": 2 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 2, + "source_parent": "R1b-S1785", + "source_parent_status": "matched→R1b1a1b1a1a1c1a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-YFS154845", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a1c1a2a1", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "R1b1a1b1a1a1c1a2a1", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c1a2b1a", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c1a2b1a1", + "hits": 3 + } + ], + "defining_snp_count": 75, + "snps_known_to_foundation": 9, + "source_parent": "R1b-S25234", + "source_parent_status": "matched→R1b1a1b1a1a1c1a2b1 (100%)", + "is_backbone": false + }, + { + "node": "R1b-FGC23197", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a1a1a1b1a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I1a1a1a1b1a~", + "hits": 3 + } + ], + "defining_snp_count": 105, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S1785", + "source_parent_status": "matched→R1b1a1b1a1a1c1a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-S18823", + "category": "parent_inconsistent", + "reason": "Anchor is not 
at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a1c1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a2b2a1a1c1a", + "hits": 3 + } + ], + "defining_snp_count": 9, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S265", + "source_parent_status": "matched→R1b1a1b1a1a1c1a (100%)", + "is_backbone": false + }, + { + "node": "R1b-Y18881", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a2a1a1c2~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "I1a2a1a1c2~", + "hits": 3 + }, + { + "node": "L1a1b3a1a1a~", + "hits": 2 + }, + { + "node": "R1b1a1b1a1a1c2c", + "hits": 1 + } + ], + "defining_snp_count": 69, + "snps_known_to_foundation": 6, + "source_parent": "R1b-S263", + "source_parent_status": "matched→R1b1a1b1a1a1c (100%)", + "is_backbone": false + }, + { + "node": "R1b-S5676", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "B2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "B2", + "hits": 3 + } + ], + "defining_snp_count": 33, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S3207", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-A14184", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b1a2~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1b1a2~", + "hits": 3 + } + ], + "defining_snp_count": 27, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S5676", + 
"source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R1b-SK2102", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I2a1b1a2a2a1", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "I2a1b1a2a2a1", + "hits": 3 + }, + { + "node": "I2a2a1b1", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1d2", + "hits": 3 + } + ], + "defining_snp_count": 94, + "snps_known_to_foundation": 9, + "source_parent": "R1b-FGC396", + "source_parent_status": "matched→R1b1a1b1a1a1d (100%)", + "is_backbone": false + }, + { + "node": "R1b-BY700", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "N1a1a1a1a1a2a1a2i1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "N1a1a1a1a1a2a1a2i1~", + "hits": 3 + } + ], + "defining_snp_count": 108, + "snps_known_to_foundation": 3, + "source_parent": "R1b-DF63", + "source_parent_status": "matched→R1b1a1b1a1a2c1b (100%)", + "is_backbone": false + }, + { + "node": "R1b-BY7771", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C2b1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C2b1a1", + "hits": 3 + } + ], + "defining_snp_count": 117, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FGC36422", + "source_parent_status": "matched→R1b1a1b1a1a2c1b2a1a1a (86%)", + "is_backbone": false + }, + { + "node": "R1b-FGC17160", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": 
"O1a1b", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "O1a1b", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1b1a", + "hits": 3 + } + ], + "defining_snp_count": 89, + "snps_known_to_foundation": 6, + "source_parent": "R1b-A91", + "source_parent_status": "matched→R1b1a1b1a1a2c1b1 (100%)", + "is_backbone": false + }, + { + "node": "R1b-Y23251", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a1b1a4a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I1a1b1a4a1a", + "hits": 3 + } + ], + "defining_snp_count": 54, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Y23438", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC5496", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2c1a", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a6a", + "hits": 3 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 6, + "source_parent": "R1b-FGC5494", + "source_parent_status": "matched→R1b1a1b1a1a2c1a6 (100%)", + "is_backbone": false + }, + { + "node": "R1b-FT299995", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1a1", + "hits": 3 + } + ], + "defining_snp_count": 110, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Z17967", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-Z18090", + "category": "parent_inconsistent", + "reason": 
"Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "B3", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "B3", + "hits": 3 + }, + { + "node": "J1a2a1a2d1~", + "hits": 3 + } + ], + "defining_snp_count": 123, + "snps_known_to_foundation": 6, + "source_parent": "R1b-S9294", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-A224", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2c1a1a1a1a1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a1a1a1a1", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a1a1a1a1a1a4a", + "hits": 3 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 6, + "source_parent": "R1b-A223", + "source_parent_status": "matched→R1b1a1b1a1a2c1a1a1a1a1a1a4 (100%)", + "is_backbone": false + }, + { + "node": "R1b-Y16739", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q1b2b1a2e~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "Q1b2b1a2e~", + "hits": 3 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FGC52372", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-ZS8379", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5a1e3f~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1e3f~", + "hits": 3 + } + ], + "defining_snp_count": 3, + 
"snps_known_to_foundation": 3, + "source_parent": "R1b-S6151", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-A13155", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b2a1b1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1b2a1b1~", + "hits": 3 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 3, + "source_parent": "R1b-A694", + "source_parent_status": "matched→R1b1a1b1a1a2c1a1a1a1a1a1a2a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-Z17592", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2a", + "hits": 3 + } + ], + "defining_snp_count": 81, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Z2961", + "source_parent_status": "matched→R1b1a1b1a1a2c1a1a1a1a (100%)", + "is_backbone": false + }, + { + "node": "R1b-CTS78", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1a1b2a4b1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1a1b2a4b1~", + "hits": 3 + } + ], + "defining_snp_count": 32, + "snps_known_to_foundation": 3, + "source_parent": "R1b-CTS360", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC46820", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E2b", + "anchor_strength": 1.0, 
+ "candidates": [ + { + "node": "E2b", + "hits": 3 + } + ], + "defining_snp_count": 108, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FGC42321", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-A97", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2c1a1d5a", + "anchor_strength": 0.46153846153846156, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a1d5a", + "hits": 18 + }, + { + "node": "R1b1a1b1a1a2c1a1d5", + "hits": 9 + }, + { + "node": "R1b1a1b1a1a1c2a1d2", + "hits": 6 + }, + { + "node": "R1b1a1b1a1a2c1a1d5a1~", + "hits": 6 + } + ], + "defining_snp_count": 57, + "snps_known_to_foundation": 33, + "source_parent": "R1b-A98", + "source_parent_status": "matched→R1b1a1b1a1a2c1a1d5 (100%)", + "is_backbone": false + }, + { + "node": "R1b-S3808", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a1a1a", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "G2a2b2a1a1a1a", + "hits": 3 + }, + { + "node": "N~", + "hits": 3 + } + ], + "defining_snp_count": 122, + "snps_known_to_foundation": 6, + "source_parent": "R1b-V1246", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC13776", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1a1a1a1c1a1a3d6a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1a1a1a1c1a1a3d6a", + "hits": 3 + } + ], + "defining_snp_count": 52, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FGC13783", + "source_parent_status": "matched→R1b1a1b1a1a2c1a1j (100%)", + "is_backbone": false + 
}, + { + "node": "R1b-BY64238", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b2b2b1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1b2b2b1~", + "hits": 3 + } + ], + "defining_snp_count": 165, + "snps_known_to_foundation": 3, + "source_parent": "R1b-DC268", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC18023", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1a1a1b1a3a1a2e2~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1a1a1b1a3a1a2e2~", + "hits": 2 + } + ], + "defining_snp_count": 39, + "snps_known_to_foundation": 2, + "source_parent": "R1b-FGC18022", + "source_parent_status": "matched→R1b1a1b1a1a2c1a3b1 (100%)", + "is_backbone": false + }, + { + "node": "R1b-A9040", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "N1b1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "N1b1", + "hits": 3 + }, + { + "node": "R1a1a1b1a2b3a1c~", + "hits": 3 + } + ], + "defining_snp_count": 155, + "snps_known_to_foundation": 6, + "source_parent": "R1b-Z17992", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC49407", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1a1a1b1a1a1c1c1b1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1a1a1b1a1a1c1c1b1~", + "hits": 3 + } + ], + "defining_snp_count": 25, + "snps_known_to_foundation": 3, + 
"source_parent": "R1b-A286", + "source_parent_status": "matched→R1b1a1b1a1a2c1a3b (100%)", + "is_backbone": false + }, + { + "node": "R1b-A6518", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1a1a1a1a1a1e3b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I2a1a1a1a1a1a1e3b~", + "hits": 3 + } + ], + "defining_snp_count": 50, + "snps_known_to_foundation": 3, + "source_parent": "R1b-A212", + "source_parent_status": "matched→R1b1a1b1a1a2c1a3a2a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-Y34442", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1b1a2b1a1b1a2~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I2a1b1a2b1a1b1a2~", + "hits": 3 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "R1b-BY93490", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC29280", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2c1a3a2a1a2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a3a2a1a2a", + "hits": 1 + } + ], + "defining_snp_count": 9, + "snps_known_to_foundation": 1, + "source_parent": "R1b-CP086569.2:18958846 G->A", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-Z2358", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": 
"J1a2a~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a5d3a1~", + "hits": 3 + } + ], + "defining_snp_count": 12, + "snps_known_to_foundation": 6, + "source_parent": "R1b-Z17981", + "source_parent_status": "matched→R1b1a1b1a1a2c1a3a2a1a2c1 (100%)", + "is_backbone": false + }, + { + "node": "R1b-BY207095", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "B2b1a2a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "B2b1a2a~", + "hits": 3 + } + ], + "defining_snp_count": 21, + "snps_known_to_foundation": 3, + "source_parent": "R1b-BY208692", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-A155", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2c1a3a2a1b1a2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a3a2a1b1a2a", + "hits": 1 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 1, + "source_parent": "R1b-A89", + "source_parent_status": "matched→R1b1a1b1a1a2c1a3a2a1b1a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-FGC65809", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1b1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1b1a1", + "hits": 3 + } + ], + "defining_snp_count": 111, + "snps_known_to_foundation": 3, + "source_parent": "R1b-A9871", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-S15280", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + 
"best_anchor": "R1b1a1b1a1a2c1a4b5a2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a4b5a2", + "hits": 1 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 1, + "source_parent": "R1b-S7898", + "source_parent_status": "matched→R1b1a1b1a1a2c1a4b5a (100%)", + "is_backbone": false + }, + { + "node": "R1b-S841", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "C1a2a2", + "anchor_strength": 0.375, + "candidates": [ + { + "node": "C1a2a2", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a4b", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a4b4a", + "hits": 2 + } + ], + "defining_snp_count": 83, + "snps_known_to_foundation": 8, + "source_parent": "R1b-Z253", + "source_parent_status": "matched→R1b1a1b1a1a2c1a4b (100%)", + "is_backbone": false + }, + { + "node": "R1b-Z17685", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2c1a4b4a1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a4b4a1a1", + "hits": 3 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S841", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R1b-FGC3222", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2c1a4b3a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a4b3a", + "hits": 2 + } + ], + "defining_snp_count": 84, + "snps_known_to_foundation": 2, + "source_parent": "R1b-Z253", + "source_parent_status": "matched→R1b1a1b1a1a2c1a4b (100%)", + "is_backbone": false + }, + { + "node": "R1b-Z17673", + "category": 
"weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2c1a4b2c1a1", + "anchor_strength": 0.4, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a4b2c1a1", + "hits": 6 + }, + { + "node": "R1b1a1b1a1a2c1a4b2c1a1b", + "hits": 6 + }, + { + "node": "R1b1a1b1a1a2c1a4b2c1a1b1", + "hits": 3 + } + ], + "defining_snp_count": 60, + "snps_known_to_foundation": 15, + "source_parent": "R1b-L1066", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-BY17724", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q2a1a4b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "Q2a1a4b~", + "hits": 3 + } + ], + "defining_snp_count": 48, + "snps_known_to_foundation": 3, + "source_parent": "R1b-L159", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-A6903", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1a2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1a2a", + "hits": 3 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 3, + "source_parent": "R1b-chrY:9914120 A->G", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-Y139017", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1b2a2a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1b2a2a1a", + "hits": 3 + } + ], + "defining_snp_count": 41, + "snps_known_to_foundation": 3, + "source_parent": 
"R1b-A6903", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R1b-BY9596", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q1b1b1a~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "Q1b1b1a~", + "hits": 9 + }, + { + "node": "R1b1a1b1a1a2c1a5b1a1a1a1b", + "hits": 9 + } + ], + "defining_snp_count": 12, + "snps_known_to_foundation": 9, + "source_parent": "R1b-S281", + "source_parent_status": "matched→R1b1a1b1a1a2c1a5b1a1a (100%)", + "is_backbone": false + }, + { + "node": "R1b-S953", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a4", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "G2a2b2a4", + "hits": 3 + }, + { + "node": "H1a1a4a", + "hits": 3 + } + ], + "defining_snp_count": 48, + "snps_known_to_foundation": 6, + "source_parent": "R1b-S956", + "source_parent_status": "matched→R1b1a1b1a1a2c1a5d3a1a~ (100%)", + "is_backbone": false + }, + { + "node": "R1b-F15205", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "B2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "B2", + "hits": 3 + } + ], + "defining_snp_count": 75, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FGC33056", + "source_parent_status": "matched→R1b1a1b1a1a2c1a5c2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-Z205", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2a1b1a1a~", + 
"anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2a1b1a1a~", + "hits": 2 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 2, + "source_parent": "R1b-Z264", + "source_parent_status": "matched→R1b1a1b1a1a2a1b1a1 (100%)", + "is_backbone": false + }, + { + "node": "R1b-FGC65779", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "B2b1a2a~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "B2b1a2a~", + "hits": 3 + }, + { + "node": "I1c1c1a~", + "hits": 3 + } + ], + "defining_snp_count": 80, + "snps_known_to_foundation": 6, + "source_parent": "R1b-Z205", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R1b-CTS422", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "D1a1a1b", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "D1a1a1b", + "hits": 3 + }, + { + "node": "E1b1a1a1a1c1a1a3c2b2~", + "hits": 3 + } + ], + "defining_snp_count": 104, + "snps_known_to_foundation": 6, + "source_parent": "R1b-Z205", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R1b-FT294551", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "O2a1b1a1a1a1a2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "O2a1b1a1a1a1a2", + "hits": 3 + } + ], + "defining_snp_count": 99, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FGC35927", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-Y31334", + "category": "parent_inconsistent", + "reason": "Anchor is not 
at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1a2a2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I2a1a2a2", + "hits": 3 + } + ], + "defining_snp_count": 47, + "snps_known_to_foundation": 3, + "source_parent": "R1b-CTS7359", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-Y151131", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E2b1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E2b1a", + "hits": 3 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Z298", + "source_parent_status": "matched→R1b1a1b1a1a2a1a1a1a (100%)", + "is_backbone": false + }, + { + "node": "R1b-FT294663", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2b1b2a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2b1b2a~", + "hits": 3 + } + ], + "defining_snp_count": 41, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Y151131", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R1b-chrY:8458210 C->T", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a1a2d2b2a1c~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2a1a2d2b2a1c~", + "hits": 3 + } + ], + "defining_snp_count": 71, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Z29668", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": 
"R1b-CTS609", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5a1e4a6i12~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a6i12~", + "hits": 3 + } + ], + "defining_snp_count": 42, + "snps_known_to_foundation": 3, + "source_parent": "R1b-PH312", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC20540", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a1b1a1e2d1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I1a1b1a1e2d1~", + "hits": 3 + } + ], + "defining_snp_count": 45, + "snps_known_to_foundation": 3, + "source_parent": "R1b-BY21072", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-V2059", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2a5", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2a5", + "hits": 1 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 1, + "source_parent": "R1b-DF27", + "source_parent_status": "matched→R1b1a1b1a1a2a (100%)", + "is_backbone": false + }, + { + "node": "R1b-FT19025", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a1c1a1a2a2a4a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a2b2a1a1c1a1a2a2a4a~", + "hits": 3 + } + ], + "defining_snp_count": 78, + "snps_known_to_foundation": 3, + "source_parent": 
"R1b-CTS8001", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-CTS129", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2a7~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2a7~", + "hits": 7 + } + ], + "defining_snp_count": 96, + "snps_known_to_foundation": 7, + "source_parent": "R1b-V2059", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R1b-Y23650", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a1a2d2b2b2c4d2a2a1a1a1b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4d2a2a1a1a1b~", + "hits": 3 + } + ], + "defining_snp_count": 12, + "snps_known_to_foundation": 3, + "source_parent": "R1b-DF27", + "source_parent_status": "matched→R1b1a1b1a1a2a (100%)", + "is_backbone": false + }, + { + "node": "R1b-FGC95121", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a1a1a1a1a1a1a1a1a3a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a1a1a1a1a1a1a1a1a3a~", + "hits": 3 + } + ], + "defining_snp_count": 77, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FGC42062", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-Y14468", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1a2a1a1a", + "anchor_strength": 1.0, + "candidates": [ + { + 
"node": "I2a1a2a1a1a", + "hits": 3 + } + ], + "defining_snp_count": 41, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Z2548", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-BY63832", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2a7~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2a7~", + "hits": 6 + } + ], + "defining_snp_count": 15, + "snps_known_to_foundation": 6, + "source_parent": "R1b-Z2571", + "source_parent_status": "matched→R1b1a1b1a1a2a6 (100%)", + "is_backbone": false + }, + { + "node": "R1b-Y150919", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2c1a3a2a1a2c1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a3a2a1a2c1", + "hits": 3 + } + ], + "defining_snp_count": 18, + "snps_known_to_foundation": 3, + "source_parent": "R1b-BY63832", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R1b-Y17221", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G1a1a2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G1a1a2", + "hits": 3 + } + ], + "defining_snp_count": 96, + "snps_known_to_foundation": 3, + "source_parent": "R1b-DF27", + "source_parent_status": "matched→R1b1a1b1a1a2a (100%)", + "is_backbone": false + }, + { + "node": "R1b-Y197002", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage 
anchor or topology disagreement).", + "best_anchor": "C1a2a", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "C1a2a", + "hits": 3 + }, + { + "node": "C2b1a1a1a1a5a~", + "hits": 3 + } + ], + "defining_snp_count": 99, + "snps_known_to_foundation": 6, + "source_parent": "R1b-M12109", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FT51793", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1b2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1b2a", + "hits": 3 + } + ], + "defining_snp_count": 101, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FT47952", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-CTS6519", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2a7~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2a7~", + "hits": 1 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 1, + "source_parent": "R1b-CTS9545", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-CTS13", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C2b1b1e~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "C2b1b1e~", + "hits": 3 + }, + { + "node": "R1a1a1b2a4a~", + "hits": 3 + } + ], + "defining_snp_count": 111, + "snps_known_to_foundation": 6, + "source_parent": "R1b-Z1898", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-chrY:6804113 T->C", + "category": "weak_plurality", + "reason": "No single foundation node holds a 
majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2c1a4b5a2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2c1a4b5a2", + "hits": 1 + } + ], + "defining_snp_count": 99, + "snps_known_to_foundation": 1, + "source_parent": "R1b-Z31644", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC17099", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a1b1a1e2f2b~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "I1a1b1a1e2f2b~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2a", + "hits": 3 + } + ], + "defining_snp_count": 72, + "snps_known_to_foundation": 6, + "source_parent": "R1b-Z31644", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-Y7402", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1b1a2a1a1a2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I2a1b1a2a1a1a2", + "hits": 3 + } + ], + "defining_snp_count": 15, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Y19522", + "source_parent_status": "matched→R1b1a1b1a1a2a4 (100%)", + "is_backbone": false + }, + { + "node": "R1b-BY19153", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1a1", + "hits": 3 + } + ], + "defining_snp_count": 102, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Y19522", + "source_parent_status": "matched→R1b1a1b1a1a2a4 (100%)", + "is_backbone": false + }, + { 
+ "node": "R1b-Y144477", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "L2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "L2", + "hits": 3 + } + ], + "defining_snp_count": 134, + "snps_known_to_foundation": 3, + "source_parent": "R1b-P312", + "source_parent_status": "matched→R1b1a1b1a1a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-A7970", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "O2a1b1a1a1a1b1a1b1a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "O2a1b1a1a1a1b1a1b1a1a", + "hits": 3 + } + ], + "defining_snp_count": 15, + "snps_known_to_foundation": 3, + "source_parent": "R1b-S206", + "source_parent_status": "matched→R1b1a1b1a1a2b2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-BY4040", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J1a3~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a3~", + "hits": 1 + } + ], + "defining_snp_count": 81, + "snps_known_to_foundation": 1, + "source_parent": "R1b-A7970", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R1b-CTS188", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2b2a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2b2a~", + "hits": 8 + } + ], + "defining_snp_count": 27, + "snps_known_to_foundation": 8, + "source_parent": "R1b-A7970", + 
"source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "R1b-Y19233", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b2b3b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1b2b3b~", + "hits": 3 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 3, + "source_parent": "R1b-FTA27217", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-FGC57678", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "H1a1a4a", + "anchor_strength": 0.375, + "candidates": [ + { + "node": "H1a1a4a", + "hits": 3 + }, + { + "node": "J1a2a2a~", + "hits": 3 + }, + { + "node": "I2a1b1a2a1a1a1a1", + "hits": 2 + } + ], + "defining_snp_count": 90, + "snps_known_to_foundation": 8, + "source_parent": "R1b-Y17177", + "source_parent_status": "matched→R1b1a1b1a1a2b1c2a (100%)", + "is_backbone": false + }, + { + "node": "R1b-BY3554", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "G2a1a1a1a1a1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a1a1a1a1a1a1", + "hits": 2 + } + ], + "defining_snp_count": 78, + "snps_known_to_foundation": 2, + "source_parent": "R1b-YSC0000193", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-Y139280", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1a2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1a2a", + "hits": 3 + } + ], + 
"defining_snp_count": 90, + "snps_known_to_foundation": 3, + "source_parent": "R1b-L20", + "source_parent_status": "matched→R1b1a1b1a1a2b1a1 (100%)", + "is_backbone": false + }, + { + "node": "R1b-CTS9044", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2b1a2", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "R1b1a1b1a1a2b1a2", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2b1a3~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2b1a4~", + "hits": 3 + } + ], + "defining_snp_count": 12, + "snps_known_to_foundation": 9, + "source_parent": "R1b-S255", + "source_parent_status": "matched→R1b1a1b1a1a2b1a (100%)", + "is_backbone": false + }, + { + "node": "R1b-FT303311", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a2a1b1b1a1a1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "I2a2a1b1b1a1a1", + "hits": 3 + }, + { + "node": "R1a1a1b2a1a2c2a1~", + "hits": 3 + } + ], + "defining_snp_count": 75, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Z275", + "source_parent_status": "matched→R1b1a1b1a1a2b1a2a1 (100%)", + "is_backbone": false + }, + { + "node": "R1b-CTS36", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "B2b3~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "B2b3~", + "hits": 3 + }, + { + "node": "I1a2a1a1a1a2b1~", + "hits": 3 + } + ], + "defining_snp_count": 126, + "snps_known_to_foundation": 6, + "source_parent": "R1b-PF6660", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "R1b-DF99", + "category": "weak_plurality", + 
"reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1b1a1b1a1a2f", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2f", + "hits": 2 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 2, + "source_parent": "R1b-P312", + "source_parent_status": "matched→R1b1a1b1a1a2 (100%)", + "is_backbone": false + }, + { + "node": "R1b-FT345031", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "L1a2a1a2~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "L1a2a1a2~", + "hits": 3 + } + ], + "defining_snp_count": 105, + "snps_known_to_foundation": 3, + "source_parent": "R1b-DF99", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "R1b-Y139461", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "Q1b", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "Q1b", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a4", + "hits": 3 + } + ], + "defining_snp_count": 129, + "snps_known_to_foundation": 6, + "source_parent": "R1b-P310", + "source_parent_status": "matched→R1b1a1b1a1 (62%)", + "is_backbone": false + }, + { + "node": "R1b-FT407478", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a1a2d2b2a1c~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2a1a2d2b2a1c~", + "hits": 3 + } + ], + "defining_snp_count": 108, + "snps_known_to_foundation": 3, + "source_parent": "R1b-Y19469", + "source_parent_status": "novel", + "is_backbone": 
false + }, + { + "node": "I1a-Z2540", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I1a2b", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I1a2b", + "hits": 1 + } + ], + "defining_snp_count": 15, + "snps_known_to_foundation": 1, + "source_parent": "I1a-Z58", + "source_parent_status": "matched→I1a2 (100%)", + "is_backbone": false + }, + { + "node": "I1a-FGC43913", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I1a2b1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I1a2b1", + "hits": 3 + } + ], + "defining_snp_count": 6, + "snps_known_to_foundation": 3, + "source_parent": "I1a-S2293", + "source_parent_status": "matched→I1a2b3~ (100%)", + "is_backbone": false + }, + { + "node": "I1a-BY383", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I1a2a1a1a5a1a~", + "anchor_strength": 0.375, + "candidates": [ + { + "node": "I1a2a1a1a5a1a~", + "hits": 9 + }, + { + "node": "I1a2a1a1a5a~", + "hits": 9 + }, + { + "node": "I1a2a1a1a5a1~", + "hits": 3 + }, + { + "node": "I1a2a1a1a5~", + "hits": 3 + } + ], + "defining_snp_count": 55, + "snps_known_to_foundation": 24, + "source_parent": "I1a-FGC57872", + "source_parent_status": "matched→I1a2a1a1a5~ (100%)", + "is_backbone": false + }, + { + "node": "I1a-A1915", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a4a1b", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a2b2a4a1b", + "hits": 3 + } + ], + "defining_snp_count": 
113, + "snps_known_to_foundation": 3, + "source_parent": "I1a-A1944", + "source_parent_status": "matched→I1a2a1a1a1a1a~ (100%)", + "is_backbone": false + }, + { + "node": "I1a-FT318040", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1b2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1b2a", + "hits": 3 + } + ], + "defining_snp_count": 94, + "snps_known_to_foundation": 3, + "source_parent": "I1a-Y3866", + "source_parent_status": "matched→I1a1a~ (100%)", + "is_backbone": false + }, + { + "node": "I1a-FT85559", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I1a1a1b4a~", + "anchor_strength": 0.43859649122807015, + "candidates": [ + { + "node": "I1a1a1b4a~", + "hits": 25 + }, + { + "node": "I1a1a1b4~", + "hits": 15 + }, + { + "node": "I1a1a1b~", + "hits": 6 + }, + { + "node": "B2b1a2a~", + "hits": 3 + }, + { + "node": "I1a1a1b4a1~", + "hits": 3 + } + ], + "defining_snp_count": 142, + "snps_known_to_foundation": 57, + "source_parent": "I1a-Y3866", + "source_parent_status": "matched→I1a1a~ (100%)", + "is_backbone": false + }, + { + "node": "I1a-S9318", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I1a1b1g3a~", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "I1a1b1g3a~", + "hits": 3 + }, + { + "node": "I1a1b1g3~", + "hits": 3 + }, + { + "node": "I1a1b1g~", + "hits": 3 + } + ], + "defining_snp_count": 34, + "snps_known_to_foundation": 9, + "source_parent": "I1a-FGC41265", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "I2a-FGC3633", + "category": "weak_plurality", + "reason": "No single 
foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I2a1b1a2b1a2a1", + "anchor_strength": 0.23809523809523808, + "candidates": [ + { + "node": "I2a1b1a2b1a2a1", + "hits": 5 + }, + { + "node": "I2a1b1a2b1a2a1a", + "hits": 5 + }, + { + "node": "I2a1b1a2b1a2a1a1a1a", + "hits": 5 + }, + { + "node": "I2a1b1a2b1a2a1a1a", + "hits": 3 + }, + { + "node": "I2a1b1a2b1a2a1a1a1", + "hits": 2 + } + ], + "defining_snp_count": 47, + "snps_known_to_foundation": 21, + "source_parent": "I2a-FGC3628", + "source_parent_status": "matched→I2a1b1a2b1a2a1 (100%)", + "is_backbone": false + }, + { + "node": "I2a-FT2426", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I2a1b1a2b1a2a1a1a1a1", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "I2a1b1a2b1a2a1a1a1a1", + "hits": 3 + }, + { + "node": "I2a1b1a2b1a2a1a1a1a1a", + "hits": 3 + }, + { + "node": "Q1a1a1a1a1~", + "hits": 3 + } + ], + "defining_snp_count": 16, + "snps_known_to_foundation": 6, + "source_parent": "I2a-FGC3633", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "I2a-FT255558", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1a1a1a1a1a1e5~", + "anchor_strength": 0.5384615384615384, + "candidates": [ + { + "node": "I2a1a1a1a1a1a1e5~", + "hits": 28 + }, + { + "node": "I2a1a1a1a1a1a1f2~", + "hits": 15 + }, + { + "node": "I2a1a1a1a1a1a1e5e~", + "hits": 6 + }, + { + "node": "I2a1a1a1a1a1a1e~", + "hits": 3 + } + ], + "defining_snp_count": 120, + "snps_known_to_foundation": 37, + "source_parent": "I2a-FGC93119", + "source_parent_status": "matched→I2a1a1a1a1a1a1f~ (100%)", + "is_backbone": false + }, + { + "node": "I2a-FT58949", + 
"category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I2a1a1a1a1a1a2", + "anchor_strength": 0.34814814814814815, + "candidates": [ + { + "node": "I2a1a1a1a1a1a2", + "hits": 47 + }, + { + "node": "I2a1a1a1a1a1a2a1a~", + "hits": 44 + }, + { + "node": "I2a1a1a1a1a1a2a~", + "hits": 37 + }, + { + "node": "I2a1a1a1a1a1a2a1~", + "hits": 3 + }, + { + "node": "Q1a2a2~", + "hits": 3 + } + ], + "defining_snp_count": 260, + "snps_known_to_foundation": 132, + "source_parent": "I2a-Z105", + "source_parent_status": "matched→I2a1a1a1a1a1a (100%)", + "is_backbone": false + }, + { + "node": "I2a-Y7635", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "I2a1a1b1a1", + "anchor_strength": 0.3557692307692308, + "candidates": [ + { + "node": "I2a1a1b1a1", + "hits": 111 + }, + { + "node": "I2a1a1b1a", + "hits": 85 + }, + { + "node": "I2a1a1b1", + "hits": 67 + }, + { + "node": "I2a1a1b", + "hits": 22 + }, + { + "node": "D1a2a", + "hits": 3 + } + ], + "defining_snp_count": 874, + "snps_known_to_foundation": 294, + "source_parent": "I2a-FGC56761", + "source_parent_status": "matched→I2a1a1 (100%)", + "is_backbone": false + }, + { + "node": "J2b-Z2523", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J2b2a1a1a1a1a", + "anchor_strength": 0.46153846153846156, + "candidates": [ + { + "node": "J2b2a1a1a1a1a", + "hits": 6 + }, + { + "node": "J2b2a1a1a1a~", + "hits": 4 + }, + { + "node": "J2b2a1a1a1a1", + "hits": 3 + } + ], + "defining_snp_count": 26, + "snps_known_to_foundation": 13, + "source_parent": "J2b-Z8418", + "source_parent_status": "matched→J2b2a1a1a~ (75%)", + "is_backbone": false + }, + { + "node": "J2b-Z631", + "category": 
"parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J2b2a1a1a1a1a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J2b2a1a1a1a1a1a", + "hits": 18 + } + ], + "defining_snp_count": 46, + "snps_known_to_foundation": 18, + "source_parent": "J2b-Z1295", + "source_parent_status": "matched→J2b2a1a1a1a1a1b~ (50%)", + "is_backbone": false + }, + { + "node": "J2a-FT71373", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "O2a2b2a1b2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "O2a2b2a1b2", + "hits": 2 + } + ], + "defining_snp_count": 141, + "snps_known_to_foundation": 2, + "source_parent": "J2a-PF5084", + "source_parent_status": "matched→J2a1 (100%)", + "is_backbone": false + }, + { + "node": "J2a-SK1314", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1a1b", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1a1b", + "hits": 2 + } + ], + "defining_snp_count": 276, + "snps_known_to_foundation": 2, + "source_parent": "J2a-FT71373", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "J2a-Z37967", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a2", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I2a2", + "hits": 3 + } + ], + "defining_snp_count": 162, + "snps_known_to_foundation": 3, + "source_parent": "J2a-Z35827", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "J2a-FGC75665", + "category": "weak_plurality", + 
"reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J2a1a1a2b2a1a1c2b1~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "J2a1a1a2b2a1a1c2b1~", + "hits": 1 + }, + { + "node": "J2a1a1a2b2a1a1c2b~", + "hits": 1 + } + ], + "defining_snp_count": 95, + "snps_known_to_foundation": 2, + "source_parent": "J2a-FGC75666", + "source_parent_status": "matched→J2a1a1a2b2a1a1c2~ (100%)", + "is_backbone": false + }, + { + "node": "J2a-Y3019", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J2a1a1a2b2a3b1b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J2a1a1a2b2a3b1b~", + "hits": 1 + } + ], + "defining_snp_count": 14, + "snps_known_to_foundation": 1, + "source_parent": "J2a-Z6251", + "source_parent_status": "matched→J2a1a1a2b2a3b1~ (75%)", + "is_backbone": false + }, + { + "node": "J2a-FT280289", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J2a1a1a2b2a2b3a2a1a~", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "J2a1a1a2b2a2b3a2a1a~", + "hits": 3 + }, + { + "node": "J2a1a1a2b2a2b3a2a1~", + "hits": 3 + }, + { + "node": "J2a1a1a2b2a2b3a2~", + "hits": 3 + } + ], + "defining_snp_count": 200, + "snps_known_to_foundation": 9, + "source_parent": "J2a-L742", + "source_parent_status": "matched→J2a1a1a2b2a2b3 (71%)", + "is_backbone": false + }, + { + "node": "J2a-FGC15781", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J2a1a1a2a1a~", + "anchor_strength": 0.3, + "candidates": [ + { + "node": "J2a1a1a2a1a~", + "hits": 3 + }, + { + "node": "J2a1a1a2a1~", + "hits": 3 + }, + { + "node": 
"G2a2b2a1a1b1a1a1a1a2", + "hits": 2 + }, + { + "node": "H3a1", + "hits": 2 + } + ], + "defining_snp_count": 149, + "snps_known_to_foundation": 10, + "source_parent": "J2a-Z6065", + "source_parent_status": "matched→J2a1a1a2a (100%)", + "is_backbone": false + }, + { + "node": "J2a-Z27921", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1b1a1", + "anchor_strength": 0.17647058823529413, + "candidates": [ + { + "node": "E1b1b1b1a1", + "hits": 3 + }, + { + "node": "I", + "hits": 3 + }, + { + "node": "J2a1a1a2a1a1a~", + "hits": 3 + }, + { + "node": "J2a1a1a2a1a1~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c2b2a1b5a", + "hits": 3 + } + ], + "defining_snp_count": 490, + "snps_known_to_foundation": 17, + "source_parent": "J2a-FGC15781", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "J2a-FT3329", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J2a1a1a2a2b", + "anchor_strength": 0.2916666666666667, + "candidates": [ + { + "node": "J2a1a1a2a2b", + "hits": 7 + }, + { + "node": "J2a1a1a2a2", + "hits": 6 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a2~", + "hits": 3 + }, + { + "node": "J2a1a1a2a2b2a~", + "hits": 3 + }, + { + "node": "J2a1a1a2a2b2~", + "hits": 3 + } + ], + "defining_snp_count": 532, + "snps_known_to_foundation": 24, + "source_parent": "J2a-Z6065", + "source_parent_status": "matched→J2a1a1a2a (100%)", + "is_backbone": false + }, + { + "node": "J2a-Z7372", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "R1a1a1b1a1a1b~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "R1a1a1b1a1a1b~", + "hits": 2 + }, + { + "node": "R1b1a1b1b1", + "hits": 2 + } + ], + 
"defining_snp_count": 364, + "snps_known_to_foundation": 4, + "source_parent": "J2a-PF5197", + "source_parent_status": "matched→J2a1a1b1 (100%)", + "is_backbone": false + }, + { + "node": "J2a-Z6092", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4d2a2a5", + "hits": 3 + } + ], + "defining_snp_count": 278, + "snps_known_to_foundation": 3, + "source_parent": "J2a-Z7294", + "source_parent_status": "matched→J2a1a1b1a1a (100%)", + "is_backbone": false + }, + { + "node": "J2a-FT62577", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J2b2a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J2b2a1", + "hits": 3 + } + ], + "defining_snp_count": 97, + "snps_known_to_foundation": 3, + "source_parent": "J2a-Z7294", + "source_parent_status": "matched→J2a1a1b1a1a (100%)", + "is_backbone": false + }, + { + "node": "J2a-Z7687", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J2a1a1b2a1", + "anchor_strength": 0.49122807017543857, + "candidates": [ + { + "node": "J2a1a1b2a1", + "hits": 28 + }, + { + "node": "J2a1a1b2a", + "hits": 27 + }, + { + "node": "R1a1a1b1a2b3a3a2h1a~", + "hits": 2 + } + ], + "defining_snp_count": 225, + "snps_known_to_foundation": 57, + "source_parent": "J2a-Z7680", + "source_parent_status": "matched→J2a1a1b2 (100%)", + "is_backbone": false + }, + { + "node": "J2a-A25182", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible 
recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1a2b", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "C1a2b", + "hits": 3 + }, + { + "node": "J2a1a1b2a1b1b3a2a~", + "hits": 3 + } + ], + "defining_snp_count": 132, + "snps_known_to_foundation": 6, + "source_parent": "J2a-Y24651", + "source_parent_status": "matched→J2a1a1b2a1b1b3a2~ (100%)", + "is_backbone": false + }, + { + "node": "J2a-MF89074", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "L1", + "anchor_strength": 0.6, + "candidates": [ + { + "node": "L1", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a4b2a1a1a", + "hits": 2 + } + ], + "defining_snp_count": 216, + "snps_known_to_foundation": 5, + "source_parent": "J2a-FGC70845", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "J2a-FT316587", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1a1a1a2a1b2~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "E1b1a1a1a2a1b2~", + "hits": 3 + }, + { + "node": "G2a2b2a1a1b1a1a3", + "hits": 3 + } + ], + "defining_snp_count": 223, + "snps_known_to_foundation": 6, + "source_parent": "J2a-FT178882", + "source_parent_status": "matched→J2a1a1b2a1a1 (100%)", + "is_backbone": false + }, + { + "node": "J2a-PF4993", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J2a2a", + "anchor_strength": 0.4634146341463415, + "candidates": [ + { + "node": "J2a2a", + "hits": 19 + }, + { + "node": "E1a2a1b1", + "hits": 3 + }, + { + "node": "H1a1a4a", + "hits": 3 + }, + { + "node": 
"J1a2a1a2d2b2b2c4d2a2a5a1e2a1a1~", + "hits": 3 + }, + { + "node": "J2a2a1a1a~", + "hits": 3 + } + ], + "defining_snp_count": 824, + "snps_known_to_foundation": 41, + "source_parent": "J2a-PF5030", + "source_parent_status": "matched→J2a2a (88%)", + "is_backbone": false + }, + { + "node": "J1a-CTS130", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J1a2a1a2d2b2b2c4c4a2a1~", + "anchor_strength": 0.4, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4c4a2a1~", + "hits": 6 + }, + { + "node": "J1a2a1a2d2b2b2c4c4a2a~", + "hits": 6 + }, + { + "node": "J1a2a1a2d2b2b2c4c4a2~", + "hits": 3 + } + ], + "defining_snp_count": 167, + "snps_known_to_foundation": 15, + "source_parent": "J1a-Z2291", + "source_parent_status": "matched→J1a2a1a2d2b2b2c4c4~ (80%)", + "is_backbone": false + }, + { + "node": "J1a-FGC12808", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a1a2d2b2b2c4e~", + "anchor_strength": 0.5625, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4e~", + "hits": 18 + }, + { + "node": "J1a2a1a2d2b2b2c4a1~", + "hits": 12 + }, + { + "node": "G2a2a1a2a1", + "hits": 1 + }, + { + "node": "J1a2a1a2d2b2b2c4e1~", + "hits": 1 + } + ], + "defining_snp_count": 157, + "snps_known_to_foundation": 32, + "source_parent": "J1a-FGC12806", + "source_parent_status": "matched→J1a2a1a2d2b2b2c4a~ (75%)", + "is_backbone": false + }, + { + "node": "J1a-Y5585", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J1a2a1a2d2b2b2c4d2~", + "anchor_strength": 0.3684210526315789, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4d2~", + "hits": 7 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a~", + 
"hits": 6 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a~", + "hits": 3 + } + ], + "defining_snp_count": 30, + "snps_known_to_foundation": 19, + "source_parent": "J1a-BY6660", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "J1a-Y67920", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5a1b~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1b~", + "hits": 1 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 1, + "source_parent": "J1a-FGC4257", + "source_parent_status": "matched→J1a2a1a2d2b2b2c4d2a2a5a~ (81%)", + "is_backbone": false + }, + { + "node": "J1a-FGC3", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J1a2a1a2d2b2b2c4d2a2a5", + "anchor_strength": 0.16666666666666666, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4d2a2a5", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a5a~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a5b1~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a5~", + "hits": 3 + }, + { + "node": "J1a2a1a2d2b2b2c4d2a2a5a1e4a~", + "hits": 3 + } + ], + "defining_snp_count": 15, + "snps_known_to_foundation": 15, + "source_parent": "J1a-Y9271", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "J1a-FT281164", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "C1b1a1a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "C1b1a1a", + "hits": 3 + } + ], + "defining_snp_count": 17, + "snps_known_to_foundation": 3, + "source_parent": 
"J1a-FGC60122", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "J1a-FTA27241", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J1a2a1a2d2b2b2c4b2a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4b2a~", + "hits": 1 + } + ], + "defining_snp_count": 194, + "snps_known_to_foundation": 1, + "source_parent": "J1a-FGC15941", + "source_parent_status": "matched→J1a2a1a2d2b2b2c4b2~ (100%)", + "is_backbone": false + }, + { + "node": "J1a-FGC35109", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "J1a2a1a2d2b2b2c4b1c1~", + "anchor_strength": 0.47058823529411764, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4b1c1~", + "hits": 16 + }, + { + "node": "J1a2a1a2d2b2b2c4b1c3~", + "hits": 9 + }, + { + "node": "J1a2a1a2d2b2b2c4b1c3a1~", + "hits": 6 + }, + { + "node": "J1a2a1a2d2b2b2c4b1c2~", + "hits": 3 + } + ], + "defining_snp_count": 117, + "snps_known_to_foundation": 28, + "source_parent": "J1a-Y3441", + "source_parent_status": "matched→J1a2a1a2d2b2b2c4b1c~ (60%)", + "is_backbone": false + }, + { + "node": "J1a-FGC86304", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J1a2a1a2d2b2b2c4b1c3a1a~", + "anchor_strength": 0.5714285714285714, + "candidates": [ + { + "node": "J1a2a1a2d2b2b2c4b1c3a1a~", + "hits": 12 + }, + { + "node": "J1a2a1a2d2b2b2c4b1c3a1a1~", + "hits": 9 + } + ], + "defining_snp_count": 43, + "snps_known_to_foundation": 21, + "source_parent": "J1a-FGC35109", + "source_parent_status": "flag_weak", + "is_backbone": false + }, + { + "node": "J1a-Z27661", + "category": "parent_inconsistent", + "reason": 
"Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1b1a1b1a1a2b1d1a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1b1a1b1a1a2b1d1a~", + "hits": 3 + } + ], + "defining_snp_count": 263, + "snps_known_to_foundation": 3, + "source_parent": "J1a-Z2312", + "source_parent_status": "matched→J1a2a1a2d2b2b2~ (100%)", + "is_backbone": false + }, + { + "node": "D-PH4", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "D1a1a1", + "anchor_strength": 0.4435028248587571, + "candidates": [ + { + "node": "D1a1a1", + "hits": 157 + }, + { + "node": "D1a1a1a1", + "hits": 140 + }, + { + "node": "D1a1a1a1a", + "hits": 9 + }, + { + "node": "D1a1a1a1a1b", + "hits": 9 + }, + { + "node": "D1a1", + "hits": 6 + } + ], + "defining_snp_count": 1381, + "snps_known_to_foundation": 351, + "source_parent": "D", + "source_parent_status": "matched→D1 (90%)", + "is_backbone": false + }, + { + "node": "E2-FT322364", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E2b1a1d", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E2b1a1d", + "hits": 1 + } + ], + "defining_snp_count": 12, + "snps_known_to_foundation": 1, + "source_parent": "E2-V1001", + "source_parent_status": "matched→E2b1a1 (98%)", + "is_backbone": false + }, + { + "node": "E1a-ACT19", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1a2c~", + "anchor_strength": 0.75, + "candidates": [ + { + "node": "E1a2c~", + "hits": 9 + }, + { + "node": "N1a1a1a1a2a1a1a1a1a1a1c~", + "hits": 3 + } + ], + 
"defining_snp_count": 350, + "snps_known_to_foundation": 12, + "source_parent": "E1a-Z15084", + "source_parent_status": "matched→E1a2b (97%)", + "is_backbone": false + }, + { + "node": "E1a-PF7332", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "J2b1b2a~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "J2b1b2a~", + "hits": 3 + } + ], + "defining_snp_count": 135, + "snps_known_to_foundation": 3, + "source_parent": "E1a-Z15172", + "source_parent_status": "matched→E1a2a2 (97%)", + "is_backbone": false + }, + { + "node": "E1b-Y161059", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "I2a1b1a1b1a1a2b", + "anchor_strength": 0.6, + "candidates": [ + { + "node": "I2a1b1a1b1a1a2b", + "hits": 3 + }, + { + "node": "R1a1a1b1a2a2a1b~", + "hits": 2 + } + ], + "defining_snp_count": 261, + "snps_known_to_foundation": 5, + "source_parent": "E1b-V264", + "source_parent_status": "matched→E1b1b1a1a2 (67%)", + "is_backbone": false + }, + { + "node": "E1b-V1174", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1a1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1a1", + "hits": 3 + } + ], + "defining_snp_count": 37, + "snps_known_to_foundation": 3, + "source_parent": "E1b-Y31640", + "source_parent_status": "matched→E1b1b1a1a2a1 (100%)", + "is_backbone": false + }, + { + "node": "E1b-S1954", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": 
"E1b1b1a1a2", + "anchor_strength": 0.3333333333333333, + "candidates": [ + { + "node": "E1b1b1a1a2", + "hits": 3 + }, + { + "node": "I1a2a1a1a1a", + "hits": 3 + }, + { + "node": "J1a2a1a2c1a", + "hits": 3 + } + ], + "defining_snp_count": 75, + "snps_known_to_foundation": 9, + "source_parent": "E1b-V1174", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "E1b-MF736421", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1a1a1c1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1a1a1c1", + "hits": 1 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 1, + "source_parent": "E1b-FGC2177", + "source_parent_status": "matched→E1b1b1a1a1c1 (100%)", + "is_backbone": false + }, + { + "node": "E1b-Y2846", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1a1a1c1b1a1a~", + "anchor_strength": 0.5714285714285714, + "candidates": [ + { + "node": "E1b1b1a1a1c1b1a1a~", + "hits": 12 + }, + { + "node": "E1b1b1a1a1c1b1a~", + "hits": 6 + }, + { + "node": "E1b1b1a1a1c1b1a1~", + "hits": 3 + } + ], + "defining_snp_count": 55, + "snps_known_to_foundation": 21, + "source_parent": "E1b-Y2881", + "source_parent_status": "matched→E1b1b1a1a1c1b1c~ (58%)", + "is_backbone": false + }, + { + "node": "E1b-V4490", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1a1a1c3~", + "anchor_strength": 0.5384615384615384, + "candidates": [ + { + "node": "E1b1b1a1a1c3~", + "hits": 7 + }, + { + "node": "E1b1a1b", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a5d3", + "hits": 3 + 
} + ], + "defining_snp_count": 175, + "snps_known_to_foundation": 13, + "source_parent": "E1b-FGC2177", + "source_parent_status": "matched→E1b1b1a1a1c1 (100%)", + "is_backbone": false + }, + { + "node": "E1b-V4258", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1a1a1b1", + "anchor_strength": 0.45901639344262296, + "candidates": [ + { + "node": "E1b1b1a1a1b1", + "hits": 28 + }, + { + "node": "E1b1b1a1a1b1a3", + "hits": 20 + }, + { + "node": "E1b1b1a1a1b1a~", + "hits": 10 + }, + { + "node": "L1a1b3a2b~", + "hits": 3 + } + ], + "defining_snp_count": 187, + "snps_known_to_foundation": 61, + "source_parent": "E1b-Z21175", + "source_parent_status": "matched→E1b1b1a1a1b1a3 (67%)", + "is_backbone": false + }, + { + "node": "E1b-PF2234", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1a1b1", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1a1b1", + "hits": 1 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 1, + "source_parent": "E1b-Y4359", + "source_parent_status": "matched→E1b1b1a1b1 (94%)", + "is_backbone": false + }, + { + "node": "E1b-FT318574", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a1c4", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a2b2a1a1c4", + "hits": 3 + } + ], + "defining_snp_count": 289, + "snps_known_to_foundation": 3, + "source_parent": "E1b-FT38444", + "source_parent_status": "matched→E1b1b1a1b2a4b1~ (50%)", + "is_backbone": false + }, + { + "node": "E1b-FT208247", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's 
defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1b1a1h1~", + "anchor_strength": 0.42857142857142855, + "candidates": [ + { + "node": "E1b1b1b1a1h1~", + "hits": 6 + }, + { + "node": "E1b1b1b1a1h~", + "hits": 6 + }, + { + "node": "E1b1a1a1a1c1a1a3a1d1b", + "hits": 2 + } + ], + "defining_snp_count": 110, + "snps_known_to_foundation": 14, + "source_parent": "E1b-A930", + "source_parent_status": "matched→E1b1b1b1a1h~ (100%)", + "is_backbone": false + }, + { + "node": "E1b-A2227", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1b1a1f2b~", + "anchor_strength": 0.35294117647058826, + "candidates": [ + { + "node": "E1b1b1b1a1f2b~", + "hits": 6 + }, + { + "node": "E1b1b1b1a1f~", + "hits": 4 + }, + { + "node": "E1b1b1b1a1f2a~", + "hits": 3 + }, + { + "node": "E1b1b1b1a2~", + "hits": 3 + }, + { + "node": "E1b1b1b1a1f2~", + "hits": 1 + } + ], + "defining_snp_count": 106, + "snps_known_to_foundation": 17, + "source_parent": "E1b-M5198", + "source_parent_status": "matched→E1b1b1b1a1 (100%)", + "is_backbone": false + }, + { + "node": "E1b-Y141591", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b1a2~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "E1b1b1b1a2~", + "hits": 6 + }, + { + "node": "B2b1a2a~", + "hits": 3 + }, + { + "node": "G2a2b2a1a1a1b1a2a1", + "hits": 2 + }, + { + "node": "C1b1a2a", + "hits": 1 + } + ], + "defining_snp_count": 118, + "snps_known_to_foundation": 12, + "source_parent": "E1b-Z21096", + "source_parent_status": "matched→E1b1b1b1a1 (100%)", + "is_backbone": false + }, + { + "node": "E1b-Y4975", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs 
(possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b2a1a6d~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1b1b2a1a6d~", + "hits": 9 + } + ], + "defining_snp_count": 9, + "snps_known_to_foundation": 9, + "source_parent": "E1b-Z838", + "source_parent_status": "matched→E1b1b1b2a1a5~ (52%)", + "is_backbone": false + }, + { + "node": "E1b-Z20936", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b2a1a6~", + "anchor_strength": 0.6575342465753424, + "candidates": [ + { + "node": "E1b1b1b2a1a6~", + "hits": 48 + }, + { + "node": "E1b1b1b2a1a~", + "hits": 9 + }, + { + "node": "E1b1b1b2a1a6c1~", + "hits": 7 + }, + { + "node": "E1b1b1b2a1a6c~", + "hits": 3 + }, + { + "node": "I1a10b2a~", + "hits": 3 + } + ], + "defining_snp_count": 211, + "snps_known_to_foundation": 70, + "source_parent": "E1b-Z838", + "source_parent_status": "matched→E1b1b1b2a1a5~ (52%)", + "is_backbone": false + }, + { + "node": "E1b-Y17226", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1b2a1a1a1a1b2b1", + "anchor_strength": 0.4, + "candidates": [ + { + "node": "E1b1b1b2a1a1a1a1b2b1", + "hits": 6 + }, + { + "node": "E1b1b1b2a1a1a1a1b2b", + "hits": 3 + }, + { + "node": "E1b1b1b2a1a1a1a1b2b1a", + "hits": 3 + }, + { + "node": "E1b1b1b2a1a1a1a1b2b1a1", + "hits": 3 + } + ], + "defining_snp_count": 17, + "snps_known_to_foundation": 15, + "source_parent": "E1b-Y15423", + "source_parent_status": "matched→E1b1b1b2a1a1a1a1b2~ (98%)", + "is_backbone": false + }, + { + "node": "E1b-FGC7911", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or 
topology disagreement).", + "best_anchor": "I1a1b1a4a2e1a1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "I1a1b1a4a2e1a1~", + "hits": 3 + } + ], + "defining_snp_count": 255, + "snps_known_to_foundation": 3, + "source_parent": "E1b-FGC18372", + "source_parent_status": "matched→E1b1b1b2a1a1a1a1a1a~ (100%)", + "is_backbone": false + }, + { + "node": "E1b-Z20966", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1b1b2a1a1a1a1f1~", + "anchor_strength": 0.6666666666666666, + "candidates": [ + { + "node": "E1b1b1b2a1a1a1a1f1~", + "hits": 6 + }, + { + "node": "E1b1b1b2a1a1a1a1f~", + "hits": 3 + } + ], + "defining_snp_count": 24, + "snps_known_to_foundation": 9, + "source_parent": "E1b-Z20968", + "source_parent_status": "matched→E1b1b1b2a1a1a1a1e~ (100%)", + "is_backbone": false + }, + { + "node": "E1b-Z20984", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1b2a1a1a1a1f1b1a", + "anchor_strength": 0.42105263157894735, + "candidates": [ + { + "node": "E1b1b1b2a1a1a1a1f1b1a", + "hits": 24 + }, + { + "node": "E1b1b1b2a1a1a1a1f1b", + "hits": 15 + }, + { + "node": "E1b1b1b2a1a1a1a1f1b1", + "hits": 9 + }, + { + "node": "D1", + "hits": 3 + }, + { + "node": "E1b1b1b2a1a1a1a1f1b1a1", + "hits": 3 + } + ], + "defining_snp_count": 85, + "snps_known_to_foundation": 51, + "source_parent": "E1b-Z20966", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "E1b-Z20900", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1b2b2a1a1~", + "anchor_strength": 0.4444444444444444, + "candidates": [ + { + "node": 
"E1b1b1b2b2a1a1~", + "hits": 24 + }, + { + "node": "E1b1b1b2b2a1a~", + "hits": 18 + }, + { + "node": "E1b1b1b2b2a1a1a", + "hits": 9 + }, + { + "node": "E1b1b1b2b", + "hits": 3 + } + ], + "defining_snp_count": 151, + "snps_known_to_foundation": 54, + "source_parent": "E1b-L364", + "source_parent_status": "matched→E1b1b1b2b2a1a~ (71%)", + "is_backbone": false + }, + { + "node": "E1b-MF121627", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E", + "anchor_strength": 0.75, + "candidates": [ + { + "node": "E", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a2c1a6b~", + "hits": 1 + } + ], + "defining_snp_count": 144, + "snps_known_to_foundation": 4, + "source_parent": "E1b-CTS275", + "source_parent_status": "matched→E1b1a1a1a1 (100%)", + "is_backbone": false + }, + { + "node": "E1b-Y81422", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E", + "hits": 3 + } + ], + "defining_snp_count": 156, + "snps_known_to_foundation": 3, + "source_parent": "E1b-Z36529", + "source_parent_status": "matched→E1b1a1a1a1c4~ (75%)", + "is_backbone": false + }, + { + "node": "E1b-Z22359", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1a1a1a1c1a1a3e", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1a1a1a1c1a1a3e", + "hits": 2 + } + ], + "defining_snp_count": 2, + "snps_known_to_foundation": 2, + "source_parent": "E1b-Z5962", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "E1b-Z1656", + "category": "parent_inconsistent", + "reason": "Anchor is 
not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1a1a1a1c1a1a3a1d1b1", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "E1b1a1a1a1c1a1a3a1d1b1", + "hits": 6 + }, + { + "node": "E1b1a1a1a1c1a1a3a1d1b1b1", + "hits": 6 + } + ], + "defining_snp_count": 17, + "snps_known_to_foundation": 6, + "source_parent": "E1b-Z22617", + "source_parent_status": "matched→E1b1a1a1a1c1a1a3a1d1b1b (100%)", + "is_backbone": false + }, + { + "node": "E1b-FT206082", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "R1a1a1b1a3a3a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "R1a1a1b1a3a3a", + "hits": 3 + } + ], + "defining_snp_count": 88, + "snps_known_to_foundation": 3, + "source_parent": "E1b-FT52771", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "E1b-FT325004", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "E1b1a1a1a1c1a1a3a1d1b1a1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1a1a1a1c1a1a3a1d1b1a1~", + "hits": 3 + } + ], + "defining_snp_count": 19, + "snps_known_to_foundation": 3, + "source_parent": "E1b-FT206082", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": false + }, + { + "node": "E1b-Y196451", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1a1a1a1c1a1a3a1c", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1a1a1a1c1a1a3a1c", + "hits": 1 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 1, + 
"source_parent": "E1b-Z1704", + "source_parent_status": "matched→E1b1a1a1a1c1a1a3a1 (100%)", + "is_backbone": false + }, + { + "node": "E1b-PF7223", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1a1a1a1c1a1a3c1b", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1a1a1a1c1a1a3c1b", + "hits": 1 + } + ], + "defining_snp_count": 3, + "snps_known_to_foundation": 1, + "source_parent": "E1b-Z1705", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "E1b-V4311", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1b1a1b1a", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "E1b1b1a1b1a", + "hits": 2 + }, + { + "node": "H1a2b1a", + "hits": 2 + } + ], + "defining_snp_count": 177, + "snps_known_to_foundation": 4, + "source_parent": "E1b-CTS275", + "source_parent_status": "matched→E1b1a1a1a1 (100%)", + "is_backbone": false + }, + { + "node": "E1b-FT399996", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b1a1a1a1a1~", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "G2a2b1a1a1a1a1~", + "hits": 3 + } + ], + "defining_snp_count": 42, + "snps_known_to_foundation": 3, + "source_parent": "E1b-FT399583", + "source_parent_status": "novel", + "is_backbone": false + }, + { + "node": "E1b-Z5953", + "category": "weak_plurality", + "reason": "No single foundation node holds a majority of the node's defining SNPs (SNP-sparse or scattered placement).", + "best_anchor": "E1b1a1a1a2a1a3b1a2a", + "anchor_strength": 1.0, + "candidates": [ + { + "node": "E1b1a1a1a2a1a3b1a2a", + "hits": 2 + } + ], + "defining_snp_count": 6, + 
"snps_known_to_foundation": 2, + "source_parent": "E1b-Z1725", + "source_parent_status": "matched→E1b1a1a1a2a1a3b1a2 (100%)", + "is_backbone": false + }, + { + "node": "E1b-Y55741", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "G2a2b2a1a1b1a1a2b1b3~", + "anchor_strength": 0.5, + "candidates": [ + { + "node": "G2a2b2a1a1b1a1a2b1b3~", + "hits": 3 + }, + { + "node": "R1b1a1b1a1a1c2b2a1b1a", + "hits": 3 + } + ], + "defining_snp_count": 219, + "snps_known_to_foundation": 6, + "source_parent": "E1b-V4257", + "source_parent_status": "matched→E1b1a1a1b~ (57%)", + "is_backbone": false + }, + { + "node": "A00", + "category": "parent_inconsistent", + "reason": "Anchor is not at/below the parent's anchor, or its major clade differs (possible recurrent-SNP cross-lineage anchor or topology disagreement).", + "best_anchor": "A00", + "anchor_strength": 0.9720149253731343, + "candidates": [ + { + "node": "A00", + "hits": 3126 + }, + { + "node": "C1a1", + "hits": 6 + }, + { + "node": "A00b", + "hits": 3 + }, + { + "node": "B3", + "hits": 3 + }, + { + "node": "C", + "hits": 3 + } + ], + "defining_snp_count": 6548, + "snps_known_to_foundation": 3210, + "source_parent": "Y", + "source_parent_status": "flag_weak", + "is_backbone": true + }, + { + "node": "BT", + "category": "name_collision", + "reason": "Source node name matches an existing foundation node but defines different SNPs (no SNP overlap) — reconcile or rename.", + "best_anchor": "BT", + "anchor_strength": 0.0, + "candidates": [], + "defining_snp_count": 1117, + "snps_known_to_foundation": 0, + "source_parent": "A1b", + "source_parent_status": "flag_parent_inconsistent", + "is_backbone": true + }, + { + "node": "I1b", + "category": "name_collision", + "reason": "Source node name matches an existing foundation node but defines different SNPs (no SNP overlap) — 
reconcile or rename.", + "best_anchor": "I1b", + "anchor_strength": 0.0, + "candidates": [], + "defining_snp_count": 4, + "snps_known_to_foundation": 0, + "source_parent": "I1", + "source_parent_status": "matched→I1 (97%)", + "is_backbone": false + } + ], + "graft_blocked": [ + "A0-V151", + "A1a-V4436", + "A1a-Z11346", + "A1a-Z11371", + "A1a-S15194", + "A1a-Z38529", + "A1b1-V1018", + "A1b1-V4244", + "A1b1-Y23900", + "A1b1-Y23869", + "A1b1-Y156153", + "A1b1-V7285", + "A1b1-V5912", + "A1b1-Y30506", + "A1b1-V5321", + "A1b1-BY16278", + "A1b1-FGC38354", + "A1b1-V1879", + "A1b1-Y161035", + "A1b1-V1559", + "A1b1-M9705", + "A1b1-Y76912", + "A1b1-Y161020", + "B-M6503", + "B-FT334006", + "C-F15201", + "G-Y197456", + "G-chrY:25320800 G->A", + "O-FGC86", + "O-FT272461", + "O-FTB26900", + "O-CP086569.2:12299902 C->G", + "O-BY153273", + "N-CTS277", + "N-FT324649", + "S-F17185", + "P2", + "R2-FT327021", + "R2-FGC56232", + "R2-FGC61992", + "R2-FGC61453", + "R1a-Y183609", + "R1a-S7737", + "R1b-FT300167", + "R1b-Y21408", + "R1b-S9535", + "R1b-Y8604", + "R1b-BY68252", + "R1b-chrY:5321421 C->T", + "R1b-BY25301", + "R1b-BY25309", + "R1b-chrY:25774450 A->G", + "R1b-A7209", + "R1b-Y94610", + "R1b-FGC34162", + "R1b-Y61666", + "R1b-FGC52315", + "R1b-S8350", + "R1b-FGC15226", + "R1b-BY675", + "R1b-A24483", + "R1b-FGC62105", + "R1b-A11391", + "R1b-ZS10825", + "R1b-FGC13773", + "R1b-FT2802", + "R1b-A9904", + "R1b-FGC29291", + "R1b-BY42407", + "R1b-A27511", + "R1b-Z17687", + "R1b-Y83959", + "R1b-A10891", + "R1b-BY11465", + "R1b-CTS606", + "R1b-Z29713", + "R1b-FT299988", + "R1b-PH2007", + "R1b-CTS8001", + "R1b-Y22894", + "R1b-Y22889", + "R1b-Y30858", + "R1b-BY208342", + "R1b-BY202532", + "R1b-CTS416", + "R1b-FTA51551", + "R1b-Y20968", + "R1b-FT173909", + "R1b-FT300231", + "R1b-FGC60524", + "R1b-Y15850", + "R1b-Y176774", + "R1b-BY63479", + "R1b-BY59223", + "R1b-Y228967", + "R1b-Y139200", + "R1b-S16136", + "I2a-FT73935", + "J2b-CP086569.2:25487852 C->T", + "J2a-Y60112", + "J2a-Z35822", + 
"J2a-Z35827", + "J2a-FT3472", + "J2a-FT294597", + "J2a-Z7433", + "J2a-Z7391", + "J2a-FT3373", + "J2a-FT171820", + "J2a-FT171833", + "J2a-Z28070", + "J2a-FT317295", + "J2a-FT324728", + "J2a-Z7274", + "J2a-Z7261", + "J2a-CP086569.2:12398698 T->G", + "J1a-FT316616", + "J1a-FT283260", + "J1a-ZS5596", + "J1a-FT299872", + "J1a-FT299874", + "J1a-BY89355", + "J1a-FT14822", + "E1b-Z6370", + "E1b-A18841", + "E1b-Z5018", + "E1b-Y145455", + "E1b-FGC3479", + "E1b-K695", + "E1b-Y161200", + "E1b-FT400626" + ] +} \ No newline at end of file diff --git a/documents/planning/appview-pds-backfeed-system.md b/documents/planning/appview-pds-backfeed-system.md deleted file mode 100644 index e544feb8..00000000 --- a/documents/planning/appview-pds-backfeed-system.md +++ /dev/null @@ -1,1346 +0,0 @@ -# AppView-to-PDS Backfeed System - -## Executive Summary - -This document describes the **backfeed methodology** for keeping researcher/citizen PDS records synchronized with computed and curated data from the DecodingUs AppView. When DecodingUs refines haplogroup assignments, discovers new branches, identifies potential matches, or updates any derived data, these changes must flow back to the user's PDS so they always have the most current metadata. - ---- - -## Problem Statement - -The current Atmosphere architecture is primarily **unidirectional**: - -``` -Researcher/Citizen PDS → Firehose → DecodingUs AppView → Database -``` - -However, DecodingUs performs significant post-ingestion processing on **metadata only**: - -1. **Haplogroup Refinement**: Tree updates may refine `R-L21` to `R-L21>FT54321` -2. **Branch Discovery**: Private variants may be promoted to official branches -3. **Ancestral STR Reconstruction**: Compute modal STR haplotypes for tree branches using submitted STR profiles -4. **TMRCA Estimation**: Age estimates computed from STR variance across the network -5. **Potential Match Discovery**: Identify potential genetic matches across the network for user exploration -6. 
**Confirmed Match Stamping**: Record when both parties agree on a match result -7. **Lab Inference**: Sequencer instrument-to-lab mappings from metadata - -### Edge Computing Model - -**Critical Architecture Principle**: Raw genomic data (BAM/CRAM/VCF/genotype files) **never** flows to DecodingUs. All raw data analysis happens locally in the Navigator Workbench: - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ EDGE COMPUTING ARCHITECTURE │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ Navigator Workbench (Edge) │ │ -│ │ │ │ -│ │ Raw Data Analysis (LOCAL ONLY - never transmitted): │ │ -│ │ • BAM/CRAM alignment and coverage metrics │ │ -│ │ • Variant calling from sequence data │ │ -│ │ • Haplogroup determination (Y-DNA, mtDNA) │ │ -│ │ • STR extraction from WGS │ │ -│ │ • Ancestry composition / admixture analysis │ │ -│ │ • IBD segment detection (autosomal) │ │ -│ │ │ │ -│ │ Output → Summary metadata synced to PDS │ │ -│ │ │ │ -│ └──────────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ User's PDS (Metadata Only) │ │ -│ │ │ │ -│ │ • biosample (haplogroup assignments, coverage stats) │ │ -│ │ • strProfile (STR marker values - needed for tree building) │ │ -│ │ • alignment (metrics summary, not raw alignments) │ │ -│ │ • populationBreakdown (admixture percentages) │ │ -│ │ • Private Y-DNA/mtDNA SNPs (for branch discovery) │ │ -│ │ │ │ -│ └──────────────────────────────┬───────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ DecodingUs AppView │ │ -│ │ │ │ -│ │ Aggregation & Network Intelligence: │ │ -│ │ • Haplogroup tree refinement (from network-wide SNP data) │ │ -│ │ • Ancestral STR reconstruction (from submitted STR profiles) │ │ -│ │ • 
TMRCA estimation (from STR variance across samples) │ │ -│ │ • Potential match identification (metadata comparison) │ │ -│ │ • Branch discovery consensus (aggregate private variants) │ │ -│ │ │ │ -│ │ NEVER receives: BAM, CRAM, VCF, FASTQ, raw genotype files │ │ -│ │ │ │ -│ └─────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### Data That DOES Flow to DecodingUs (via PDS) - -| Data Type | Purpose | Why Needed | -|:---|:---|:---| -| Haplogroup assignments | Tree placement | Network-wide refinement | -| Private Y-DNA SNPs | Branch discovery | Consensus detection for new branches | -| Private mtDNA SNPs | Branch discovery | Consensus detection for new branches | -| STR marker values | Ancestral reconstruction | Modal haplotype & TMRCA calculation | -| Coverage/quality metrics | Sample characterization | Match quality assessment | -| Ancestry percentages | Population context | Computed locally, shared as summary | - -### Data That NEVER Flows to DecodingUs - -| Data Type | Why Excluded | -|:---|:---| -| BAM/CRAM files | Raw sequence data - analyzed locally | -| VCF files | Full variant calls - only private SNPs shared | -| FASTQ files | Raw reads - never leave the workbench | -| Genotype chip data | Raw calls - ancestry computed locally | -| IBD segments | Sensitive relationship data - only match confirmation shared | - -Without backfeed, user PDS records become stale and diverge from the AppView's refined understanding. - -### Current Gap - -The Atmosphere Lexicon defines records that the AppView **writes to user PDS** (e.g., `matchList`, `haplogroupAncestralStr`), but lacks: - -1. A systematic enumeration of all backfeed scenarios -2. New record types for AppView-computed updates -3. Authorization model for AppView writing to user PDS -4. Conflict resolution when local and remote changes collide -5. Notification mechanism for users to see what changed -6. 
Audit trail for all AppView-initiated updates - ---- - -## Backfeed Categories - -### Category 1: AppView-Authored Records - -Records created entirely by the AppView and pushed to user PDS. User cannot create these directly. - -| Record Type | Trigger | Content | -|:---|:---|:---| -| `potentialMatchList` | Network analysis identifies candidates | List of potential matches for user to explore | -| `confirmedMatch` | Both parties agree on match result | Stamped match record with agreed details | -| `haplogroupAncestralStr` | STR reconstruction runs | Ancestral modal haplotype for haplogroup branch | - -**Note**: `populationBreakdown` is computed locally in the Workbench and synced to PDS by the user, NOT authored by AppView. - -### Category 2: AppView-Updated Records - -Records created by the user (via Workbench) but updated by the AppView when network intelligence provides new information. - -| Record Type | Field(s) Updated | Trigger | -|:---|:---|:---| -| `biosample` | `haplogroups.yDna.haplogroupName` | Tree update refines terminal haplogroup | -| `biosample` | `haplogroups.mtDna.haplogroupName` | Tree update refines terminal haplogroup | -| `biosample` | `haplogroups.*.privateVariants` | Private variants reclassified as known branch | -| `biosample` | `haplogroups.*.lineagePath` | Tree restructuring changes ancestry path | - -**Note**: `alignment.metrics`, `strProfile`, and `populationBreakdown` are computed locally and NOT updated by AppView. - -### Category 3: AppView-Notification Records - -New record types to notify users of changes without modifying their source records. - -| Record Type | Purpose | -|:---|:---| -| `haplogroupUpdate` | Notify of haplogroup refinement from tree update | -| `branchDiscovery` | Notify that user's private variants became official branch | -| `treeVersionUpdate` | Notify that reference tree version changed (may affect assignments) | - ---- - -## New Lexicon Records for Backfeed - -### 1. 
Haplogroup Update Notification (`com.decodingus.atmosphere.haplogroupUpdate`) - -Sent to user's PDS when their biosample's haplogroup assignment changes. - -**NSID:** `com.decodingus.atmosphere.haplogroupUpdate` - -**Author:** AppView (DecodingUs) - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.haplogroupUpdate", - "defs": { - "main": { - "type": "record", - "description": "Notification that a biosample's haplogroup assignment has been refined or corrected.", - "key": "tid", - "record": { - "type": "object", - "required": ["meta", "atUri", "biosampleRef", "updateType", "lineage", "previous", "current"], - "properties": { - "atUri": { - "type": "string", - "description": "The AT URI of this update notification." - }, - "meta": { - "type": "ref", - "ref": "com.decodingus.atmosphere.defs#recordMeta" - }, - "biosampleRef": { - "type": "string", - "description": "AT URI of the biosample that was updated." - }, - "updateType": { - "type": "string", - "description": "Type of haplogroup update.", - "knownValues": ["REFINEMENT", "CORRECTION", "BRANCH_DISCOVERY", "TREE_UPDATE", "RECLASSIFICATION"] - }, - "lineage": { - "type": "string", - "description": "Which lineage was updated.", - "knownValues": ["Y_DNA", "MT_DNA"] - }, - "previous": { - "type": "ref", - "ref": "#haplogroupState", - "description": "The previous haplogroup assignment." - }, - "current": { - "type": "ref", - "ref": "#haplogroupState", - "description": "The new haplogroup assignment." - }, - "reason": { - "type": "string", - "description": "Human-readable explanation of why the change occurred." - }, - "treeVersion": { - "type": "string", - "description": "Haplogroup tree version that triggered the update (e.g., 'ISOGG-2025.1')." - }, - "effectiveAt": { - "type": "string", - "format": "datetime", - "description": "When this update took effect." - }, - "acknowledgement": { - "type": "ref", - "ref": "#updateAcknowledgement", - "description": "User's acknowledgement of the update (optional)." 
- } - } - } - }, - "haplogroupState": { - "type": "object", - "description": "Snapshot of a haplogroup assignment at a point in time.", - "required": ["haplogroupName"], - "properties": { - "haplogroupName": { - "type": "string" - }, - "score": { - "type": "float" - }, - "treeDepth": { - "type": "integer" - }, - "lineagePath": { - "type": "array", - "items": { "type": "string" } - } - } - }, - "updateAcknowledgement": { - "type": "object", - "description": "User's acknowledgement of an update.", - "properties": { - "acknowledgedAt": { - "type": "string", - "format": "datetime" - }, - "accepted": { - "type": "boolean", - "description": "True if user accepts, false if they dispute." - }, - "disputeReason": { - "type": "string", - "description": "Reason for disputing (if accepted=false)." - } - } - } - } -} -``` - -### 2. Branch Discovery Notification (`com.decodingus.atmosphere.branchDiscovery`) - -Sent when a user's private variants have been promoted to an official haplogroup branch. - -**NSID:** `com.decodingus.atmosphere.branchDiscovery` - -**Author:** AppView (DecodingUs) - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.branchDiscovery", - "defs": { - "main": { - "type": "record", - "description": "Notification that private variants from a biosample have been promoted to an official branch.", - "key": "tid", - "record": { - "type": "object", - "required": ["meta", "atUri", "biosampleRef", "newBranchName", "definingVariants", "discoveredAt"], - "properties": { - "atUri": { - "type": "string", - "description": "The AT URI of this discovery notification." - }, - "meta": { - "type": "ref", - "ref": "com.decodingus.atmosphere.defs#recordMeta" - }, - "biosampleRef": { - "type": "string", - "description": "AT URI of the biosample that contributed to the discovery." 
- }, - "lineage": { - "type": "string", - "description": "Which lineage (Y-DNA or mtDNA).", - "knownValues": ["Y_DNA", "MT_DNA"] - }, - "parentBranch": { - "type": "string", - "description": "The parent haplogroup from which the new branch descends." - }, - "newBranchName": { - "type": "string", - "description": "Name of the newly discovered branch (e.g., 'R-FT54321')." - }, - "definingVariants": { - "type": "array", - "description": "The variants that define this new branch.", - "items": { - "type": "ref", - "ref": "com.decodingus.atmosphere.defs#variantCall" - } - }, - "contributingSamples": { - "type": "integer", - "description": "Number of biosamples that share these variants." - }, - "discoveredAt": { - "type": "string", - "format": "datetime", - "description": "When the branch was officially added to the tree." - }, - "curatorNotes": { - "type": "string", - "description": "Optional notes from the curator who approved the branch." - }, - "citationDoi": { - "type": "string", - "description": "DOI of publication if branch was discovered through academic research." - } - } - } - } - } -} -``` - -### 3. Tree Version Update Notification (`com.decodingus.atmosphere.treeVersionUpdate`) - -Sent when the haplogroup reference tree is updated, which may affect user's assignments. - -**NSID:** `com.decodingus.atmosphere.treeVersionUpdate` - -**Author:** AppView (DecodingUs) - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.treeVersionUpdate", - "defs": { - "main": { - "type": "record", - "description": "Notification that the haplogroup reference tree has been updated.", - "key": "tid", - "record": { - "type": "object", - "required": ["meta", "atUri", "lineage", "previousVersion", "newVersion", "effectiveAt"], - "properties": { - "atUri": { - "type": "string", - "description": "The AT URI of this tree update notification." 
- }, - "meta": { - "type": "ref", - "ref": "com.decodingus.atmosphere.defs#recordMeta" - }, - "lineage": { - "type": "string", - "description": "Which lineage tree was updated.", - "knownValues": ["Y_DNA", "MT_DNA"] - }, - "previousVersion": { - "type": "string", - "description": "Previous tree version (e.g., 'ISOGG-2024.12')." - }, - "newVersion": { - "type": "string", - "description": "New tree version (e.g., 'ISOGG-2025.01')." - }, - "effectiveAt": { - "type": "string", - "format": "datetime", - "description": "When the new tree version became active." - }, - "affectedBiosamples": { - "type": "array", - "description": "List of user's biosamples that may be affected.", - "items": { - "type": "ref", - "ref": "#affectedBiosample" - } - }, - "changelogUrl": { - "type": "string", - "format": "uri", - "description": "URL to the tree changelog/release notes." - }, - "summary": { - "type": "string", - "description": "Human-readable summary of changes relevant to user." - } - } - } - }, - "affectedBiosample": { - "type": "object", - "description": "A biosample potentially affected by tree changes.", - "required": ["biosampleRef", "currentHaplogroup"], - "properties": { - "biosampleRef": { - "type": "string", - "description": "AT URI of the affected biosample." - }, - "currentHaplogroup": { - "type": "string", - "description": "Current haplogroup assignment." - }, - "mayChange": { - "type": "boolean", - "description": "True if this biosample's assignment may change." - }, - "suggestedAction": { - "type": "string", - "description": "Recommended action (e.g., 'Re-analyze in Workbench').", - "knownValues": ["NONE", "REVIEW", "REANALYZE"] - } - } - } - } -} -``` - ---- - -## Collaborative Matching Model - -Unlike centralized DNA matching services that compute matches server-side, DecodingUs uses a **collaborative discovery** model where: - -1. **AppView identifies potential matches** across the network based on shared haplogroups, STR similarity, or other criteria -2. 
**Users explore candidates** in their Workbench (Navigator), choosing which to investigate -3. **Both parties must agree** on the match result before it's stamped as confirmed -4. **Confirmed matches** are written to both users' PDS as permanent records - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ COLLABORATIVE MATCHING FLOW │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────┐ ┌─────────────┐ │ -│ │ Alice's │ │ Bob's │ │ -│ │ Workbench │ │ Workbench │ │ -│ └──────┬──────┘ └──────┬──────┘ │ -│ │ │ │ -│ │ 1. AppView identifies potential match │ │ -│ │◀─────────────────────────────────────────▶│ │ -│ │ (written to both PDS as candidates) │ │ -│ │ │ │ -│ │ 2. Alice explores match in Workbench │ │ -│ │─────▶ Reviews STR comparison │ │ -│ │─────▶ Compares haplogroup branches │ │ -│ │─────▶ Initiates match confirmation │ │ -│ │ │ │ -│ │ 3. AppView notifies Bob of request │ │ -│ │──────────────────────────────────────────▶│ │ -│ │ │ │ -│ │ 4. Bob reviews and confirms match │ │ -│ │◀──────────────────────────────────────────│ │ -│ │ │ │ -│ │ 5. AppView stamps confirmed match │ │ -│ │◀─────────────────────────────────────────▶│ │ -│ │ (written to BOTH PDS) │ │ -│ │ │ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### 4. Potential Match List Record (`com.decodingus.atmosphere.potentialMatchList`) - -List of potential matches identified by the AppView for user exploration. 
- -**NSID:** `com.decodingus.atmosphere.potentialMatchList` - -**Author:** AppView (DecodingUs) - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.potentialMatchList", - "defs": { - "main": { - "type": "record", - "description": "List of potential genetic matches for user to explore in Workbench.", - "key": "tid", - "record": { - "type": "object", - "required": ["meta", "atUri", "biosampleRef", "candidates"], - "properties": { - "atUri": { - "type": "string", - "description": "The AT URI of this potential match list." - }, - "meta": { - "type": "ref", - "ref": "com.decodingus.atmosphere.defs#recordMeta" - }, - "biosampleRef": { - "type": "string", - "description": "AT URI of the biosample these candidates relate to." - }, - "candidateCount": { - "type": "integer", - "description": "Total number of potential matches." - }, - "lastUpdatedAt": { - "type": "string", - "format": "datetime", - "description": "When candidate list was last refreshed." - }, - "candidates": { - "type": "array", - "description": "List of potential match candidates.", - "items": { - "type": "ref", - "ref": "#matchCandidate" - } - } - } - } - }, - "matchCandidate": { - "type": "object", - "description": "A potential match candidate for user exploration.", - "required": ["candidateBiosampleRef", "matchType", "similarity"], - "properties": { - "candidateBiosampleRef": { - "type": "string", - "description": "AT URI of the potential match's biosample." - }, - "candidateDid": { - "type": "string", - "description": "DID of the potential match (if they consent to visibility)." - }, - "matchType": { - "type": "string", - "description": "Type of potential match.", - "knownValues": ["Y_STR", "Y_SNP_HAPLOGROUP", "MT_HAPLOGROUP", "AUTOSOMAL_IBD"] - }, - "similarity": { - "type": "float", - "description": "Similarity score (0.0-1.0) for ranking candidates." - }, - "sharedHaplogroup": { - "type": "string", - "description": "Common haplogroup if Y-DNA or mtDNA match." 
- }, - "geneticDistance": { - "type": "integer", - "description": "STR genetic distance if Y-STR match." - }, - "estimatedRelationship": { - "type": "string", - "description": "Rough relationship estimate based on match type." - }, - "identifiedAt": { - "type": "string", - "format": "datetime", - "description": "When this candidate was identified." - }, - "status": { - "type": "string", - "description": "Current status of this candidate.", - "knownValues": ["NEW", "VIEWED", "EXPLORING", "PENDING_CONFIRMATION", "CONFIRMED", "DECLINED"] - } - } - } - } -} -``` - -### 5. Confirmed Match Record (`com.decodingus.atmosphere.confirmedMatch`) - -A confirmed match stamped by the AppView after both parties agree. - -**NSID:** `com.decodingus.atmosphere.confirmedMatch` - -**Author:** AppView (DecodingUs) - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.confirmedMatch", - "defs": { - "main": { - "type": "record", - "description": "A confirmed genetic match agreed upon by both parties.", - "key": "tid", - "record": { - "type": "object", - "required": ["meta", "atUri", "biosampleRef", "matchedBiosampleRef", "matchType", "confirmedAt"], - "properties": { - "atUri": { - "type": "string", - "description": "The AT URI of this confirmed match record." - }, - "meta": { - "type": "ref", - "ref": "com.decodingus.atmosphere.defs#recordMeta" - }, - "biosampleRef": { - "type": "string", - "description": "AT URI of this user's biosample." - }, - "matchedBiosampleRef": { - "type": "string", - "description": "AT URI of the matched biosample." - }, - "matchedCitizenDid": { - "type": "string", - "description": "DID of the matched citizen." - }, - "matchType": { - "type": "string", - "description": "Type of confirmed match.", - "knownValues": ["Y_STR", "Y_SNP_HAPLOGROUP", "MT_HAPLOGROUP", "AUTOSOMAL_IBD"] - }, - "matchDetails": { - "type": "ref", - "ref": "#confirmedMatchDetails", - "description": "Detailed match information based on match type." 
- }, - "confirmedAt": { - "type": "string", - "format": "datetime", - "description": "When both parties confirmed the match." - }, - "initiatedBy": { - "type": "string", - "description": "DID of the party who initiated confirmation." - }, - "confirmedBy": { - "type": "string", - "description": "DID of the party who accepted confirmation." - }, - "notes": { - "type": "string", - "description": "Optional notes about the match relationship." - } - } - } - }, - "confirmedMatchDetails": { - "type": "object", - "description": "Detailed match metrics based on match type.", - "properties": { - "sharedHaplogroup": { - "type": "string", - "description": "Common haplogroup (Y-DNA or mtDNA matches)." - }, - "geneticDistance": { - "type": "integer", - "description": "STR genetic distance (Y-STR matches)." - }, - "tmrcaEstimate": { - "type": "object", - "description": "Estimated time to most recent common ancestor.", - "properties": { - "generations": { "type": "integer" }, - "yearsBeforePresent": { "type": "integer" }, - "confidenceInterval": { - "type": "object", - "properties": { - "lower": { "type": "integer" }, - "upper": { "type": "integer" } - } - } - } - }, - "sharedCm": { - "type": "float", - "description": "Total shared centiMorgans (autosomal IBD matches)." - }, - "segmentCount": { - "type": "integer", - "description": "Number of shared segments (autosomal IBD matches)." - }, - "relationshipEstimate": { - "type": "string", - "description": "Estimated relationship based on match data.", - "knownValues": ["PARENT_CHILD", "SIBLING", "GRANDPARENT", "AUNT_UNCLE", "1ST_COUSIN", - "2ND_COUSIN", "3RD_COUSIN", "4TH_COUSIN", "DISTANT", "UNKNOWN"] - } - } - } - } -} -``` - -### 6. Sync Status Record (`com.decodingus.atmosphere.syncStatus`) - -A record in the user's PDS tracking the sync state with the AppView. 
- -**NSID:** `com.decodingus.atmosphere.syncStatus` - -**Author:** AppView (DecodingUs) - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.syncStatus", - "defs": { - "main": { - "type": "record", - "description": "Tracks synchronization status between user's PDS and the AppView.", - "key": "literal:self", - "record": { - "type": "object", - "required": ["meta", "atUri", "lastSyncAt", "appViewVersion"], - "properties": { - "atUri": { - "type": "string", - "description": "The AT URI of this sync status record." - }, - "meta": { - "type": "ref", - "ref": "com.decodingus.atmosphere.defs#recordMeta" - }, - "lastSyncAt": { - "type": "string", - "format": "datetime", - "description": "Last successful sync with AppView." - }, - "appViewVersion": { - "type": "string", - "description": "Version of the DecodingUs AppView." - }, - "treeVersions": { - "type": "object", - "description": "Current haplogroup tree versions used.", - "properties": { - "yDna": { "type": "string" }, - "mtDna": { "type": "string" } - } - }, - "pendingUpdates": { - "type": "integer", - "description": "Number of pending updates to be applied." - }, - "unacknowledgedNotifications": { - "type": "integer", - "description": "Number of notifications user hasn't acknowledged." - }, - "biosampleSyncStates": { - "type": "array", - "description": "Per-biosample sync status.", - "items": { - "type": "ref", - "ref": "#biosampleSyncState" - } - } - } - } - }, - "biosampleSyncState": { - "type": "object", - "description": "Sync state for a single biosample.", - "required": ["biosampleRef", "status"], - "properties": { - "biosampleRef": { - "type": "string", - "description": "AT URI of the biosample." 
- }, - "status": { - "type": "string", - "description": "Current sync status.", - "knownValues": ["SYNCED", "PENDING_UPDATE", "UPDATE_AVAILABLE", "CONFLICT", "ERROR"] - }, - "lastUpdatedAt": { - "type": "string", - "format": "datetime" - }, - "pendingFields": { - "type": "array", - "description": "Fields with pending updates.", - "items": { "type": "string" } - } - } - } - } -} -``` - ---- - -## Backfeed Authorization Model - -### AppView Service Account - -The DecodingUs AppView operates as a service account with delegated write access to user PDS records. - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Authorization Flow │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ 1. User authenticates with Navigator or Web UI │ -│ 2. User grants "AppView Write" scope to DecodingUs │ -│ 3. DecodingUs receives delegated credential (DPoP-bound access token) │ -│ 4. AppView uses credential to write backfeed records to user's PDS │ -│ │ -│ ┌─────────────────────┐ │ -│ │ User PDS │ │ -│ │ │ │ -│ │ Scopes granted to │ │ -│ │ DecodingUs: │ │ -│ │ │ │ -│ │ ✓ read:biosample │ (read user's biosamples) │ -│ │ ✓ write:potentialMatches │ (write potential match candidates) │ -│ │ ✓ write:confirmedMatch │ (stamp confirmed matches) │ -│ │ ✓ write:update │ (write update notifications) │ -│ │ ✓ update:biosample │ (update haplogroup fields) │ -│ │ │ │ -│ └─────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### Scope Definitions - -| Scope | Allows | -|:---|:---| -| `com.decodingus.atmosphere:read` | Read all Atmosphere records | -| `com.decodingus.atmosphere:write:potentialMatches` | Create/update potential match candidate lists | -| `com.decodingus.atmosphere:write:confirmedMatch` | Stamp confirmed matches when both parties agree | -| `com.decodingus.atmosphere:write:notification` | Create notification records (updates, discoveries) | -| 
`com.decodingus.atmosphere:update:biosample` | Update specific fields on biosample records | -| `com.decodingus.atmosphere:write:syncStatus` | Maintain sync status record | - -### Consent Flow - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ AppView Consent Dialog (shown in Navigator or Web) │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ DecodingUs AppView is requesting access to your PDS: │ -│ │ -│ ┌─ Requested Permissions ────────────────────────────────────────────┐ │ -│ │ │ │ -│ │ ☑ Read your biosample records │ │ -│ │ Allow DecodingUs to read your genomic metadata │ │ -│ │ │ │ -│ │ ☑ Update your haplogroup assignments │ │ -│ │ Automatically apply refined haplogroups when tree updates │ │ -│ │ │ │ -│ │ ☑ Write potential match candidates │ │ -│ │ Notify you of potential genetic matches to explore │ │ -│ │ │ │ -│ │ ☑ Stamp confirmed matches │ │ -│ │ Record matches when both you and your match agree │ │ -│ │ │ │ -│ │ ☑ Send update notifications │ │ -│ │ Notify you when your data is updated │ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ [Grant Access] [Deny] │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - ---- - -## Backfeed Processing Pipeline - -### Architecture - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ DecodingUs Backend (AppView) │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ Event Sources │ │ -│ ├──────────────────┬──────────────────┬───────────────────────────┤ │ -│ │ Tree Update Job │ Match Discovery │ Analysis Pipeline │ │ -│ │ (scheduled) │ (network scan) │ (on file upload) │ │ -│ └────────┬─────────┴────────┬─────────┴─────────────┬─────────────┘ │ -│ │ │ │ │ -│ ▼ ▼ ▼ │ -│ 
┌─────────────────────────────────────────────────────────────────┐ │ -│ │ Backfeed Event Queue │ │ -│ │ (Kafka topic: decodingus.backfeed.events) │ │ -│ └──────────────────────────────┬──────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ Backfeed Processor Service │ │ -│ │ │ │ -│ │ 1. Retrieve user's delegated credential │ │ -│ │ 2. Build appropriate Lexicon record │ │ -│ │ 3. Write record to user's PDS │ │ -│ │ 4. Update local sync tracking │ │ -│ │ 5. Handle failures with retry │ │ -│ │ │ │ -│ └──────────────────────────────┬──────────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ PDS Write Client │ │ -│ │ (AT Protocol XRPC: com.atproto.repo.createRecord/putRecord) │ │ -│ └─────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ - │ - ▼ - ┌──────────────────────────┐ - │ User's PDS │ - │ - Updated records │ - │ - New notifications │ - │ - Sync status │ - └──────────────────────────┘ -``` - -### Event Types - -```scala -sealed trait BackfeedEvent { - def citizenDid: String - def biosampleAtUri: String - def priority: BackfeedPriority -} - -case class HaplogroupRefinementEvent( - citizenDid: String, - biosampleAtUri: String, - lineage: Lineage, - previousHaplogroup: String, - newHaplogroup: String, - reason: HaplogroupUpdateReason, - treeVersion: String, - priority: BackfeedPriority = BackfeedPriority.Normal -) extends BackfeedEvent - -case class BranchDiscoveryEvent( - citizenDid: String, - biosampleAtUri: String, - lineage: Lineage, - newBranchName: String, - parentBranch: String, - definingVariantIds: Seq[Int], - contributingSampleCount: Int, - priority: BackfeedPriority = BackfeedPriority.High -) extends BackfeedEvent - -case class PotentialMatchesEvent( - citizenDid: String, - biosampleAtUri: String, - candidateCount: 
Int, - newCandidates: Int, - removedCandidates: Int, - priority: BackfeedPriority = BackfeedPriority.Normal -) extends BackfeedEvent - -case class ConfirmedMatchEvent( - citizenDid: String, - biosampleAtUri: String, - matchedCitizenDid: String, - matchedBiosampleAtUri: String, - sharedCm: Float, - segmentCount: Int, - confirmedAt: Instant, - priority: BackfeedPriority = BackfeedPriority.High -) extends BackfeedEvent - -case class AnalysisCompleteEvent( - citizenDid: String, - biosampleAtUri: String, - analysisType: AnalysisType, - updatedRecordAtUri: String, - pipelineVersion: String, - priority: BackfeedPriority = BackfeedPriority.Low -) extends BackfeedEvent - -enum BackfeedPriority: - case High // Branch discovery, major haplogroup change, confirmed match - case Normal // Regular updates, potential matches - case Low // Analysis reruns, minor updates - -enum HaplogroupUpdateReason: - case TreeUpdate // Reference tree was updated - case BranchDiscovery // New branch added from consensus - case Correction // Manual curator correction - case Reclassification // Nomenclature change - case RefinedAnalysis // Better analysis with same data -``` - -### Processing Logic - -```scala -class BackfeedProcessorService( - pdsClient: PdsWriteClient, - credentialStore: DelegatedCredentialStore, - syncTracker: SyncTracker -) { - - def processEvent(event: BackfeedEvent): Future[BackfeedResult] = { - for { - // 1. Get user's delegated credential - credential <- credentialStore.getCredential(event.citizenDid) - .flatMap { - case Some(cred) if cred.isValid => Future.successful(cred) - case Some(cred) => refreshCredential(cred) - case None => Future.failed(NoCredentialException(event.citizenDid)) - } - - // 2. Build the appropriate record(s) - records <- buildRecords(event) - - // 3. 
Write to user's PDS - results <- Future.traverse(records) { record => - pdsClient.writeRecord( - credential = credential, - collection = record.collection, - record = record.data, - rkey = record.rkey - ) - } - - // 4. Update local sync tracking - _ <- syncTracker.recordBackfeed(event, results) - - // 5. Update user's syncStatus record - _ <- updateSyncStatus(credential, event.citizenDid) - - } yield BackfeedResult.Success(results.map(_.atUri)) - } - - private def buildRecords(event: BackfeedEvent): Future[Seq[BackfeedRecord]] = { - event match { - case e: HaplogroupRefinementEvent => - for { - // Create notification record - notification <- buildHaplogroupUpdateNotification(e) - // Optionally update biosample directly if user consented - biosampleUpdate <- if (autoUpdateEnabled(e.citizenDid)) { - buildBiosampleHaplogroupUpdate(e).map(Some(_)) - } else Future.successful(None) - } yield Seq(notification) ++ biosampleUpdate.toSeq - - case e: BranchDiscoveryEvent => - buildBranchDiscoveryNotification(e).map(Seq(_)) - - case e: PotentialMatchesEvent => - buildPotentialMatchesRecord(e).map(Seq(_)) - - case e: ConfirmedMatchEvent => - // Stamp confirmed match in BOTH parties' PDS - for { - record1 <- buildConfirmedMatchRecord(e, e.citizenDid) - record2 <- buildConfirmedMatchRecord(e, e.matchedCitizenDid) - } yield Seq(record1, record2) - - case e: AnalysisCompleteEvent => - buildAnalysisUpdateNotification(e).map(Seq(_)) - } - } -} -``` - ---- - -## Conflict Resolution - -### Scenario: Local and Remote Changes - -When Navigator syncs a locally-modified biosample that the AppView also updated: - -``` -Timeline: -───────────────────────────────────────────────────────────────────────── - t1: User syncs biosample with haplogroup R-L21 (atCid: abc123) - t2: AppView refines to R-L21>FT54321, writes to PDS (atCid: def456) - t3: User edits description locally (still has atCid: abc123) - t4: User attempts sync → CONFLICT (atCid mismatch) 
-───────────────────────────────────────────────────────────────────────── -``` - -### Resolution Strategy - -```scala -enum ConflictResolutionStrategy: - case AppViewWins // AppView-computed fields always win - case UserWins // User's local changes always win - case FieldLevel // Merge at field level - case Manual // Require user decision - -val fieldResolutionRules: Map[String, ConflictResolutionStrategy] = Map( - // AppView-computed fields - AppView always wins - "haplogroups.yDna.haplogroupName" -> ConflictResolutionStrategy.AppViewWins, - "haplogroups.yDna.score" -> ConflictResolutionStrategy.AppViewWins, - "haplogroups.yDna.lineagePath" -> ConflictResolutionStrategy.AppViewWins, - "haplogroups.mtDna.haplogroupName" -> ConflictResolutionStrategy.AppViewWins, - "haplogroups.mtDna.score" -> ConflictResolutionStrategy.AppViewWins, - "haplogroups.mtDna.lineagePath" -> ConflictResolutionStrategy.AppViewWins, - - // User-editable fields - User wins - "description" -> ConflictResolutionStrategy.UserWins, - "alias" -> ConflictResolutionStrategy.UserWins, - "donorIdentifier" -> ConflictResolutionStrategy.UserWins, - - // Complex fields - Manual resolution - "haplogroups.yDna.privateVariants" -> ConflictResolutionStrategy.Manual -) -``` - -### Navigator Conflict UI - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Sync Conflict Detected │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ Biosample: VIK-003 │ -│ │ -│ Your local version and the AppView version have both changed. 
│ -│ │ -│ ┌─ Automatic Resolution ─────────────────────────────────────────────┐ │ -│ │ │ │ -│ │ ✓ Y-DNA Haplogroup: Using AppView value │ │ -│ │ Local: R-L21 AppView: R-L21>FT54321 │ │ -│ │ (AppView-computed fields always use latest refinement) │ │ -│ │ │ │ -│ │ ✓ Description: Using your local value │ │ -│ │ Local: "Updated analysis notes" │ │ -│ │ AppView: "Deep WGS of Proband" │ │ -│ │ (User-editable fields preserve your changes) │ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Requires Your Decision ───────────────────────────────────────────┐ │ -│ │ │ │ -│ │ ⚠ Private Variants │ │ -│ │ │ │ -│ │ Local version has 5 private variants │ │ -│ │ AppView version has 3 (2 were promoted to R-L21>FT54321) │ │ -│ │ │ │ -│ │ ( ) Keep my 5 private variants │ │ -│ │ (•) Accept AppView's 3 (2 are now part of official branch) │ │ -│ │ ( ) Review each variant individually │ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ [Apply Resolution] [Cancel] │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - ---- - -## Notification Aggregation - -To avoid overwhelming users with individual notifications, the AppView aggregates updates: - -### Daily Digest Record (`com.decodingus.atmosphere.updateDigest`) - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.updateDigest", - "defs": { - "main": { - "type": "record", - "description": "Daily digest of all updates for a user's biosamples.", - "key": "tid", - "record": { - "type": "object", - "required": ["meta", "atUri", "periodStart", "periodEnd", "summary"], - "properties": { - "atUri": { "type": "string" }, - "meta": { "type": "ref", "ref": "com.decodingus.atmosphere.defs#recordMeta" }, - "periodStart": { "type": "string", "format": "datetime" }, - "periodEnd": { "type": "string", "format": "datetime" }, - "summary": { - "type": "object", - "properties": { - "haplogroupUpdates": { "type": 
"integer" }, - "branchDiscoveries": { "type": "integer" }, - "newPotentialMatches": { "type": "integer" }, - "confirmedMatches": { "type": "integer" }, - "analysisUpdates": { "type": "integer" } - } - }, - "updateRefs": { - "type": "array", - "description": "AT URIs of individual update notifications.", - "items": { "type": "string" } - }, - "highlights": { - "type": "array", - "description": "Most significant updates to call out.", - "items": { "type": "ref", "ref": "#digestHighlight" } - } - } - } - }, - "digestHighlight": { - "type": "object", - "properties": { - "type": { "type": "string", "knownValues": ["BRANCH_DISCOVERY", "CONFIRMED_MATCH", "HAPLOGROUP_REFINEMENT", "NEW_POTENTIAL_MATCH"] }, - "biosampleRef": { "type": "string" }, - "message": { "type": "string" } - } - } - } -} -``` - ---- - -## Implementation Phases - -### Phase 1: Notification Infrastructure -- Implement `haplogroupUpdate` and `branchDiscovery` notification records -- Build backfeed event queue and processor -- Establish delegated credential storage and management -- Create basic Navigator UI for viewing notifications - -### Phase 2: Collaborative Matching -- Implement `potentialMatchList` record for match candidates -- Add `confirmedMatch` record stamping when both parties agree -- Build match exploration UI in Navigator Workbench -- Implement match confirmation workflow - -### Phase 3: Direct Record Updates -- Implement `biosample.haplogroups` field updates with user consent -- Implement conflict resolution logic -- Extend Navigator sync to handle AppView-modified records - -### Phase 4: Full Sync Loop -- Add `syncStatus` record management -- Implement `updateDigest` for daily summaries -- Build comprehensive Navigator sync dashboard -- Add push notifications (optional) - -### Phase 5: Advanced Features -- Real-time WebSocket updates for immediate notification -- Selective sync (user can pause certain update types) -- Audit log accessible to users -- Dispute workflow for incorrect 
haplogroup assignments - ---- - -## Database Schema Additions - -```sql --- Track delegated credentials for PDS write access -CREATE TABLE pds_delegated_credential ( - id SERIAL PRIMARY KEY, - citizen_did TEXT NOT NULL UNIQUE, - access_token TEXT NOT NULL, - refresh_token TEXT, - token_type TEXT DEFAULT 'DPoP', - scopes TEXT[] NOT NULL, - expires_at TIMESTAMP NOT NULL, - created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW() -); - --- Track backfeed events sent to user PDS -CREATE TABLE backfeed_event_log ( - id SERIAL PRIMARY KEY, - citizen_did TEXT NOT NULL, - biosample_at_uri TEXT NOT NULL, - event_type TEXT NOT NULL, - event_payload JSONB NOT NULL, - record_at_uri TEXT, - status TEXT DEFAULT 'PENDING', -- PENDING, SENT, FAILED, RETRYING - attempts INT DEFAULT 0, - last_attempt_at TIMESTAMP, - error_message TEXT, - created_at TIMESTAMP DEFAULT NOW(), - sent_at TIMESTAMP -); - -CREATE INDEX idx_backfeed_event_status ON backfeed_event_log(status); -CREATE INDEX idx_backfeed_event_citizen ON backfeed_event_log(citizen_did); - --- Track sync state per biosample -CREATE TABLE biosample_sync_state ( - id SERIAL PRIMARY KEY, - citizen_did TEXT NOT NULL, - biosample_at_uri TEXT NOT NULL, - local_at_cid TEXT, - appview_at_cid TEXT, - last_user_update TIMESTAMP, - last_appview_update TIMESTAMP, - sync_status TEXT DEFAULT 'SYNCED', -- SYNCED, PENDING_USER, PENDING_APPVIEW, CONFLICT - pending_fields TEXT[], - created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW(), - - UNIQUE(citizen_did, biosample_at_uri) -); -``` - ---- - -## Open Questions - -1. **Notification Retention**: How long should notification records be kept in user's PDS? - -2. **Credential Refresh**: How to handle expired credentials when user hasn't connected Navigator in months? - -3. **Rate Limiting**: What limits should apply to AppView writes to prevent abuse? - -4. **User Preferences**: Should users be able to opt out of specific update types? - -5. 
**Offline Users**: How to queue updates for users whose PDS is temporarily unreachable? - -6. **Multi-AppView**: If user grants access to multiple AppViews, how to coordinate? - -7. **Match Confirmation Timeout**: How long should a match confirmation request remain pending before expiring? - -8. **Potential Match Criteria**: What thresholds (STR distance, shared haplogroup depth) qualify someone as a potential match? - ---- - -## Related Documents - -- [Atmosphere Lexicon Design](../Atmosphere_Lexicon.md) - Base record schemas -- [PDS Workbench Biosample Flow](../proposals/pds-workbench-biosample-flow.md) - Forward flow design -- [Haplogroup Discovery System](./haplogroup-discovery-system.md) - Branch discovery triggers -- [IBD Matching System](./ibd-matching-system.md) - Potential match identification criteria diff --git a/documents/planning/d1-encrypted-edge-exchange.md b/documents/planning/d1-encrypted-edge-exchange.md new file mode 100644 index 00000000..8bbe146d --- /dev/null +++ b/documents/planning/d1-encrypted-edge-exchange.md @@ -0,0 +1,282 @@ +# D1 — Encrypted Edge-to-Edge Exchange + AppView Broker + +**Status:** Design (v0, 2026-06-06). The shared foundation in the AppView roadmap +(`design-roadmap-rust-rewrite.md` §5). **Cross-repo:** specifies both the AppView +**broker** (decodingus) and the Navigator/Edge **exchange endpoint** (DUNavigator). +**Supersedes/generalizes** the crypto + Edge-coordination sections of the +original IBD requirements (now folded into D3), and is the substrate the Navigator FTDNA design +(`ftdna-project-import.md` §8) calls for. + +## 1. Purpose + +One encrypted, consent-gated, peer-to-peer exchange substrate that carries **any** +sensitive payload between two AT-Proto identities (DIDs), with AppView acting only +as a **broker** that never sees plaintext. Two consumers at launch: + +- **IBD comparison** — exchange encrypted variant positions / segment boundaries for + Edge-to-Edge IBD detection (the original IBD use; see D3). 
+- **Genealogy PII** — exchange member names, MDKA, kit↔subject linkage, and + PII-bearing assertions between **co-admins** of a project (FTDNA platform, §8). + +Both are the same problem: *get sensitive data from one Edge to another, with mutual +consent, without any server holding it.* Build the channel once. + +## 2. Invariants (non-negotiable) + +1. **AppView never sees plaintext.** It brokers discovery, consent, key-exchange + messages, and (optionally) relays **ciphertext only**. No PII, no genetic data, + no session keys at rest on AppView — ever. (Preserves the "anonymized-only" + posture; roadmap §3.) +2. **Dual consent precedes any key exchange.** Both DIDs must sign a consent record; + the broker verifies **both signatures** before notifying either Edge to begin. +3. **Forward secrecy.** Every session uses **ephemeral** ECDH keys; compromise of a + long-term key does not decrypt past sessions. +4. **Verifiable peer identity.** Session keys are bound to each peer's **DID + identity key** (Ed25519), so a peer cannot be impersonated and AppView cannot + MITM (it never holds a usable key). +5. **Plaintext at rest only on the Edge, encrypted.** Received PII/variants are + stored locally (Navigator SQLite), encrypted at rest; never re-uploaded. +6. **Least metadata.** The broker learns *that* A and B exchanged, when, and rough + size — the same social-graph metadata it already has from match requests. It + learns nothing about content. Padding/batching mitigations in §11. + +## 3. 
Role split + +| | **Edge (Navigator)** | **Broker (AppView)** | +| --- | --- | --- | +| Holds plaintext (PII/variants) | ✅ local, encrypted | ❌ never | +| Long-term identity key (Ed25519) | ✅ (via PDS/DID) | verifies signatures only | +| Ephemeral session keys (X25519) | ✅ generates/rotates | ❌ | +| Discovery / intent | consumes suggestions | ✅ generates (IBD suggestions; project co-membership) | +| Consent records | signs, writes to PDS | ✅ mirrors + **verifies dual-signature** | +| Key-exchange messages | sends/receives | ✅ **relays** (opaque) | +| Ciphertext payload | encrypts/decrypts | ✅ **blind relay** (store-and-forward) or ❌ (direct P2P) | +| Post-exchange action | IBD: compute+attest · Genealogy: decrypt+fold locally | indexes attestations (IBD) / records exchange-occurred | + +This is the IBD doc's split (§ "Edge App Responsibilities" / "DecodingUs +Responsibilities"), generalized beyond IBD. + +## 4. Cryptographic suite + +Reaffirms the IBD spec, with the identity-binding gap fixed: + +``` +Identity / signatures: Ed25519 (AT Proto DID key; already in du-atproto) +Key agreement: X25519 ECDH (NEW — add x25519-dalek) +Session key derivation: HKDF-SHA-256 +Payload encryption: AES-256-GCM (AEAD; per-message random 96-bit IV) +Integrity / summaries: SHA-256 +``` + +**The identity-binding fix.** The IBD doc says "keys derived from PDS signing keys +(verifiable)" — but the DID identity key is **Ed25519 (a signing key); it cannot do +ECDH**. Resolution: each participant publishes a **static X25519 exchange public +key** as an **Ed25519-signed PDS record** (`com.decodingus.exchange.publicKey`). The +signature ties the X25519 key to the DID, so a peer fetches it, verifies the +signature against the DID's identity key (`du-atproto::signature::verify_did_key`), +and trusts it. (Do **not** birationally map Ed25519→X25519; publish a dedicated key.) 
+ +**Per-session handshake (X3DH-lite, gives forward secrecy):** +- Each peer holds: static `IK_x25519` (published, signed) + a fresh **ephemeral** + `EK_x25519` per session. +- Shared secret `= ECDH(IK_A, EK_B) ‖ ECDH(EK_A, IK_B) ‖ ECDH(EK_A, EK_B)` → + `HKDF-SHA-256` → a session key. Static×ephemeral binds identity; ephemeral× + ephemeral gives forward secrecy. +- Session key encrypts payloads with AES-256-GCM (fresh IV per message; `seq` + counter in AAD to order/dedupe). Keys **rotated per session** (IBD doc). + +## 5. Handshake & session state machine (generic) + +``` + Edge A AppView (broker) Edge B + │ 1. intent (suggestion / co-membership) │ │ + │◀──────────────────────────────────────────│ │ + │ 2. exchange_request (signed PDS record) │ │ + │───────────────────────────────────────────▶ mirror + notify B │ + │ │──────────────────────▶│ + │ │ 3. consent (signed) │ + │ │◀──────────────────────│ + │ verify BOTH signatures (dual-consent gate) │ + │ 4. exchange-ready {partnerDid, partnerExchangeKeyUri} │ + │◀───────────────────────────────────────────────────────────────▶│ + │ 5. ECDH: publish/fetch static keys, swap ephemeral EK (relayed) │ + │◀───────────────── key-exchange messages (opaque) ───────────────▶│ + │ 6. encrypted payload ── blind relay (ciphertext) ──▶ │ + │ │──────────────────────▶│ + │ │ (B decrypts locally) │ + │ 7a. IBD: B computes, both sign + attest → AppView indexes │ + │ 7b. Genealogy: B folds PII locally; ack (exchange-occurred) │ +``` + +Steps 1–4 are the broker's job (PII-free); steps 5–7 are Edge-to-Edge (opaque to the +broker). The state machine generalizes IBD's Phase 1–4 (per D3): +**intent → request → dual-consent → exchange-ready → ECDH → encrypted +exchange → attest/ack.** + +## 6. Transport — DECIDED (2026-06-06): AppView-hosted blind relay primary, direct P2P later + +Edges are **desktop apps that are rarely online simultaneously**, and live behind +NAT. 
So: + +> **Default: AppView-hosted blind store-and-forward relay.** The sender posts an +> opaque envelope (ciphertext + minimal routing header) to the broker; it is held +> until the recipient pulls it, then **deleted on ack** (or on TTL). AppView can +> read **none** of it — it sees `{from_did, to_did, session_id, seq, size, +> created_at}` and an opaque blob. This is consistent with Invariant 1: a transport +> buffer of ciphertext is **not** a PII store. + +- **Why relay, not PDS-as-mailbox:** putting the ciphertext in a public AT-Proto + record would leak the *envelope metadata to the whole network*; the relay keeps it + within AppView, which already knows the social graph from consent records. Relay + also handles offline peers and TTL cleanly. +- **Why relay, not direct P2P (for now):** direct P2P (QUIC/WebRTC + NAT traversal) + needs both peers online and a signaling/TURN path — more moving parts for the + common "other admin is offline" case. **Direct P2P is a later optimization** for + large payloads when both are online; the relay remains the fallback. +- Relay caps: per-envelope size limit, TTL (e.g. 7 days), at-rest encryption of the + blob on AppView (defense-in-depth; it's already ciphertext), rate limits. + +*(Resolved: relay-primary confirmed over P2P-primary — see §12 Q1.)* + +## 7. The generic envelope & payload families + +``` +ExchangeEnvelope { + session_id: UUID, + seq: u64, // ordering / replay guard (in AEAD AAD) + purpose: 'IBD_AUTOSOMAL' | 'IBD_Y' | 'IBD_MT' | 'GENEALOGY_PII' | ..., + payload_type:'VARIANT_POSITIONS' | 'SEGMENT_BOUNDARIES' + | 'SUBJECT_BUNDLE' | 'PII_ASSERTION', + iv: [u8;12], + ciphertext: Vec<u8>, // AES-256-GCM + auth_tag: [u8;16], +} +``` + +**Payload families (plaintext shapes, defined per consumer):** +- **IBD** — `VARIANT_POSITIONS` / `SEGMENT_BOUNDARIES` (unchanged from the IBD doc).
+- **Genealogy** — `SUBJECT_BUNDLE`: `{ subject_id, external_ids[] (kit#…), member_name, + mdka[], notes }` (the PII the FTDNA importer holds, §4.2/§4.3 of the Navigator + doc); `PII_ASSERTION`: a single scoped assertion whose value contains PII + (`mdka_is`, `note`-with-name). These are exactly the records the Navigator design + routes to "encrypted P2P only" (§8.4). + +The envelope is payload-agnostic; consumers register a `purpose`/`payload_type` and a +post-decrypt handler. + +## 8. Broker schema (PII-free) — generalize `ibd.match_*` into `exchange.*` + +The existing `ibd.match_request` / `ibd.match_consent` (mig 0007) are the IBD-specific +seed. Generalize to a purpose-tagged `exchange` schema that IBD and genealogy share; +IBD's tables become a specialization (or a view) keyed by `purpose='IBD_*'`. + +``` +exchange.exchange_request ( + request_uri TEXT PRIMARY KEY, -- at:// URI of the signed PDS record + initiator_did TEXT NOT NULL, + partner_did TEXT NOT NULL, + purpose TEXT NOT NULL, -- IBD_* | GENEALOGY_PII + scope TEXT, -- e.g. 
'project:<id>' (consent boundary) + status TEXT NOT NULL, -- PENDING/CONSENTED/DECLINED/CANCELLED/EXPIRED + details JSONB NOT NULL DEFAULT '{}', created_at, updated_at +); +exchange.exchange_consent ( + id, request_uri REFERENCES exchange_request, consenting_did, consent_given BOOL, + consent_uri TEXT, signature TEXT NOT NULL, created_at -- both sigs verified +); +exchange.exchange_session ( + session_id UUID PRIMARY KEY, request_uri REFERENCES exchange_request, + status TEXT, -- ESTABLISHING/ACTIVE/COMPLETE/EXPIRED + created_at, expires_at +); +exchange.relay_envelope ( -- the blind buffer; ciphertext only + id, session_id REFERENCES exchange_session, from_did, to_did, seq INT, + size_bytes INT, blob BYTEA NOT NULL, -- opaque AES-GCM ciphertext envelope + created_at, expires_at, delivered_at -- deleted on ack or TTL +); +exchange.exchange_publickey ( -- mirror of the published, signed X25519 key + did TEXT PRIMARY KEY, x25519_pub BYTEA NOT NULL, key_uri TEXT, sig_verified_at +); +``` + +**Note:** `relay_envelope.blob` holds **ciphertext only**; storing it does **not** +violate Invariant 1 (AppView cannot decrypt it; it isn't a PII row). IBD's +`ibd_discovery_index` / `ibd_pds_attestation` keep their existing roles (attestation +indexing) downstream of a completed session. + +## 9. Code placement + +- **New shared crate `du-exchange`** (in `decodingus-shared`, used by Navigator and + the eventual Edge/IBD logic): X25519 (`x25519-dalek`), HKDF-SHA-256, AES-256-GCM + (`aes-gcm`), the `ExchangeEnvelope` (de)serialization, the X3DH-lite session + derivation, and the published-key record format. Pure Rust, no PII knowledge. +- **`du-atproto`** already provides Ed25519 signing/verification + DID resolution — + reused for the signed key record and consent signatures (no change beyond adding + the key-record helpers).
+- **Navigator `navigator-sync`** gains the Edge endpoint: publish/fetch the exchange + key record (via `PdsClient`), the relay client (post/pull/ack envelopes against the + broker), and the session driver. Builds on the existing `Session`/`PdsClient`. +- **AppView `du-web` + `du-db`**: the `exchange.*` query module + broker endpoints + (request mirror, dual-consent verify, exchange-ready notify, relay post/pull/ack). + +## 10. How the two consumers specialize + +| | **IBD** | **Genealogy PII** | +| --- | --- | --- | +| Intent source | `match_suggestion` (shared haplogroup/pop overlap) | shared **project co-membership** (admin team) | +| `purpose` | `IBD_AUTOSOMAL`/`IBD_Y`/`IBD_MT` | `GENEALOGY_PII` | +| Payload | variant positions / segment boundaries | `SUBJECT_BUNDLE` / `PII_ASSERTION` | +| Post-decrypt | both compute IBD, hash, **sign + attest** → AppView indexes match | recipient **folds PII into local store**; ack only (no server index) | +| Server record | `ibd_discovery_index` (match summary, PII-free) | none — exchange is private; only `current_view` of **non-PII** assertions (§8.2) | + +Same channel; different intent trigger and post-decrypt handler. + +## 11. Threat model & residual metadata + +- **Honest-but-curious AppView:** sees the social graph (who exchanged, when, size) — + identical to what consent records already reveal — and opaque ciphertext. Cannot + read content, cannot MITM (no usable key; static keys are DID-signed). Acceptable + given it already brokers matches. +- **Mitigations (later):** envelope **padding** to fixed size buckets; **batching**/ + cover traffic to blur timing; short relay TTL + delete-on-ack to minimize the + at-rest window. +- **Replay/reorder:** `seq` in AEAD AAD + session expiry. +- **Malicious peer:** can lie about *content* (e.g. a wrong MDKA) — out of scope for + the channel; handled at the assertion/provenance layer (§8.4) where claims are + attributed and disputable. 
The channel guarantees *who* and *confidentiality*, not + *truth*. +- **Compromised Edge:** plaintext at rest is encrypted; key material in OS keychain + (as the OAuth tokens already are, `navigator-sync`). + +## 12. Open questions / decisions + +1. ~~Transport~~ **DECIDED: blind-relay-primary** (§6); direct P2P is a later + large-payload optimization. +2. ~~Relay host~~ **DECIDED: AppView-hosted blind relay** (ciphertext + routing + metadata only, delete-on-ack/TTL). +3. ~~Generalize now vs. IBD-first~~ **DECIDED: introduce `exchange.*` now**; IBD's + eventual impl rides it, `ibd.match_*` folds in. +4. **Static-key rotation/revocation** — lifetime of the published X25519 key; revoke + by superseding the signed record. Define a rotation policy. +5. **Padding/cover-traffic** — in v1 or deferred? (Recommend: fixed-bucket padding in + v1; cover traffic later.) +6. **Group exchange** — a project has *N* co-admins; is exchange pairwise (N²) or is + there a group-key optimization? Pairwise for v1; revisit for large admin teams. + +## 13. Next step + +§12 Q1–Q3 decided (relay-primary, AppView-hosted, generalize-now). **AppView broker +BUILT (2026-06-12):** `exchange.*` schema (mig 0032; `ibd.match_*` folded/dropped) + +`du_db::exchange` (publish/fetch key, request, **dual-consent gate** → session, +pending, blind relay post/pull/ack, TTL expire) + `du-web` `/api/v1/exchange/*` +endpoints, all **signature-authenticated** (`du_atproto::verify_did_key`; `did:key` +direct, `did:plc/web` resolved — no OAuth/cookie) + `du-jobs exchange-expire`. The +broker is PII-free (Invariants 1/2/4/6 hold: verifies signatures, never sees plaintext +or keys, relays opaque ciphertext). Memory `exchange-broker`. + +**Remaining for end-to-end:** the **`du-exchange` crate** (X25519 + AEAD + envelope + +X3DH-lite) in `decodingus-shared` and the **Navigator relay client + session driver** +(DUNavigator repo) — neither is an AppView concern. 
Proven end-to-end by a +`GENEALOGY_PII` `SUBJECT_BUNDLE` round-trip between two test admins once Navigator's +Edge endpoint lands, then reused for IBD. +``` diff --git a/documents/planning/d2-research-subject-registry.md b/documents/planning/d2-research-subject-registry.md new file mode 100644 index 00000000..d34f147b --- /dev/null +++ b/documents/planning/d2-research-subject-registry.md @@ -0,0 +1,253 @@ +# D2 — ResearchSubject Registry + Cross-Admin Identity Resolution + +**Status:** Design (v0, 2026-06-06). AppView roadmap §5 D2. **Builds on:** D1 +(`d1-encrypted-edge-exchange.md`) for the encrypted channel; **reuses** the IBD +resolver (D3) for genetic same-person signals; **feeds** D4 (assertions) / D5 +(group projects). **Cross-repo:** AppView registry + Navigator local mapping. + +> **Corrects the earlier sketch.** The Navigator FTDNA design (§8.3) proposed AppView +> storing **salted `id_hashes[]`** of kit numbers (`HMAC(project_salt, kit#)`) as the +> deterministic match key. **That does not survive scrutiny** (§4): kit numbers are a +> small, enumerable space, and any salt AppView can see (or extract from a client) lets +> it brute-force every hash back to the kit#. D2 replaces it: **AppView stores no +> identifiers or hashes at all**; exact matching happens **Edge-to-Edge over D1**. + +## 1. Purpose + +Give the collaboration layer a **vendor-neutral, PII-free "person" node** that +co-admins can attach assertions to and resolve across each other's imports — without +AppView ever learning a name, a kit number, or even a hash of one. + +## 2. What a ResearchSubject is (and is not) + +A **ResearchSubject** is a pseudonymous handle for "a person under research in a +project context." 
At AppView it is **almost empty**: + +``` +research_subject_id : UUID -- random; the ONLY cross-admin handle +custody_did : DID | null -- null = admin-stewarded; set when the member claims it + -- (no names, no kit#, no MDKA, no hashes) +``` + +It **is not**: +- `core.biosample` — that is a *federated, anonymized sample* from `fed.*` ingest. A + ResearchSubject **may** point at one (if the person published anonymized data) but + usually **does not**: the common bootstrap case is an FTDNA member who is **not on + platform**, whose clear-text identity lives only in the importing admin's local + store. ResearchSubject is the sparser, person-level node. +- A PII record. Names/MDKA/kit# never appear here or anywhere server-side. + +## 3. The three-layer identity picture + +``` + LOCAL (each admin's Navigator, clear-text, PII) SHARED (AppView, pseudonymous) + ┌───────────────────────────────────────┐ ┌──────────────────────────┐ + │ biosample.guid (local Subject) │ maps to │ research_subject_id (UUID)│ + │ external_id(source,id) e.g. FTDNA #128753 │◀────────▶│ + project membership │ + │ ftdna_member / mdka (names, ancestors)│ (local │ + custody_did │ + └───────────────────────────────────────┘ table) │ + current_view (non-PII)│ + ▲ exact match via D1 channel ▲ └──────────────────────────┘ + │ (co-admins exchange id lists, consented) ▲ + └──────── genetic match via IBD/D3 ──────────────────┘ + (optional) → core.biosample + if the person federates data +``` + +- **Local (Navigator):** clear-text identity — `biosample.guid`, `external_id` + (kit#), `ftdna_member`, `mdka`. Never leaves the box except as encrypted D1 payload. +- **Shared (AppView):** the pseudonymous `research_subject_id` + project memberships + + custody + the non-PII `current_view`. 
**No identifiers.** +- **The map between them** (`biosample.guid ↔ research_subject_id`) is held **locally + by each admin**; admins reconcile their maps to a common `research_subject_id` + through the resolution mechanisms in §4. + +## 4. Resolution — how two admins agree on the same ResearchSubject + +Three mechanisms, in precedence order. **None reveals an identifier to AppView.** + +### 4.1 Deterministic exact match — id-list exchange over D1 (v1) + +Within a **shared project**, two co-admins have already consented to collaborate +(D1 dual-consent). Co-admins in FTDNA's GAP see *all* members of a shared project +anyway, so **exchanging their `(source, external_id)` lists over the encrypted D1 +channel is within the consented scope** — no fancy crypto needed: + +1. Admin A and B establish a D1 session (`purpose=GENEALOGY_PII`). +2. They exchange their project's `(source, external_id)` lists (encrypted). +3. Each computes the **intersection locally**; matching kits ⇒ same person. +4. For a match, they agree on a **shared `research_subject_id`** (lexicographically- + lower admin's existing id wins, or mint one) and each records the mapping locally; + one registers the subject + both memberships at AppView (pseudonymous). + +**AppView sees:** two `research_subject_id`s gained a second project membership. +**It never sees** the kit numbers or that "128753" was the link. + +> **Why not AppView-side hashing?** Kit numbers are ~6–7 digit enumerable values; a +> broker that holds `HMAC(salt, kit#)` and can obtain the salt (it ships in the +> client) brute-forces the whole space in milliseconds. Edge-to-Edge id exchange +> keeps the broker blind by construction. (PSI — §4.4 — is the upgrade for the +> *cross-project* case where admins should learn only the intersection.) 
+ +### 4.2 Genetic same-person / close-kin — IBD over D1 (reuses D3) + +When ids differ but the people may be the same (or close kin), run the **IBD/ +haplotype comparison Edge-to-Edge** (D3, same D1 channel) → a *suggested* merge with +a confidence, surfaced to both admins. **Never auto-merged.** This catches duplicates +across vendors (FTDNA kit vs. a direct-WGS sample) where no shared id exists. + +### 4.3 Assertion-mediated — pseudonymous `same_person` (D4) + +An admin publishes a `same_person(research_subject_id_A, research_subject_id_B)` +assertion (pseudonymous ids only, no kit#). The group accepts/rejects; provenance +retained. This is the manual override and the audit trail for 4.1/4.2 outcomes. + +### 4.4 Cross-project linking — member-claim only (NOT silent AppView merge) + +Auto-linking the *same person across projects they did not consent to be linked +across* is **privacy-hostile** (cross-context deanonymization) and is **deliberately +not** an AppView background job. Cross-project consolidation happens only when the +**member themselves claims** their subjects (§6) and chooses to merge them. PSI +(§4.1 note) is the future tool that would let two *non-co-admins* discover a shared +member with consent — out of scope for v1. + +## 5. 
AppView schema (`research.*`, PII-free) + +```sql +CREATE SCHEMA research; + +CREATE TABLE research.research_subject ( + research_subject_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + custody_did TEXT, -- null = admin-stewarded; set on claim + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); -- deliberately no ids/names/hashes + +CREATE TABLE research.subject_membership ( + research_subject_id UUID NOT NULL REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE, + project_id BIGINT NOT NULL, -- group-project (D5) + steward_did TEXT NOT NULL, -- the admin who holds the local clear-text identity + added_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (research_subject_id, project_id) +); +CREATE INDEX subject_membership_project_idx ON research.subject_membership(project_id); + +CREATE TABLE research.subject_link ( -- audit of 4.1/4.2/4.3 merges (pseudonymous) + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + subject_a UUID NOT NULL, subject_b UUID NOT NULL, -- merged ids (a kept, b retired) + method TEXT NOT NULL, -- ID_EXCHANGE | GENETIC | ASSERTION | CLAIM + asserted_by_did TEXT NOT NULL, confidence DOUBLE PRECISION, created_at TIMESTAMPTZ DEFAULT now() +); + +-- optional, sparse: link to a federated sample IF the person published anonymized data +CREATE TABLE research.subject_biosample ( + research_subject_id UUID REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE, + sample_guid UUID REFERENCES core.biosample(sample_guid), + PRIMARY KEY (research_subject_id, sample_guid) +); +``` + +`current_view` (the materialized, non-PII per-subject summary — branch assignments, +pseudonymous links, aggregate stats) is produced by the **assertion store (D4)**, not +D2; D2 only owns the registry + memberships + link audit. + +**Invariant:** every column above is pseudonymous. A reviewer can confirm no PII path +exists into `research.*`. + +## 6. 
Custody & member-claim (high level; proof = open Q) + +- **Stewardship:** on import, each Subject is **admin-stewarded** — `custody_did = + null`, `subject_membership.steward_did` = the importing admin. "Steward" just means + "the admin whose local store holds the clear-text identity"; AppView stores no PII. +- **Claim:** a member onboards (gets a DID/PDS), proves to a steward admin that they + control the kit (mechanism TBD — §10), the admin transfers custody over D1, and the + steward sets `custody_did = member_did` (a pseudonymous pointer flip). The member + then controls their own clear-text data locally and decides federation/visibility + (the `group-project-system.md` sovereign end-state). +- **Proof of kit control is the open question** — AppView can't verify it (no PII). + Likely admin-mediated (the steward, who *does* hold the kit data, vouches) or a + vendor credential the member presents to the admin. Specify in D5/claim follow-up. + +## 7. Navigator-side local model + +Each admin's Navigator gains the local map (clear-text side stays in SQLite): + +```sql +-- Navigator local (migration alongside FTDNA import 0014–0016) +CREATE TABLE subject_shared_id ( + biosample_guid TEXT PRIMARY KEY REFERENCES biosample(guid), + research_subject_id TEXT NOT NULL, -- the AppView pseudonym + project_id INTEGER, -- which shared project this binding is for + custody TEXT NOT NULL DEFAULT 'STEWARDED' -- STEWARDED | CLAIMED_BY_ME | CLAIMED_BY_OTHER +); +``` + +- On **import**, a local Subject has no shared id until the project is joined/shared. +- On **join a shared project**, Navigator mints a `research_subject_id` per Subject (or + links existing via §4.1 id-exchange), registers pseudonymous nodes + memberships at + AppView, and stores the binding locally. +- The `external_id`↔`research_subject_id` correspondence is **only** local + exchanged + over D1; never sent to AppView. + +## 8. 
End-to-end: two admins, one shared project + +``` +Admin A imports FTDNA project P (local Subjects, kit#s, MDKA — all local) +Admin B is invited as co-admin of P ── D1 dual-consent ──▶ session +A ⇄ B exchange (source,id) lists over D1 (encrypted) [§4.1] + → local intersection: kits 128753, 145002 match +A & B agree shared research_subject_id for each match; mint new for the rest + register pseudonymous subjects + memberships at AppView (no ids) +A ⇄ B exchange SUBJECT_BUNDLE (names, MDKA) over D1 for shared subjects [D1/§7] + → each folds PII into its own local store +later genetic suggestion (D3) flags kit 9001 (A) ≈ sample S (B) → suggested merge + → B accepts via same_person assertion (D4); subject_link audited +member M onboards, proves control of 128753 to steward A → custody flips to M_did [§6] +``` + +AppView's whole view of this: a few pseudonymous `research_subject` rows gained +memberships and links. No names, no kits, no MDKA — ever. + +## 9. Threat model / what AppView learns + +- **Pseudonymous social graph:** which `research_subject_id`s belong to which projects + and which DIDs steward them — the same membership graph it needs for D5 ACLs, and no + worse than GAP's "admins know their project's members." No identifiers. +- **No identifier exposure:** kit#/name/MDKA never reach AppView; exact matching is + Edge-to-Edge (§4.1); the broker can't brute-force what it never stores. +- **Steward de-anonymization risk:** `steward_did` links a person-node to a real admin + DID. That's inherent (someone must hold the data) and bounded — it reveals + *custodianship*, not identity. Mitigation: stewardship is per-project, and claim + (§6) lets the member take over. +- **Malicious admin** can mint bogus subjects or wrong links — bounded by the + assertion/provenance layer (D4): links are attributed and disputable. + +## 10. Open questions / decisions + +1. 
~~Deterministic mechanism~~ **DECIDED: id-list exchange over D1** within a shared + project (§4.1, GAP-equivalent); AppView stores no ids/hashes. PSI deferred for the + cross-project case. +2. ~~Cross-project linking policy~~ **DECIDED: member-claim only** (§4.4); no silent + AppView cross-context merge. +3. **Proof of kit control** for member-claim (§6) — admin-vouch vs. vendor credential + vs. a challenge the member completes. Blocks the claim flow, not the registry. +4. **`research.*` vs. fold into group-project schema (D5)** — separate schema + (recommended for the PII-free invariant clarity) vs. co-locate with projects. +5. **Merge mechanics** — keep-lower-id vs. mint-new on merge; how `subject_link` + retirement cascades to memberships/assertions. Define with D4. + +## 11. Next step + +§10 Q1–Q2 decided (id-exchange-over-D1, member-claim-only). **AppView registry BUILT +(2026-06-12):** `research.*` schema (mig 0033 — research_subject [+`retired_into` +tombstone], subject_membership → `social.group_project`, subject_link audit, +subject_biosample) + `du_db::research` (register_in_project, **tombstone** merge_subjects +[repoint + audit, no delete], set_custody, link_biosample, authz readers) + `du-web` +`/api/v1/research/*` endpoints, **signature-authenticated** (`crate::sig::verify_signed`, +shared with D1) **and authorized** from existing data (register → project owner; merge → +steward of both; custody → subject's steward; read → project participant). PII-free +invariant holds. Memory `research-subject-registry`. + +**Remaining (Navigator / D4 / D5):** the Navigator `subject_shared_id` local map + the +D1 id-exchange join-flow that *populates* the registry (DUNavigator); proof-of-kit-control +for member-claim (§10 Q3); `current_view` + `same_person` assertions (D4); full +project-admin ACL beyond owner-gating (D5). §4.2's genetic resolver is the D3 candidate +engine (already built), sharing D1's channel. 
diff --git a/documents/planning/d3-ibd-matching-impl.md b/documents/planning/d3-ibd-matching-impl.md new file mode 100644 index 00000000..70dbb954 --- /dev/null +++ b/documents/planning/d3-ibd-matching-impl.md @@ -0,0 +1,297 @@ +# D3 — IBD Matching: Rust Implementation Spec + +**Status:** Design (v0.1, 2026-06-12 — added §3.0/3d/3e scope control: ancestry +blocking + matches-of-matches graph expansion + query-vs-panel cold start, so +candidate generation is never N:N and the AppView emits only bounded top-K lists; +flagged the D1-independent first slice). AppView roadmap §5 D3. **Implements** the +original IBD requirements **on top of D1** +(`d1-encrypted-edge-exchange.md`) and the actual Rust schema; **supplies** D2's +genetic resolver (`d2-research-subject-registry.md` §4.2). This doc now carries the +requirements (the standalone planning doc was removed as superseded) and the build +spec. **Cross-repo:** AppView coordinator + +Navigator Edge analysis (`navigator-analysis/src/ibd.rs` already exists). + +## 1. What changed since the planning doc (the refresh) + +The original doc (Scala/Tapir era) invented its own crypto, key exchange, and P2P +channel. Three things change: + +1. **The channel is D1, not bespoke.** IBD is now just a **consumer of the + `exchange.*` substrate** with `purpose ∈ {IBD_AUTOSOMAL, IBD_X, IBD_Y, IBD_MT}`. + Drop the IBD-specific ECDH/relay invention (D1 owns it). The planning doc's + `ibd.match_request`/`match_consent` **fold into `exchange.exchange_request`/ + `exchange_consent`** (D1 §8); IBD-specific tables (`ibd_discovery_index`, + `ibd_pds_attestation`, `match_suggestion`) stay. +2. **Rust, not Scala.** The four service traits become `du-db` query modules + + `du-web` axum handlers + a `du-jobs` discovery job. Concrete SQL below. +3. **AppView mines candidates from `fed.*` anonymized aggregates only.** It never + touches raw genotypes; the actual IBD segment detection is **Edge-to-Edge** over + D1. 
(Unchanged in spirit from the doc's security section; made concrete here.) + +## 2. Architecture (one line each) + +- **AppView (coordinator, PII/genotype-free):** mine candidate pairs from `fed.*` + → `match_suggestion`; broker request + **dual-consent** (via `exchange.*`/D1); + notify both Edges "match-ready"; verify + index **attestations** (match *summaries* + only); serve match-list API. Never sees a genotype. +- **Edge (Navigator, holds genotypes):** establish the D1 session; exchange encrypted + variant positions / segment boundaries; run the IBD algorithm + (`navigator-analysis::ibd`); cross-verify; sign + attest; classify relationship. + +## 3. Candidate mining (the discovery engine) — `du-jobs` + `du-db::ibd` + +A scheduled `du-jobs` job (`ibd-discovery-recompute`, alongside the existing +`branch-age-recompute`), incremental per sample. Three signals → `ibd.match_suggestion` +rows (existing table: `target_sample_guid`, `suggested_sample_guid`, `suggestion_type`, +`score`, `status`), ranked. All inputs are **anonymized `fed.*` / `ibd.*` aggregates**. + +### 3.0. Scope control — block, don't pair (the load-bearing principle) + +The AppView must **never materialize an N×N pair list**, and must **never hand a +Navigator client "everyone."** Each sample gets a **bounded, ranked, top-K candidate +list**; the Edge then runs IBD (or a query-vs-panel search) only against that K — so +each client is O(K), not O(N). 
Two cheap mechanisms keep candidate generation +near-linear (this is record-linkage *blocking* + graph expansion, not all-pairs +scoring): + +- **Block by ancestry before scoring.** Bucket samples by a cheap key and only score + *within* buckets: a coarse block on the continental rollup + (`fed.population_breakdown.super_population_summary`) drops cross-continental pairs + outright, and a finer block on the published **PCA coordinates** + (`fed.population_breakdown.pca_coordinates` — grid-bin or LSH the PCA space) restricts + the overlap computation to near neighbours. O(N²) → ~O(N·k). +- **Expand the match graph (matches-of-matches) as the steady state.** Once a sample + has any confirmed edge in `ibd_discovery_index`, its best new candidates are its + **2-hop neighbourhood** — cheap graph traversal, not pairing. Ancestry-blocking is + only the **cold-start seeder**; graph expansion is the primary generator thereafter. + +Cap to **top-K per sample** by combined score (`expires_at` ages out the rest). The +existing `ibd.population_overlap_score` table is therefore populated **only for +within-block pairs, incrementally as samples arrive** — never the full N². Research +backing: §3e. + +### 3a. Haplogroup match (cheapest; gates the rest for Y/MT) +Same terminal Y or mt haplogroup ⇒ candidate patriline/matriline match. +```sql +-- suggestion_type = 'HAPLOGROUP'; region from which haplotype matched +INSERT INTO ibd.match_suggestion (target_sample_guid, suggested_sample_guid, suggestion_type, score, metadata) +SELECT a.sample_guid, b.sample_guid, 'HAPLOGROUP', + depth_score(a.haplogroup, b.haplogroup), -- deeper shared terminal = higher + jsonb_build_object('region', 'Y', 'haplogroup', a.haplogroup) +FROM fed.haplogroup_reconciliation a +JOIN fed.haplogroup_reconciliation b + ON a.dna_type = b.dna_type AND a.haplogroup = b.haplogroup AND a.sample_guid < b.sample_guid +WHERE a.dna_type = 'Y'; -- and again for 'Mt' +``` + +### 3b. 
Population overlap (autosomal candidate gate) +`Σ min(A[pop], B[pop])` over `ibd.population_breakdown`; **cached** in +`ibd.population_overlap_score`. Never compute the full N²: the `gated_pairs` set is the +**ancestry block** from §3.0 — same `super_population_summary` bucket **and** the same +PCA grid/LSH cell (`fed.population_breakdown.pca_coordinates`), plus the haplogroup +bucket for Y/mt-line requests. Score only those within-block pairs; persist +incrementally as samples join. +```sql +-- overlap from the cached breakdown JSONB; only for pre-gated pairs +WITH pair_overlap AS ( + SELECT s1, s2, SUM(LEAST(p1.frac, p2.frac)) AS score + FROM gated_pairs g + JOIN ibd.population_breakdown_cache c1 ON c1.sample_guid = g.s1, jsonb_each_text(c1.breakdown) p1(pop, frac_t) + JOIN ibd.population_breakdown_cache c2 ON c2.sample_guid = g.s2, jsonb_each_text(c2.breakdown) p2(pop, frac_t) + WHERE p1.pop = p2.pop -- (frac cast to double) + GROUP BY s1, s2) +INSERT INTO ibd.match_suggestion (...) SELECT s1, s2, 'POPULATION_OVERLAP', score, ... +FROM pair_overlap WHERE score >= :min_overlap; -- default 0.6 +``` + +### 3c. Shared-match (the **primary** generator once the graph is seeded) +The "in-common-with" / shared-match principle (the basis of every consumer clustering +tool — the Leeds Method, AutoClusters): samples that match the same third parties share +common ancestors. This is **2-hop graph expansion** over `ibd_discovery_index`, not +all-pairs scoring — cheap and high-yield, so it is the steady-state generator (§3.0). +```sql +-- over confirmed matches in ibd_discovery_index (the match graph) +SELECT a.other AS s1, b.other AS s2, COUNT(*) AS shared +FROM matches_of a JOIN matches_of b ON a.match = b.match AND a.other < b.other +GROUP BY a.other, b.other HAVING COUNT(*) >= :min_shared -- default 2 +-- → suggestion_type = 'SHARED_MATCH', score = shared count +``` +(`matches_of` = a view unnesting `ibd_discovery_index` into (sample, matched-sample).) 
+**Endogamy caveat:** pedigree collapse / endogamous ancestries smear clusters together +(everyone shares everyone), inflating false candidates. Detect via PCA-cell density / +ancestry tag and **cap + down-weight** `SHARED_MATCH` there (and prefer larger +`min_shared`). + +### 3d. Cold start = query-vs-panel, not panel-vs-panel +A brand-new sample has no graph edges to expand (§3c) — seed it from the **ancestry +block** (§3.0/3b) only. Critically, the Edge then does a **one-vs-many query against +that block as a panel**, not an N:N comparison (RaPID-Query-class search: a single +query against a biobank-scale panel in seconds, error-tolerant). The AppView's job is +to **supply the right panel subset** (the block) — never an all-pairs list. After the +first few confirmed matches land, the sample switches to graph expansion. + +**Ranking & lifecycle:** combine the three scores (weighted), dedupe per pair, **cap to +top-K per target**, expire stale suggestions (`status` ACTIVE/DISMISSED/EXPIRED/ +CONVERTED, `expires_at`). `du-db::ibd::suggestions_for(sample|did, limit)` serves them +ranked. The AppView emits only this bounded list — the no-N:N guarantee (§3.0). + +### 3e. Research backing +- **Don't conflate detection with selection.** Genotype-level all-pairs IBD *detection* + (PBWT family: RaPID, hap-IBD, 23andMe/Ancestry TPBWT, the newer kL-SMEM/PBML work) is + the **Edge's** job — the AppView holds no genotypes. The AppView solves *candidate + selection* (metadata blocking + graph expansion). +- **Query-vs-panel** ([RaPID-Query](https://pmc.ncbi.nlm.nih.gov/articles/PMC10244210/), + [L-PBWT-Query](https://pmc.ncbi.nlm.nih.gov/articles/PMC6612857/)) is the Edge-side + one-vs-many that makes a new joiner O(panel-query), not O(N²) — the basis of §3d. +- **Ancestry/PCA blocking** is standard record-linkage blocking (Christen, *Data + Matching*) — the basis of §3.0/3b. 
+- **Shared-match clustering** ([Leeds Method](https://www.pricegen.com/dna-shared-matches-and-clustering/), + [AutoClusters](https://www.gedmatch.com/blog/what-are-dna-autoclusters/)) is the basis + of §3c, including the documented [endogamy failure mode](https://dna-explained.com/2025/07/10/how-to-use-ancestrys-new-match-clusters-and-what-they-mean/). + +## 4. Request + dual-consent (on `exchange.*` / D1) + +The planning doc's Phase 2 maps directly onto D1's request→consent gate; IBD adds only +the `purpose` and the discovery reason: + +1. Requester writes a signed `exchange_request` PDS record (`purpose=IBD_*`, + `details = {requesterSampleUri, discoveryReason, regionType}`) → AppView mirrors it + (`exchange.exchange_request`), notifies target. +2. Both parties sign `exchange_consent`; AppView **verifies both signatures** (the + dual-consent gate, D1 Invariant 2) and flips the request to `CONSENTED`. +3. AppView emits **exchange-ready** to both Edges (D1 §5 step 4) with `partnerDid` + + `partnerExchangeKeyUri`. + +`du-db::ibd` (or `du-db::exchange`): `create_request`, `record_consent`, +`mutual_consent(request_uri)`, `pending_for(did)`. `du-web` routes under +`/api/v1/exchange/*` (shared) with IBD-specific discovery context. + +## 5. Edge handoff = a D1 session (the only IBD-specific Edge logic) + +Once exchange-ready, Phase 3 *is* a D1 session — no bespoke channel: + +1. D1 ECDH session (`purpose=IBD_Y` etc.), per D1 §4–5. +2. Exchange `payload_type ∈ {VARIANT_POSITIONS, SEGMENT_BOUNDARIES}` (D1 §7) over the + blind relay. +3. **Both Edges run the IBD algorithm locally** (`navigator-analysis::ibd`): + - **Autosomal/X:** IBD *segment* detection over shared positions → `{totalSharedCm, + numSegments, largestSegmentCm}`. + - **Y:** STR genetic distance + terminal-SNP concordance (patriline TMRCA estimate). + - **MT:** HVR/coding mutation distance (matriline). +4. 
**Cross-verify:** both hash the canonical summary (SHA-256); matching hashes confirm + a valid, agreed result (D1 §5 step 7a; planning doc Phase 3.3). +5. **Attest:** each signs the summary with its Ed25519 PDS key, writes an attestation + record to its PDS. + +The IBD algorithm itself is **Edge analysis** (Navigator), out of scope for AppView; +`navigator-analysis/src/ibd.rs` is its home. AppView only ever sees the *summary*. + +## 6. Attestation indexing (`du-jobs` Jetstream + `du-db::ibd`) + +AppView's Jetstream consumer already ingests `fed.*`; add the IBD attestation +collection. On both attestations for a request: +- `verify_attestations`: both Ed25519 signatures valid (`du-atproto::signature`) **and** + `matchSummaryHash == partnerSummaryHash` (the two Edges agreed). +- Index `ibd.ibd_discovery_index` (pair, `match_region_type`, `total_shared_cm_approx`, + `num_shared_segments_approx`, `consensus_status`) + two `ibd.ibd_pds_attestation` + rows. Mark the `match_suggestion` `CONVERTED`. +- `update_consensus_status`: INITIAL_REPORT → CONFIRMED on matching dual attestation; + DISPUTE on mismatch. + +**Only summaries are indexed** — never positions, never genotypes (planning doc +Security; D1 Invariant 1). + +## 7. 
Relationship classification → feeds D2's resolver + +The IBD summary is classified into a relationship band (standard autosomal cM ranges), +Edge-side, and the band drives **both** the match UI **and** D2: + +| Band | ~Shared | Action | +| --- | --- | --- | +| **Same person / identical** | ~full genome (autosomal) · Y+mt identical | **→ D2 §4.2 merge suggestion** (`subject_link method=GENETIC`), never auto | +| Parent/child, full sib | ~3400 / ~2550 cM | close-kin match; surface prominently | +| 2nd–4th cousin … | banded by cM | normal match list | +| Y-only / MT-only | patriline/matriline | lineage match (no autosomal claim) | + +So D3 *is* D2's genetic resolver: a confirmed **same-person** (or near-identical) +classification emits a pseudonymous merge suggestion into the ResearchSubject layer +(`research.subject_link`, method `GENETIC`, with the confidence), which the group +accepts via a `same_person` assertion (D4). Close-kin bands stay in the match list, +not the subject-merge path. + +## 8. API surface (`du-web`, axum + utoipa) + +``` +GET /api/v1/ibd/suggestions?limit= -> ranked match_suggestion[] (auth: owner DID) +POST /api/v1/ibd/suggestions/:id/dismiss +POST /api/v1/exchange/requests -> create (purpose=IBD_*) (shared w/ D1) +GET /api/v1/exchange/requests/pending +POST /api/v1/exchange/consent +POST /api/v1/ibd/attestation -> Edge submits signed summary (planning doc §4.4) +GET /api/v1/ibd/matches?sample= -> confirmed matches (summaries only) +GET /api/v1/ibd/matches/:a/:b +``` +DTOs: `MatchSuggestionDto`, `MatchDto {totalSharedCm, numSegments, largestSegmentCm, +regionType, consensusStatus}`, `AttestationSubmission`. No genotype/position DTOs exist +by construction. + +## 9. Schema deltas + +- `ibd.*` (mig 0007) mostly stands: `ibd_discovery_index`, `ibd_pds_attestation`, + `match_suggestion`, `population_*`, `validation_service` — keep.
+- **Generalize** `ibd.match_request`/`ibd.match_consent` → `exchange.exchange_request`/ + `exchange_consent` (D1 §8) via a migration; an `ibd` *view* over `purpose='IBD_*'` + preserves call sites if useful. +- Add the **IBD attestation Jetstream collection** to the consumer's + `INGEST_COLLECTIONS`. +- `match_suggestion` already has `metadata JSONB` for discovery reason — no change. + +## 10. Module placement + +- **AppView:** `du-db::ibd` (mining SQL, suggestions, match indexing, attestation + verify), `du-web::routes::ibd` (+ shared `exchange` routes), `du-jobs` + `ibd-discovery-recompute` job + attestation ingest in the Jetstream consumer. +- **Navigator (Edge):** `navigator-analysis::ibd` (the segment/distance algorithms), + `navigator-sync` (D1 session driver + attestation publish), reusing D1's + `du-exchange`. +- **Shared:** `du-domain` for the relationship-band thresholds + canonical + summary-hash format (so Edge and AppView agree on what's signed). + +## 11. Privacy invariants (restate, they're load-bearing) + +- AppView mines candidates from **anonymized `fed.*` aggregates** only (haplogroups, + population breakdowns, the match graph). No raw genotype ever reaches it. +- Edge-to-Edge exchange carries positions/segments **encrypted via D1**; AppView sees + only signed **summaries** (cM, segment counts). +- Same-person merge suggestions to D2 are **pseudonymous** (`research_subject_id`s), + never carrying an identifier. + +## 12. Open questions / decisions + +1. **IBD algorithm provenance** — does `navigator-analysis::ibd` implement segment + detection from scratch, or wrap a known method? (Affects Edge effort, not AppView.) +2. **Phasing requirement** — autosomal IBD wants phased haplotypes; do we require + phasing on the Edge, or do unphased segment detection (lower precision)? +3. **Shared-match cold start** — 3c needs an existing match graph; bootstrap from + 3a/3b only until the graph fills. Confirm acceptable. +4. 
**Population-overlap N² control** — the pre-gate (3b) must keep pair counts sane; + define the gate (haplogroup/region bucket) precisely. +5. **`match_request`→`exchange_request` migration timing** — do it with D1's schema or + lazily. Recommend with D1 (one migration). + +## 13. Next step + +D3 closes the **Match track** (D1→D3). **D1-independent first slice — BUILT +(2026-06-12):** candidate generation (§3) needs *no* exchange channel — `du_db::ibd:: +recompute_suggestions` reads `fed.population_breakdown` (PCA coords + components) + +`fed.haplogroup_reconciliation` + the `ibd_discovery_index` graph and writes ranked +`ibd.match_suggestion` rows. Ancestry blocking (§3.0: dominant super-pop × z-scored +PCA cell), shared-haplogroup + 2-hop shared-match (§3c) signals, weighted-combine + +**top-K cap** (§3.0 no-N:N), declarative recompute preserving DISMISSED/CONVERTED. +`du-jobs run-once ibd-discovery-recompute` + daily; `suggestions_for` reader; advisory- +locked. **Engine-only — no public API** (candidate pairs gate on consent). Test +`du-db/tests/ibd_suggestions.rs` (blocking + signals + top-K + idempotency + dismiss). +The exchange/consent reuse of D1 + the attestation-ingest/index path layer on once D1's +channel exists. Then the **Platform +track** continues at **D4 (assertion store, split rails)**, which consumes D3's +same-person output (§7) and D1's PII channel. diff --git a/documents/planning/d4-assertion-store.md b/documents/planning/d4-assertion-store.md new file mode 100644 index 00000000..645fc780 --- /dev/null +++ b/documents/planning/d4-assertion-store.md @@ -0,0 +1,209 @@ +# D4 — Assertion Store (Split Rails) + +**Status:** Design (v0, 2026-06-06). AppView roadmap §5 D4 — the **collaboration +primitive**. **Uses** D1 (PII channel), D2 (`research_subject` registry), **consumes** +D3 (same-person → assertion); **feeds** D5 (group-project ACL/UI). **Cross-repo:** +AppView non-PII store + Navigator local store (all PII). + +## 1.
Purpose + +Co-admin research is modeled as **attributed, scoped, append-only assertions** over a +`research_subject_id`, not as direct mutation of shared rows. One shape carries +branch assignments, same-person links, haplogroup labels, MDKA, and notes — and the +**PII-ness of each assertion decides whether it can ever touch a server**. + +## 2. The assertion shape + +``` +Assertion { + id, + subject: research_subject_id, -- pseudonymous (D2) + predicate: SAME_PERSON_AS | BELONGS_TO_BRANCH | HAPLOGROUP_IS | MDKA_IS | NOTE | ..., + value: <predicate-specific payload, see §3>, + author_did, -- attribution + scope: PUBLIC | PROJECT(<project_id>) | LOCAL, -- visibility/consent boundary + evidence: optional (STR distance, SNP, IBD summary ref, doc citation), + created_at, + supersedes: Assertion.id | null, -- append-only edit chain + retracted_at: ts | null, +} +``` + +Append-only + `supersedes`/`retracted_at` gives **conflict-with-provenance**: two +admins disagree → two live assertions, both attributed; nothing is silently +overwritten (§6). + +## 3. Predicate catalog — PII class drives the rail + +| Predicate | `value` | PII? | Rail (§4) | +| --- | --- | --- | --- | +| `SAME_PERSON_AS` | `{other_subject_id, confidence, method}` | no (pseudonymous ids) | non-PII | +| `BELONGS_TO_BRANCH` | `{clade_path / haplogroup_node}` | no | non-PII | +| `HAPLOGROUP_IS` | `{dna_type, haplogroup, status}` | no (a classification, not an identifier) | non-PII | +| `MDKA_IS` | `{lineage, ancestor_name, dates, place, lat/long}` | **YES** (names/places) | **PII → P2P only** | +| `IDENTITY` | `{member_name, external_ids[]}` | **YES** | **PII → P2P only** | +| `NOTE` | `{text}` | **maybe** (free text) | **PII rail by default**; non-PII only if author marks "no PII" | + +**Rule:** predicate PII-class is the *default*; free-text (`NOTE`) defaults to the PII +rail unless the author explicitly clears it.
A value-level scrubber can flag obvious +PII (emails/names) and force the PII rail regardless (mirrors the FTDNA `Note`-column +lesson — free text can't be auto-cleaned, so it's PII until proven otherwise). + +## 4. The three rails (PII-ness × scope) + +``` + │ scope=PUBLIC │ scope=PROJECT(id) │ scope=LOCAL + ───────────────┼─────────────────────────┼────────────────────────────┼────────────── + non-PII │ R1: PDS public record │ R2: AppView project store │ local only + │ → du-jobs ingest │ (current_view, D5 ACL) │ + ───────────────┼─────────────────────────┼────────────────────────────┼────────────── + PII │ ✗ FORBIDDEN │ R3: D1 encrypted P2P only │ local only + │ (consent can't make │ (folded LOCALLY, never │ + │ PII public here) │ on AppView) │ +``` + +- **R1 — PDS public record (non-PII, public):** e.g. `HAPLOGROUP_IS` when the member + consents to public. A signed `com.decodingus.research.assertion` record in the + author's PDS → ingested by du-jobs into the AppView store, same path as `fed.*`. +- **R2 — AppView project store (non-PII, project-scoped):** e.g. `BELONGS_TO_BRANCH` + within a project. Held in `research.assertion` (it's **not PII**), served only to the + project's admin team (D5 ACL). This is consistent with "no PII in AppView" — these + rows carry **no identifiers**. +- **R3 — D1 P2P (PII):** `MDKA_IS`, `IDENTITY`, PII `NOTE`. Travels as a D1 + `PII_ASSERTION` payload (D1 §7), folded into each recipient admin's **local** store. + **Never** a PDS record, **never** an AppView row. + +**PII can never be public (R1 cell is ✗):** even with member consent, MDKA/names don't +go to a world-readable record — consent raises visibility to the project circle (R3), +not the world. (If a member truly wants their own ancestor public, that's their PDS +choice post-claim, outside this layer.) + +## 5. 
Consent-flag enforcement (roadmap Q4) + +The FTDNA roster's `publicly_shares` (per member, on `ftdna_member`) and `access_granted` +set the **maximum scope** an admin's Navigator may assign to assertions about that +subject: +- `publicly_shares = NO` → assertions about that subject are capped at `PROJECT` + (R2/R3); the client refuses to emit a `PUBLIC` (R1) assertion. +- Default everything to `PROJECT` scope; `PUBLIC` requires an explicit, consent-backed + opt-in. +Enforced **Navigator-side at emit time** (the producer), and re-checked at the AppView +ingest boundary for R1 (reject public assertions about a subject flagged non-public — +though AppView only knows the pseudonym, so this is primarily a client-side guarantee). + +## 6. current_view — fold with conflict-and-provenance + +AppView materializes a **per-(subject, predicate) `current_view`** from the **live** +(non-retracted, non-superseded) **non-PII** assertions (R1+R2). PII (R3) is folded the +same way but **locally in each Navigator**, never centrally. + +- **Single-valued predicates** (`HAPLOGROUP_IS`, `BELONGS_TO_BRANCH`): if one live + assertion → settled; if ≥2 disagree → **`DISPUTED`**, surfacing all claims with + `author_did` + `created_at` + `evidence`. The group resolves by an admin issuing a + superseding assertion (or a `RESOLVES` meta-assertion) — never auto-collapsed. +- **Set-valued** (`NOTE`, multiple `SAME_PERSON_AS`): all live members shown. +- **`SAME_PERSON_AS`** additionally **drives a D2 merge**: an accepted same-person + assertion writes `research.subject_link` (method `ASSERTION`) and merges the two + subjects' views (D2 §5). D3's genetic same-person (§7) arrives as a pre-filled + `SAME_PERSON_AS` with `method=GENETIC` + IBD evidence, awaiting group accept. + +Materialization runs on ingest (R1/R2) like the existing `fed.*` reporting fold; +`du-db::research::refold(subject_id)` after each new assertion. + +## 7. Branch/clade assertions vs. 
the curated tree (roadmap Q3) + +`BELONGS_TO_BRANCH` assertions are a **project's** view of where its subjects sit — they +are **not catalog truth**. They are surfaced **against** the curated AppView haplotree +(`tree.*`), never merged into it. A project's clade tree = the fold of its +`BELONGS_TO_BRANCH` assertions, rendered alongside (and reconcilable with) the +authoritative tree. Promotion of a project finding into the catalog goes through the +existing **curator change-set** path (`tree.change_set`), not silently. + +## 8. Retraction & supersede + +- **Supersede:** an edit is a new assertion with `supersedes = old.id`; the chain head + is "live." Preserves full history + attribution. +- **Retract:** `retracted_at` set; drops out of `current_view` but stays for audit. +- **PII (R3):** retraction is a P2P `PII_ASSERTION` with a `retract` op; recipients drop + it from their **local** fold. (No central enforcement possible — by design; the + recipient already had the plaintext, exactly as in any E2E system.) + +## 9. 
Schema + +**AppView (`research.*`, non-PII only — R1/R2):** +```sql +CREATE TABLE research.assertion ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + subject_id UUID NOT NULL REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE, + predicate TEXT NOT NULL, -- SAME_PERSON_AS | BELONGS_TO_BRANCH | HAPLOGROUP_IS | NOTE(non-PII) + value JSONB NOT NULL, + author_did TEXT NOT NULL, + scope TEXT NOT NULL, -- PUBLIC | PROJECT:<project_id> + evidence JSONB, + record_uri TEXT, -- at:// of the PDS record (R1) if any + supersedes_id BIGINT REFERENCES research.assertion(id), + retracted_at TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX assertion_subject_pred_idx ON research.assertion (subject_id, predicate) WHERE retracted_at IS NULL; + +CREATE TABLE research.subject_current_view ( -- materialized fold (non-PII) + subject_id UUID NOT NULL, predicate TEXT NOT NULL, + state TEXT NOT NULL, -- SETTLED | DISPUTED + view JSONB NOT NULL, -- live claims + authors + evidence + refolded_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (subject_id, predicate) +); +``` +**No PII column exists** in `research.*` — a reviewer can confirm `MDKA_IS`/`IDENTITY` +have no server table. + +**Navigator local (all assertions incl. PII):** +```sql +CREATE TABLE assertion_local ( + id INTEGER PRIMARY KEY, subject_guid TEXT, research_subject_id TEXT, + predicate TEXT, value TEXT, -- JSON; PII lives ONLY here + D1 payloads + author_did TEXT, scope TEXT, evidence TEXT, + supersedes INTEGER, retracted_at TEXT, created_at TEXT NOT NULL +); +``` + +**Lexicon:** `com.decodingus.research.assertion` (R1 public records only). PII +assertions have **no lexicon** — they are D1 `PII_ASSERTION` payloads (D1 §7), by +construction never recordable. + +## 10.
Module placement + +- **AppView:** `du-db::research` (assertion CRUD, `refold`, current_view, subject_link + on same-person), `du-web::routes::research` (project-scoped assertion API + ACL via + D5), du-jobs Jetstream ingest of the `research.assertion` collection (R1). +- **Navigator:** `assertion_local` store + the local fold + the D1 driver to emit/ingest + `PII_ASSERTION` payloads (R3) and publish R1 records (via `PdsClient`). +- **Shared `du-domain`:** the `Assertion` shape, predicate catalog + PII-class table, + and the fold rules (so Edge and AppView fold identically). + +## 11. Open questions / decisions + +1. **PII classifier strictness** — predicate-default + value scrubber (recommended). + Confirm `NOTE` defaults to PII (safer) vs. defaults to non-PII with an opt-in PII + flag. Recommend **PII-by-default for free text**. +2. **current_view storage** — materialized table (recommended, mirrors `fed.*` + reporting) vs. compute-on-read. Materialize. +3. **Dispute resolution authority** — any admin supersedes vs. owner/role-gated (D5). + Likely role-gated; finalize with D5. +4. **R1 ingest consent re-check** — AppView can only see the pseudonym, so public-scope + enforcement is primarily client-side; accept that, or add a per-subject "public-ok" + pseudonymous flag the member sets on claim? Recommend client-side + claim-time flag. +5. **Cross-project assertion leakage** — a subject in two projects: are PROJECT-scoped + assertions isolated per project, or visible to any project the subject is in? + Recommend **per-project isolation** (scope = the specific project). + +## 12. Next step + +D4 + D2 + D1 are the full **private collaboration stack**: registry (D2) + channel +(D1) + the attributed-claim primitive (D4), with D3 feeding genetic same-person. 
The +buildable slice: `research.assertion` + `refold`/current_view + the Navigator +`assertion_local` store + D1 `PII_ASSERTION` round-trip + R1 public-record ingest — +provable by a `BELONGS_TO_BRANCH` (R2) and an `MDKA_IS` (R3) between two test admins. +Then **D5 (group-project reconciliation)** adds the admin-team ACL, roles, and UI that +gate all of R2/R3 and resolve disputes. diff --git a/documents/planning/d5-group-project-reconciliation.md b/documents/planning/d5-group-project-reconciliation.md new file mode 100644 index 00000000..d693b249 --- /dev/null +++ b/documents/planning/d5-group-project-reconciliation.md @@ -0,0 +1,225 @@ +# D5 — Group-Project Reconciliation + Admin-Team ACL + +**Status:** Design (v0, 2026-06-06). AppView roadmap §5 D5 — closes the **Platform +track**. **Reconciles** `proposals/group-project-system.md` (the member-sovereign +proposal) with **D1–D4**, and supplies the **admin-team ACL** that gates D4's R2/R3, +D1's PII introductions, and D4's dispute-resolution authority. + +## 1. Purpose + +A *project* is the unit of collaboration and the **consent/scope boundary** every +prior doc references (`scope=project:<project_id>`). D5 defines what a project is, **who is in +its trust circle**, the **roles/permissions** that gate the stack, and how the +existing member-sovereign proposal and the FTDNA admin-stewarded bootstrap are **one +lifecycle, not two systems**. + +## 2. The reconciliation — two modes on one lifecycle + +`group-project-system.md` assumes members are **on-platform, own a PDS, and +self-manage visibility** (sovereign). The FTDNA bootstrap (D2/D4) assumes the studied +people are **not on platform** — admins import and steward them.
These are the **ends +of one lifecycle**: + +| | **Stewarded mode** (FTDNA bootstrap) | **Sovereign mode** (the proposal's target) | +| --- | --- | --- | +| Studied person | pseudonymous `research_subject`, **no DID** | a member **DID**, self-present | +| PII custody | the steward admin (local + P2P) | the member's own PDS | +| Visibility control | admin team, **capped by the consent flag** | the member, **per-field opt-in** | +| Governance | admin team (D5 roles) | member self-sovereignty + admin governance | + +A subject moves stewarded → sovereign by **member-claim** (D2 §6). A single project can +hold **both** kinds of subject at once; D5 handles the union. + +## 3. Two memberships — the disentanglement the proposal needs + +The proposal conflates "member" (the studied person) with "participant" (a DID in the +project). D1–D4 require these be **separate**: + +- **Collaborator team** = the **DIDs** who run/contribute to the project, each with a + **role**. This is the **trust circle / ACL / consent boundary**. D1 brokers PII + exchange *between these DIDs*; D4 R2 is served *to these DIDs*; disputes are resolved + *by these DIDs* (per role). → AppView `project_member` (D5). +- **Subject membership** = which **`research_subject`s** (pseudonymous studied people) + belong to the project. → D2 `research.subject_membership` (already exists). + +In stewarded mode these are disjoint (admins ≠ subjects). In sovereign mode a claimed +member is **both** a collaborator DID *and* a subject (their `custody_did` = their +team DID). D5's ACL is over the **collaborator team**, never the subjects. + +## 4. 
Roles & permissions (adopt the proposal's, bind to the stack) + +Keep the proposal's `projectRole` model — `ADMIN`, `CO_ADMIN`, `MODERATOR`, `CURATOR` ++ granular permissions (`APPROVE_MEMBERS`, `MANAGE_ROLES`, …) — and bind each to what +it gates across D1–D4: + +| Capability | Min role/permission | Gates | +| --- | --- | --- | +| Join the PII exchange circle (D1) | any team member (`ADMIN`/`CO_ADMIN`) | D1 broker checks team membership before relaying | +| Write R2 project assertions (D4) | `CO_ADMIN`+ (or `MANAGE_ASSERTIONS`) | D4 R2 accept | +| Read R2 project current_view | any team member | D4 R2 serve / D5 ACL | +| Resolve a dispute (supersede) | `ADMIN`/`CURATOR` (D4 §11.3) | D4 fold resolution | +| Invite/approve collaborators, set roles | `ADMIN` (`MANAGE_ROLES`) | D5 membership | +| Promote a finding to the catalog tree | `CURATOR` → existing `tree.change_set` | catalog (D4 §7) | + +`MODERATOR` ≈ community management (sovereign-mode member relations); `CURATOR` is the +bridge to the existing tree-curation path. + +## 5. 
AppView project + ACL schema (PII-free) + +```sql +CREATE TABLE research.project ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + project_uri TEXT UNIQUE, -- at:// of the groupProject PDS record (owner-authored) + name TEXT, -- project names are not member PII + kind TEXT, -- SURNAME | HAPLOGROUP | GEOGRAPHIC | STUDY + join_policy TEXT NOT NULL, -- OPEN | APPROVAL_REQUIRED | INVITE_ONLY | HAPLOGROUP_VERIFIED + succession TEXT NOT NULL DEFAULT 'CO_ADMIN_INHERITS', + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE research.project_member ( -- the COLLABORATOR TEAM (the ACL) + project_id BIGINT NOT NULL REFERENCES research.project(id) ON DELETE CASCADE, + member_did TEXT NOT NULL, + role TEXT NOT NULL, -- ADMIN | CO_ADMIN | MODERATOR | CURATOR + permissions TEXT[] NOT NULL DEFAULT '{}', + appointed_by TEXT, + joined_at TIMESTAMPTZ NOT NULL DEFAULT now(), + left_at TIMESTAMPTZ, -- revocation (§8) + PRIMARY KEY (project_id, member_did) +); +CREATE INDEX project_member_did_idx ON research.project_member (member_did) WHERE left_at IS NULL; +``` + +`research.subject_membership` (D2) holds the studied subjects. **No PII anywhere** — +DIDs, roles, pseudonymous subject ids, and project *names* (which are not member PII). + +## 6. The ACL enforces across the whole stack + +`project_member` (live rows) is the single ACL the broker and assertion store consult: + +- **D1 PII exchange:** before relaying a `GENEALOGY_PII` envelope between A and B, the + broker checks **both are live `project_member`s** of the named project (and the + request `scope` matches). Non-members can't be in the circle. +- **D4 R2 (project assertions):** read = any live team member; write/dispute = role per + §4. AppView enforces on the `research.assertion` API. +- **D4 R3 visibility cap:** an admin may only emit PII (R3) to **fellow team members**; + the consent flag (`publicly_shares`) still caps *public* scope (D4 §5). 
+- **Revocation:** `left_at` set → immediately drops from the ACL (proposal's "Revocable + Participation"). Caveat §8. + +## 7. The proposal's aggregate records = R1/R2 (don't duplicate) + +The proposal's `projectTreeView`, `projectModal`, `strComparison` records are +**aggregate, non-PII** ("genetic distance not raw STR; member counts not lists") — i.e. +they are exactly the **R1/R2 non-PII layer** of D4 plus existing AppView aggregates: + +- `projectTreeView` / project clade tree = the fold of `BELONGS_TO_BRANCH` assertions + (D4 §7), rendered against `tree.*`. +- `projectModal` (modal haplotype) = the existing `ystr` modal aggregation, scoped to a + project's subjects. +- `strComparison` (genetic distance, not raw values) = a derived R2 aggregate. + +So D5 **maps the proposal's records onto D4's rails** rather than adding a parallel +record set: the proposal's "privacy-preserving research value" *is* the non-PII +assertion/aggregate layer. Raw individual STR/PII = R3 (P2P), never these records. + +## 8. PII durability & succession (a real bootstrap risk) + +In stewarded mode, a subject's PII lives **only in the steward admin's local store**. +If that admin vanishes, the PII is **gone** — and so is the project's research memory. +Mitigations D5 must specify: + +- **Replicate stewarded PII across the consent circle.** When co-admins join, the + steward **P2P-exchanges (D1) the relevant `SUBJECT_BUNDLE`s** so ≥2 team members hold + each subject's PII. This turns the consent circle into a redundancy set (and is the + same exchange that enables collaboration — durability is a free side effect). +- **Succession** (`research.project.succession`, from the proposal): `CO_ADMIN_INHERITS` + (default) hands the `ADMIN` role and steward duty to a co-admin who already holds the + replicated PII; `MEMBER_VOTE` / `PROJECT_CLOSES` per the proposal. On `PROJECT_CLOSES`, + AppView drops the project + ACL; local PII remains with whoever holds it (E2E reality). 
+- **Revocation caveat:** removing a DID from the ACL stops *future* access, but PII + already exchanged to them cannot be recalled (same E2E truth as D4 §8). The ACL gates + the *channel*, not memory. Surface this honestly in the UI. + +## 9. Membership policy & join flows + +- **Join policy** (`join_policy`): `OPEN` / `APPROVAL_REQUIRED` (admin approves) / + `INVITE_ONLY` / `HAPLOGROUP_VERIFIED` (must match a haplogroup, checked against + non-PII fed/assertion data). Applies to **collaborators** joining the team. +- **Sovereign members joining** (proposal's flow): a member with a DID joins, becomes a + `subject` *and* (optionally) a low-privilege team member; sets their own + per-field visibility (which then *replaces* the admin's consent-flag cap for their + data). This is the claim/sovereign path (D2 §6) at project granularity. +- **Invite** rides D1 (a signed invite → consent → `project_member` insert). + +## 10. Lifecycle (per project, mixed subjects allowed) + +``` +Admin creates project P (stewarded) → research.project + ADMIN project_member +Admin imports FTDNA roster → pseudonymous subjects + subject_membership (D2) +Co-admins invited → project_member rows; steward P2P-replicates PII (§8) +Team collaborates → R2 assertions (AppView) + R3 PII (P2P) gated by ACL (§6) +Member M onboards + claims subject S → S.custody_did=M; M becomes a team member; + M's per-field visibility supersedes the consent cap +Project matures → more subjects sovereign; admins govern, members own +``` + +## 11. Module placement & schema deltas + +- **AppView:** `research.project` + `research.project_member` (new, mig in the + `research.*` set with D2/D4); `du-db::research::project` (ACL checks, role/permission + queries) consumed by the D1 broker endpoints and the D4 assertion API; the + `groupProject` PDS record ingested by du-jobs into `research.project`. 
+- **Navigator:** local project + team roster mirror; the invite/consent + PII-replicate + flows over D1; per-subject visibility/consent UI. +- **Shared `du-domain`:** the role/permission enum + the capability→permission map (so + Edge and AppView agree on who-can-do-what). +- **Reconcile with the proposal:** D5 supersedes the proposal's *governance/membership* + sections (now AppView-enforced ACL) and *adopts* its roles/policies/succession; the + proposal's aggregate records map onto D4 (§7). The proposal's member-sovereign + visibility model = the post-claim state. + +## 12. Open questions / decisions + +1. **PII replication default** — auto-replicate every subject's PII to all co-admins + (max durability, max exposure within the consented circle) vs. on-demand per + subject. Recommend **auto-replicate within the team** (the circle is already + consented; durability matters) with a per-subject opt-out. +2. **Sovereign-member privilege** — does a claimed member auto-get a team role, or stay + subject-only until invited? Recommend **subject-only + self-visibility control**; + team roles remain invite-gated. +3. **`HAPLOGROUP_VERIFIED` join** — checked against which non-PII signal (fed haplogroup + reconciliation / a `HAPLOGROUP_IS` assertion)? Define. +4. **Project as PDS record owner** — the proposal makes `groupProject` an admin's PDS + record; confirm AppView treats it as the source of truth (ingest) vs. AppView-native. + Recommend **PDS record = source, AppView mirrors** (consistent with the rest). +5. **Cross-project subject** — a subject in two projects: confirm per-project assertion + isolation (D4 §11.5) and that PII replication is per-project (no leakage across). + +## ✅ AppView ACL BUILT (2026-06-12) + +The collaborator-team ACL is built. **Reconciliation:** reused the existing +`social.group_project` (mig 0009) as the project (not a new `research.project`); +`owner_did` is the founding ADMIN. 
Added `research.project_member` (mig 0034: +project_id → social.group_project, member_did, role, permissions[], appointed_by, +joined_at, left_at). `du_db::research`: `Role` (ADMIN/CO_ADMIN/MODERATOR/CURATOR) + +`Capability` + `Role::allows` (the §4 map), `role_of` (owner⇒ADMIN, else live +project_member), `is_team_member`, `can`, `add_member`/`revoke_member`(left_at)/ +`members_of`. **Wired in:** D2's register is now `ManageSubjects`-gated (ADMIN/CO_ADMIN), +the subjects read is team-member-gated; D1's project-scoped request + consent require +the actor be a live team member (`exchange::request_meta` + `project_scope_id`). Team +endpoints `/api/v1/research/project/{member,member/revoke,members}` (signed, ADMIN-gated). +Memory `group-project-acl`. **Remaining (Navigator/D4):** the groupProject PDS-record +ingest (§12 Q4); invite/join + PII replication/succession over D1 (§8/§9); the D4 +capabilities (WriteAssertions/ResolveDispute/PromoteToCatalog defined, enforced when D4 +lands); shared `du-domain` role enum; granular `permissions[]` overrides. + +## 13. Next step — Platform track complete + +D1–D5 are the full collaboration platform: **channel (D1) · pseudonymous registry (D2) +· genetic resolver (D3) · attributed claims with split rails (D4) · projects + ACL + +lifecycle (D5)** — with the no-PII-in-AppView invariant intact end to end. Buildable +order across the track: D1 `du-exchange` + broker → D2 `research_subject` + id-exchange +→ D4 `research.assertion` + rails → D5 `project`/`project_member` ACL → D3 IBD on the +same channel. The **Catalog track (D6 discovery automation, D7 multi-test-type, D8 +sequencer-lab)** is independent and can proceed in parallel whenever the team chooses. 
diff --git a/documents/planning/design-doc-triage-report.md b/documents/planning/design-doc-triage-report.md new file mode 100644 index 00000000..764cf1d6 --- /dev/null +++ b/documents/planning/design-doc-triage-report.md @@ -0,0 +1,450 @@ +# Design-Doc ↔ Rust Triage Report + +**Started:** 2026-06-07. **Purpose:** walk the original (pre-rewrite) planning +design docs one by one, compare each against the actual Rust implementation, and +record a triage verdict + recommended action **for later action** (nothing is +changed by this report itself). + +**Scope:** the 11 original docs in `documents/planning/` (excludes the new +`d1`–`d5` + `design-roadmap-rust-rewrite.md`, which are current). The +`documents/proposals/` set (Bucket B) is out of scope here. + +**Verdict legend** +- ✅ **Doc current** — matches the code; no action. +- 📝 **Update doc** — code is the source of truth; doc is stale/drifted. +- 🔧 **Make code compliant** — doc is the intended design; code should change. +- ⚖️ **Split** — some of both (note which parts). +- 🗑️ **Deprecate/supersede** — doc describes a dropped or superseded approach. + +## Execution log (2026-06-07) + +- **Reconciliation headers added** to the kept docs (#2, #3, #5, #6, #7, #8, #11); + #1 already had one. +- **Removed** the three superseded docs (#4 jsonb-consolidation, #9 ibd-matching, + #10 appview-pds-backfeed) per "if superseded, just remove it." Their inbound + references inside `documents/planning/` were rewired (→ D1/D3 for IBD; "realized + in mig 0002/0004" for JSONB; "dropped, member-claim carve-out under D1/D4" for + backfeed). The PDS-backfeed **member-claim carve-out** is preserved in the + design-roadmap gap catalog + D2/D5. 
+- **Still to do (later passes):** references to the removed docs remain in + `documents/atmosphere/` (00-Overview, 04-Ancestry-Records, 06-IBD-Matching-Records, + Executive-Summary) and `documents/proposals/` (branch-age-estimation, + group-project-system) — clean up when those doc sets are triaged. + +## Status index + +| # | Doc | Verdict | Action owner | +|---|-----|---------|--------------| +| 1 | variant-naming-authority.md | ✅ Doc current | — (optional code nicety) | +| 2 | tree-versioning-system.md | 📝 Update doc | docs | +| 3 | openalex-publication-discovery.md | 📝 Update doc (light) | docs | +| 4 | jsonb-consolidation-analysis.md | 🗑️ REMOVED (realized) | done | +| 5 | multi-test-type-roadmap.md | ⚖️ Split (reconcile built; rest is forward = D7) | docs + forward | +| 6 | sequencer-lab-inference-system.md | ⚖️ Split (schema built incl. consensus; logic forward = D8) | docs + forward | +| 7 | haplogroup-discovery-system-overview.md | ✅ Doc current (minor terminology) | docs (light) | +| 8 | haplogroup-discovery-system.md | ⚖️ Split (curator/pool half built; engine forward = D6; arch evolved) | docs + forward | +| 9 | ibd-matching-system.md | 🗑️ REMOVED (→ D1 + D3) | done | +| 10 | appview-pds-backfeed-system.md | 🗑️ REMOVED (dropped; 1 carve-out) | done | +| 11 | post-mvp-roadmap.md | 📝 Update / reconcile with design-roadmap | docs | + +--- + +## 1. variant-naming-authority.md — ✅ Doc current + +**Compared against:** `du_db::naming` (`crates/du-db/src/naming.rs`), +`du_db::variant`, migration 0016, `/curator/naming`, `/api/v1/variants/export.gff`. + +**Finding:** already reconciled — the doc carries an accurate `Implementation +status (2026-06, Rust)` header and the code matches it: `DU` sequence +(`core.next_du_name()`), lifecycle `UNNAMED→PENDING_REVIEW→NAMED`, mint preserves +the prior name as a `common_names` alias, local same-coordinate dedup +(`dedup_by_coordinates`, GRCh38), curator queue with modes, GFF3 propagation +export. 
The two "Not yet" items are genuine future work, not violations: +- **Live external (YBrowse/ISOGG/YFull) dedup lookup** — not built; "check + external names" is a manual curator step. +- **Unnamed variants in the public API/domain** — `du-domain::Variant.canonical_name` + is still `String`; the code sidesteps it by filtering `canonical_name IS NOT NULL` + on every public path, so unnamed variants never flow through that type. + +**Latent edge (low severity):** `du_db::variant::get_by_id` selects +`canonical_name` into a non-`Option` without a NULL filter, so +`GET /api/v1/variants/{id}` on an *unnamed* variant id would 500 on row decode. +Unreachable via normal UX (unnamed ids aren't surfaced anywhere public). + +**Recommended action (later, optional):** harden `get_by_id` to tolerate/404 a +NULL `canonical_name`. Full compliance (Option in `du-domain::Variant`) is a +cross-repo change the doc already defers. No doc change needed. + +--- + +## 2. tree-versioning-system.md — 📝 Update doc + +**Compared against:** `tree.change_set` / `tree.tree_change` (mig 0001 enum + +0003), `tree.wip_*` staging tables, `tree.curator_action` (mig 0010), +`du_db::change_set`, `routes/change_sets.rs` + `routes/reviews.rs` + `/manage/*` +(`routes/versioning.rs`). + +**Finding:** the *design* is correct and implemented — the doc recommends **Option +B (overlay change-sets)** and that's exactly what Rust built (`tree.change_set` ++ `tree.tree_change`; **no `tree.tree_version` table** — Option A was not taken; +audit in `tree.curator_action` as described). But every concrete specific is +Scala-era and has drifted: + +- **Code/types:** Scala `case class` / `Future` / `trait TreeVersioningService` / + `*.scala.html` → Rust `du_db::change_set` + axum routes + Askama. 
+- **Schema:** `SERIAL` / `VARCHAR CHECK(...)` / `TIMESTAMP` / added + `tree_version_id` columns → `BIGINT IDENTITY` / native enum + `tree.change_set_status` / `TIMESTAMPTZ` / the existing **temporal** + (`valid_from`/`valid_until`) model (no version-id columns). +- **API:** documented public `/api/v1/tree/change-sets` + `/api/v1/curator/changes/*` + **do not exist**. Reality: `/curator/change-sets/*` + `/curator/reviews/*` (UI) and + `/manage/change-sets/*` + `/manage/haplogroups/merge[/preview]` (machine). Change-sets + are deliberately **not** in the public `/api/v1`. +- **Permissions:** granular `tree.version.*` → the single **`Curator`** role guard + (Admin/TreeCurator/Curator). +- **Ambiguity handling (substantive evolution):** the doc describes a **file-based + `ambiguity_report_path`** + an in-change-set review. Rust replaced this with the + **`tree.wip_*` staging tables + a dedicated `/curator/reviews` resolution flow** + (REPARENT/MERGE_EXISTING/DEFER), enacted by the change-set apply engine. The doc + doesn't mention this layer at all. + +**Recommended action (later):** **update the doc to match the code.** Add a +`Rust implementation status` reconciliation header (as variant-naming-authority.md +has), correct the schema/API/permissions specifics, and add a section on the +`wip_*` + `/curator/reviews` merge-review layer that superseded the file-report +approach. Keep the Option A/B rationale as historical design context. Cross-link +the refreshed user guide (`../curator-guide-tree-versioning.md`). No code changes +needed — the system is built and working. + +--- + +## 3. 
openalex-publication-discovery.md — 📝 Update doc (light) + +**Compared against:** `pubs.publication_candidate` + `pubs.publication_search_config` +(mig 0006), `du_db::publication` (`enabled_search_configs`, `upsert_candidate`, +`promote_candidate`, `review_candidate`), `du_jobs::publications` +(`publication-update`, `publication-discovery`), `routes/publications.rs` +(`/curator/publications`), `du_external::openalex`. + +**Finding:** design is sound and **substantially implemented** — scheduled +discovery runs each enabled search config and upserts candidates; candidates +dedupe by `openalex_id`; the curator review queue (`/curator/publications`) does +**accept (promote to a reference) / reject / defer**. So the doc's Phase-1 "simple +curator review UI" (shown `[ ]`) is in fact **done**. Drift to fix: + +- **Stale specifics:** Scala `OpenAlexService.scala` / `PublicationService`, + **Pekko Quartz** cron, `public.users`, `SERIAL`, plural/unprefixed table names, + and the `/api/private/publication-candidates/*` endpoints → Rust + `du_external::openalex` + `du_db::publication` + the tokio scheduler + + `pubs.publication_candidate` (singular) + the `/curator/publications` UI (no + public candidate API). +- **Schedule:** documented weekly cron (Sun 02:00) → Rust runs **daily** + (`Duration::from_secs(86_400)`), config-gated. +- **Phase status drift:** Phase 1 complete incl. the curator UI; **not built:** + relevance scoring (Phase 2 — the `relevance_score` column exists but isn't + computed), smart discovery (Phase 3), biosample-extraction hints (Phase 4), and + the `publication_search_run` history/debug table. +- **Addition not in the doc:** the public **"suggest a paper"** on-ramp + (`/references/submit`: DOI → OpenAlex resolve → candidate queue). 
+ +**Recommended action (later):** light doc refresh — Rust reconciliation header, +fix the schema/endpoint/scheduler specifics, correct the phase checkboxes, add the +`/references/submit` on-ramp, and keep relevance scoring + `search_run` as explicit +forward work. No code changes required. + +--- + +## 4. jsonb-consolidation-analysis.md — 🗑️ Superseded (recommendations realized) + +**Compared against:** migrations 0002 (`core.biosample.original_haplogroups`) and +0004 (`genomics` header + `sequence_file`, `alignment_metadata`). + +**Finding:** this is a **pre-rewrite analysis** recommending 7 child-table → JSONB +consolidations, and the Rust redesign **implemented all of them** (mig 0004's +header explicitly enumerates the same moves): +- `sequence_file_checksum` / `_http_location` / `_atp_location` → `sequence_file` + `checksums` / `http_locations` / `atp_location` JSONB ✓ (child tables gone) +- `alignment_coverage` / `pangenome_alignment_coverage` → `coverage` JSONB on the + metadata tables ✓ — **with the recommended expression index** + (`((coverage->>'meanDepth')::double precision)`) +- `biosample_original_haplogroup` / `citizen_*` → `core.biosample.original_haplogroups` + JSONB ✓ (and the three biosample tables collapsed to one) +- bonus: scattered `at_uri`/`at_cid` → a single `atproto` JSONB ✓ + +Nothing to make compliant — the code already embodies (and slightly exceeds) the +analysis. Only nit: implemented coverage keys are camelCase (`meanDepth`, +`medianDepth`) alongside `percent_coverage_at_*x`, vs the doc's snake_case +proposal — cosmetic, no action. + +**Recommended action (later):** treat as **historical/implemented** — add a short +"realized in the Rust redesign (mig 0002/0004)" note at the top, or archive it. +No code or design action. + +--- + +## 5. 
multi-test-type-roadmap.md — ⚖️ Split (reconcile the built part; rest is forward = D7) + +**Compared against:** `genomics.test_type_definition` + `genomics.coverage_expectation_profile` +(mig 0004), `core.data_generation_method` / `core.target_type` enums, +`sequence_library.test_type_id` FK, `du-domain` `DataGenerationMethod`/`TargetType`, +`fed.genotype` (mig 0012). + +**Finding — built foundation (~Phase 1):** +- `genomics.test_type_definition` exists — **leaner** than the doc's spec: has + code/display_name/category/vendor/target_type/expected_min_depth/supports_*/ + typical_file_formats/description, but **omits** `expected_target_depth`, + `expected_marker_count`, `version`, `release_date`, `deprecated_at`, + `successor_test_type_id`, `documentation_url`. Coverage thresholds live in a + separate `genomics.coverage_expectation_profile` (not inline columns). +- `core.data_generation_method` (SEQUENCING/GENOTYPING) + `core.target_type` + native enums; `du-domain` mirrors them. ✓ +- `sequence_library.test_type_id` is a **native FK from the start** — the doc's + Phase-1 "[ ] migrate the string column to an FK" is moot (no string column). +- **Seed data NOT loaded** — the table is empty in migrations (the doc shows seed + as `[X]`; in Rust it's outstanding, per the design-roadmap "seed test_type_definition"). + +**Finding — not built (forward, = design-roadmap D7):** everything in Phases 2–6 — +`test_type_target_region`, `genotyping_test_summary` (local; partly shadowed by the +federated `fed.genotype` summary), `test_type_haplogroup_marker_coverage`, +`test_type_marker_intersection`; the `TestTypeService` + `/api/v1/test-types/*` and +`/api/v1/haplogroup-variants/*` APIs; chip-metadata ingest; **test-type-aware +haplogroup confidence**; cross-test-type IBD. Also tightly coupled to the (also +forward) haplogroup-discovery doc. 
+ +**Drift:** Scala throughout (Slick case classes, `Future` service traits, +`models.domain.genomics`), Pekko, removed `/api/private` endpoints — all need +restating in Rust terms when the forward parts are built. + +**Recommended action (later):** **keep the doc as the forward design (D7) but +reconcile the built part** — add a Rust status header: Phase-1 schema is built +(note the leaner `test_type_definition` + separate `coverage_expectation_profile` ++ native `test_type_id` FK), seed data is still TODO, and Phases 2–6 remain +forward; restate their schema/services in Rust terms when picked up. The core +indexing principle (index Y/mt variants + summaries, never raw autosomal) is +correct and already matches the implemented federation posture. No code change +required now beyond (optionally) loading the test-type seed. + +--- + +## 6. sequencer-lab-inference-system.md — ⚖️ Split (schema built incl. consensus; logic forward = D8) + +**Compared against:** `genomics.sequencing_lab`, `genomics.sequencer_instrument`, +`genomics.instrument_observation`, `genomics.instrument_association_proposal` +(mig 0004), `fed.sequencerun.instrument_id` (mig 0012). No lab-lookup/consensus +code found (only `coverage.rs`/`fed::core` touch `instrument_*` for benchmarks). + +**Finding — schema built (more than the roadmap's ~20% implies):** all four tables +exist, **including the two the doc marks as NEW/`[ ]`** — `instrument_observation` +and `instrument_association_proposal`. So the consensus data model is in place. +Schema deltas to reconcile: +- Tables are in `genomics`, native `BIGINT IDENTITY` (doc: `public.*`, `SERIAL`); + `sequencing_lab` is leaner (no created/updated_at). +- **`sequencer_instrument` has no `lab_id` FK** and a different column set + (`model_name`, `manufacturer`, `year_introduced`, `estimated_max_throughput`) — + i.e. instrument↔lab is intended to resolve via observation→proposal→accept, not a + static FK. 
The doc's proposed `sequencer_instrument` add-ons + (`source`/`observation_count`/`confidence_score`/`last_observed_at`) are **not** + present (that state lives in the proposal table instead). + +**Finding — zero logic (forward = design-roadmap D8):** none of it is wired — +no `/api/v1/sequencer/lab` lookup, no `/api/v1/labs/{instrument-id}`, no Firehose +`instrumentObservation` ingestion, no consensus/confidence engine, no curator +instrument-proposal review UI. The "existing API endpoints / domain models" the doc +lists are **Scala-era and do not exist** in Rust. The consensus source in Rust is +`fed.sequencerun.instrument_id` (crowdsourced @RG id); the +`com.decodingus.atmosphere.instrumentObservation` lexicon + its `fed.*` mirror are +**not yet defined** (design-roadmap notes the record shape is TBD). + +**Drift:** Scala/Slick/Tapir/Pekko throughout; "Current State" lists endpoints that +were never ported. + +**Recommended action (later):** keep as the forward design (D8) but reconcile — +Rust status header: the **full schema (incl. consensus + proposal tables) is in +place; logic is unbuilt**; fix the schema specifics (genomics schema, no `lab_id` +FK, actual instrument columns, observation/proposal tables already present); note +the consensus source is `fed.sequencerun.instrument_id` and the observation +lexicon/mirror is still to define; restate services/APIs in Rust terms (axum + +utoipa; Firehose = the existing Jetstream consumer). Drop the "existing endpoints" +section. No code change required now. + +--- + +## 7. haplogroup-discovery-system-overview.md — ✅ Doc current (minor terminology) + +**Compared against:** `tree.proposed_branch*` / `tree.biosample_private_variant` / +`tree.discovery_config` (schema), `du_db::proposal`, `/curator/proposals` +(review/promote) + `/manage/curation/proposals` intake. 
+ +**Finding:** this is a **stack-agnostic conceptual overview** (discover → correlate +→ propose → review → evolve; evidence sources; thresholds; curator workflow; +privacy/visibility; federated model). It still describes the intended system +accurately, and the curator-review half it describes **is built**. No +implementation specifics to drift. Two minor nits: +- **Terminology:** "Firehose / real-time stream" → the implemented inbound path is + the **Jetstream summary mirror** (`fed.*`); the credential-holding inbound + firehose was dropped. The *concept* (Edge → PDS → stream → AppView discovery) + still holds. +- **Auto-promotion** ("10+ samples → can be automatically accepted") is aspirational + — curator accept is the gate today; the automated ingest→consensus engine is + forward (= design-roadmap D6). + +**Recommended action (later):** optional one-line note that ingestion is via the +Jetstream summary mirror (not a credential-holding firehose) and that +auto-promotion is a future option. Otherwise leave as-is. + +--- + +## 8. haplogroup-discovery-system.md — ⚖️ Split (curator/pool half built; engine forward = D6; architecture evolved) + +**Compared against:** `du_db::proposal` (`crates/du-db/src/proposal.rs`), +`/curator/proposals` (review/promote) + `/manage/curation/proposals` intake, +`tree.proposed_branch` / `_evidence` / `_variant`, `tree.biosample_private_variant`, +`tree.discovery_config`, `tree.wip_*`. *(Triaged from the doc's +Prerequisites/Architecture + the overview + the cross-references in +multi-test-type-roadmap.md + the confirmed schema/code, rather than a full read of +all 71 KB.)* + +**Finding — done:** +- The doc's **prerequisite** (`variant-schema-simplification`: universal JSONB + coordinates, parallel-mutation handling, JSONB aliases) is **implemented** in the + Rust variant model. +- Schema is present (proposed_branch + evidence + variant, biosample_private_variant, + discovery_config, wip_*). 
+- The **curator review/promote + proposal pooling** half is **built**: + `proposal.rs` pools submissions by (proposed_name, parent) across submitters, + tracking `evidence_count`/`submitter_count`/`confidence`/`status`; curators work + `/curator/proposals` (review/promote); machine intake at `/manage/curation/proposals`. + +**Finding — forward + architecture evolved (= design-roadmap D6):** the doc +specifies an **AppView-side pipeline** — *Private Variant Extraction* that parses +`HaplogroupResult.mismatchingSnps` from ingested biosamples (Citizen Firehose + +External upload) → groups → ProposedBranch → consensus detection. Rust **inverts the +ingestion model**: **Navigator (Edge) extracts the private variants and submits a +proposal; the AppView pools by submitter** — there is no AppView-side raw-extraction +from `fed.biosample`. This is consistent with the no-PII / edge-compute direction. +The automated consensus/Jaccard engine + auto-reassignment remain unbuilt (D6 — the +`du-domain` algorithm spec is the open design piece). + +**Drift:** Scala/Slick/Tapir, `Firehose`, `/api/v1/discovery/proposals` + +`/api/v1/curator/proposals/{id}/accept` → Rust `/curator/proposals/*` + +`/manage/curation/proposals`. + +**Recommended action (later):** keep as the forward design (D6) but reconcile +substantially — Rust status header (prereqs + schema + curator/pooling half done); +**document the ingestion-model change** (Edge-submits-proposals, not AppView-side +extraction from `mismatchingSnps`); restate endpoints/services in Rust terms; mark +the consensus/Jaccard engine + auto-reassignment as the remaining D6 work. No code +change required now. + +--- + +## 9. ibd-matching-system.md — 🗑️ Superseded by D1 + D3 + +**Compared against:** `ibd.*` schema (mig 0007: `match_request`, `match_consent`, +`match_suggestion`, `ibd_discovery_index`, `ibd_pds_attestation`, `population_*`), +the new `d1-encrypted-edge-exchange.md` + `d3-ibd-matching-impl.md` (read in full +earlier). 
**No `du-db::ibd` code exists** (schema-only). + +**Finding:** this is the **original (Scala/Tapir-era) IBD requirements** doc — it +invents its own crypto, key exchange, and P2P channel, and references the Java Edge +App. It is **explicitly superseded** by the two new docs we just added: +- **D1** generalizes its crypto/consent/channel into the shared `exchange.*` + substrate (D1's own note: it "supersedes/generalizes the crypto + Edge-coordination + sections of ibd-matching-system.md"; it also fixes the Ed25519-can't-ECDH gap). +- **D3** is the **Rust build spec** that "implements the requirements in + ibd-matching-system.md on top of D1," folding `ibd.match_request`/`match_consent` + into `exchange.*` and keeping the IBD-specific tables. + +So the doc's value is now purely as **historical requirements**; the authoritative +design is D1 + D3. IBD itself is **unbuilt** (schema present, logic forward — D3 +closes the Match track). + +**Recommended action (later):** add a header marking it **superseded** — point +crypto/channel/key-exchange → `d1-encrypted-edge-exchange.md`, the Rust impl → +`d3-ibd-matching-impl.md`; keep the body as historical requirements (or archive). +No code change (build per D3 when the Match track is scheduled). + +--- + +## 10. appview-pds-backfeed-system.md — 🗑️ Superseded/dropped (one open carve-out) + +**Compared against:** `rust/README.md` + STATUS (federation is **outbound-only**), +the `[[atproto-federation-direction]]` decision (drop private firehose; use +permissions/OAuth + notify-fetch), the design-roadmap Q2. No backfeed code exists +(correctly absent). + +**Finding:** the doc designs a **bidirectional AppView→PDS backfeed** that pushes +refined/derived data (haplogroup refinement, branch discovery, ancestral STR/TMRCA, +matches, lab inference) back into user PDSes. 
The Rust rewrite **dropped this +direction**: federation is an **outbound Jetstream summary mirror** (Navigator +publishes → `fed.*`) plus a notify-fetch posture; the inbound firehose + PDS-fleet ++ backfeed model is out of scope. So the doc describes a **non-chosen +architecture**. + +**Open carve-out (don't fully delete):** the design-roadmap (Q2) flags that +**member-claim** custody (D2 §6 / D5) may need a *limited* AppView→PDS write — to +be decided under D1/D4. So the general backfeed is dropped, but the narrow +member-claim write is an open question. + +**Recommended action (later):** mark **superseded/dropped** with a header (Rust = +outbound-only mirror + notify-fetch; no general backfeed), and record the single +open carve-out (limited member-claim write, decide under D1/D4). Keep as historical +/decision-input or archive. No code (correctly nothing built). + +--- + +## 11. post-mvp-roadmap.md — 📝 Update / reconcile with the design-roadmap + +**Compared against:** current build state (per docs #1–#10 above) and the new +`design-roadmap-rust-rewrite.md` (the current authoritative index). + +**Finding:** this is the **old central roadmap** indexing the six subsystem docs +with a dependency graph + phased plan (A–F). It is **largely superseded** by +`design-roadmap-rust-rewrite.md`. The new roadmap lists this doc only as a "pairs +with… feature sequencing" companion, but in practice the new roadmap is the +accurate one (it has the gap catalog, the two-track D1–D8 sequencing, and the +no-PII reconciliation). Specific drift: +- **Stale statuses:** Phase A (tree schema / `test_type_definition` / + `sequence_file` JSONB) is correctly `[X]`, and OpenAlex candidate queue `[X]` — + but it misses that the **curator proposal/review half**, **tree versioning**, and + **multi-test + sequencer-lab schema** are now built; and its "In Progress / + Planned" labels predate that.
+- **Omits the entire collaboration/IBD-via-D1 platform** (D1–D5) — it still lists + IBD as the standalone `ibd-matching-system.md` (now superseded by D1+D3). +- **Scala terms** throughout (Firehose, `PrivateVariantExtractionService`, + `publication_candidates` plural, etc.). + +**Recommended action (later):** **reconcile with `design-roadmap-rust-rewrite.md`** +— either demote post-mvp-roadmap to historical with a header pointing at the new +roadmap as authoritative, or refresh its status table + terminology and graft in +the D1–D5 platform track. Keep its still-useful bits (per-phase detail, +JSONB-distributed-across-phases plan, success metrics). No code action. + +--- + +## Summary of verdicts + +| Verdict | Docs | +|---------|------| +| ✅ Doc current | #1 variant-naming-authority, #7 discovery-overview (minor terminology) | +| 📝 Update doc | #2 tree-versioning, #3 openalex (light), #11 post-mvp-roadmap | +| ⚖️ Split (reconcile built + forward design) | #5 multi-test-type (D7), #6 sequencer-lab (D8), #8 discovery (D6) | +| 🗑️ Superseded / dropped | #4 jsonb-consolidation (realized), #9 ibd (→ D1+D3), #10 backfeed (dropped; 1 carve-out) | + +**Cross-cutting themes** +- **No code is wrong.** Every verdict is "update the doc," never "make the code + comply" — the Rust build is the source of truth; the pre-rewrite docs carry + Scala/Slick/Tapir/Pekko/Firehose specifics, stale schemas/endpoints, and + out-of-date status. +- **Recurring fixes:** add a "Rust implementation status" reconciliation header + (as variant-naming-authority.md already has); swap Scala→Rust specifics; correct + `/api/v1/*` + `/curator/*` + `/manage/*` route surfaces; replace granular + `tree.version.*`/`*.permission` with the `Curator` role; "Firehose" → the + outbound **Jetstream** summary mirror. 
+- **Two architecture evolutions to capture:** (a) discovery ingestion is + **Edge-submits-proposals**, not AppView-side extraction (#8); (b) IBD crypto/ + channel is now the shared **D1 `exchange.*`** substrate (#9). +- **One open product decision:** the limited **member-claim** AppView→PDS write + (#10), to be decided under D1/D4 (design-roadmap Q2). +- **Forward design that's still valid** lives in #5/#6/#8 (= design-roadmap + D6–D8) and should be kept (reconciled), not discarded. diff --git a/documents/planning/design-roadmap-rust-rewrite.md b/documents/planning/design-roadmap-rust-rewrite.md new file mode 100644 index 00000000..7d6bc87f --- /dev/null +++ b/documents/planning/design-roadmap-rust-rewrite.md @@ -0,0 +1,210 @@ +# AppView (decodingus) — Design-Gap Roadmap for the Rust Rewrite + +**Status:** Living index. Drafted 2026-06-06. +**Purpose:** One map of *what design work remains* for the Rust AppView, what +already has a doc, what must be **reconciled** with the new Navigator-side +genealogical-platform direction, and a recommended **order**. This is a +navigational doc — it points at the real design docs (existing and to-write), it +does not restate them. + +**Pairs with:** `rust/STATUS.md` (build status), `planning/post-mvp-roadmap.md` +(feature sequencing). Navigator-side companions live in the **DUNavigator** repo: +`docs/design/ftdna-project-import.md` and `docs/design/academic-ena-import.md`. + +## 1. Where the rewrite stands + +Per `rust/STATUS.md` (2026-06-05): the **spine is done and cutover-verified**. +Built: schema (migrations 0001–0022), `du-db` query layer, public HTML/HTMX + JSON +API, auth + curator tools, haplotree build/merge/versioning, SNP-graft + review, +YBrowse mirror→reconcile (~3M variants), Y-STR signatures/prediction/age, variant +naming authority, ETL (verified on a real prod dump), and **federation reporting** +(Jetstream → `fed.*` mirror + report endpoints). 
+ +Launch-critical path is just **(1) cutover execution** + alias-aware mt resolution, +and **(2) the live cross-host AT Proto OAuth test**. Everything below is the +**post-launch feature mass** — and it's where the design gaps are. + +## 2. The two buckets of remaining design + +**Bucket A — documented subsystems, not yet built in Rust.** Each has a planning +doc; the gap is a *Rust-implementation spec* (exact SQL, state machines, endpoints) +and reconciliation with the new schema. Mostly schema-only today. + +**Bucket B — the collaboration / genealogy-platform layer.** This is what the +Navigator FTDNA work (`ftdna-project-import.md` §8) depends on. It is **partly +covered by older proposals** (`proposals/group-project-system.md`, +`Messaging_and_Feed_System.md`, `Reputation_System_Implementation.md`) — but those **predate** the +ResearchSubject/assertion model *and take the opposite privacy stance* (see §3). +The gap here is **reconciliation + the net-new pieces**, not greenfield. + +## 3. The central reconciliation — RESOLVED (2026-06-06): no PII in AppView + +The apparent tension between the privacy-first `group-project-system.md` and the +Navigator FTDNA design is **decided in favor of the privacy-first stance**: + +> **AppView holds NO PII. It is a pure broker.** It keeps its anonymized/aggregate- +> only posture (the `fed.*` mirror drops donor PII at ingest). Member PII — names, +> MDKA, kit↔identity linkage — is exchanged **admin-to-admin over an encrypted +> Edge-to-Edge (P2P) channel**, the **same mechanism the IBD system uses** for +> genetic comparison (now D1/D3: ECDH X25519 + AES-256-GCM, +> AT-Proto-brokered handshake, P2P/relay transport). AppView coordinates discovery, +> consent, and key exchange, and persists **PII-free** match/assertion *state*. 
+ +This **reinforces** `group-project-system.md` (member-sovereign, refs-not-copies) +and **corrects** the earlier Navigator draft (which had PII landing in an AppView +private tier — now amended in `ftdna-project-import.md` §8 to P2P-only). + +The bootstrap→sovereign **lifecycle still holds**, but no server-side PII copy +exists at any stage: + +``` +[Admin-stewarded bootstrap] [Member-sovereign steady state] + admin imports FTDNA project ──► member onboards, proves kit control, + PII stays LOCAL; shared with CLAIMS their ResearchSubject ──► custody + co-admins via encrypted P2P (DID) moves to them; they decide their + (our FTDNA on-ramp) own visibility (group-project-system.md) +``` + +**Consequence — one shared substrate.** Because both IBD comparison and genealogy- +PII exchange need the same encrypted Edge-to-Edge channel + AppView broker, **design +it once** (§5, D1) and let both tracks ride it. This is the highest-leverage +foundational piece; it underpins Bucket B and the IBD impl alike. + +## 4. Gap catalog + +Legend: ✅ done · ◐ partial · ☐ schema-only · ✎ has design doc · ✶ net-new design needed + +### Bucket A — finish the documented subsystems + +| Subsystem | Code | Schema | Design doc | Remaining design work | +| --- | --- | --- | --- | --- | +| **IBD matching** | ☐ | `ibd` (mig 0007) | → D1 + D3 (orig planning doc removed) | Designed: candidate-pair mining SQL over `fed.*`, dual-consent state machine, Edge↔AppView handoff, match-list endpoints — see `d3-ibd-matching-impl.md` on `d1-encrypted-edge-exchange.md`. **Reused by Bucket B's cross-admin resolver.** | +| **Haplogroup-discovery automation** | ◐ (curator half ✅) | `tree.proposal`/`wip_*`/`discovery_config` | ✎ `planning/haplogroup-discovery-system.md` (71 KB) | The *ingest→consensus engine*: private-variant extraction from `fed.biosample`/`fed.str_profile`, Jaccard/consensus + thresholds, sample de-dup, auto-reassignment on accept. du-domain algorithm spec. 
| +| **Multi-test-type** | ◐ ~30% | `genomics.test_type_definition` (mig 0004/0014) | ✎ `planning/multi-test-type-roadmap.md` (47 KB) | Marker-coverage + target-region reference tables; **test-type-aware confidence** (Big Y-700 vs chip); seed `test_type_definition`. Feeds discovery confidence. | +| **Sequencer-lab inference** | ◐ ~20% | `genomics` lab/instrument | ✎ `planning/sequencer-lab-inference-system.md` (30 KB) | Public `GET /api/v1/labs/{instrument-id}`; consensus from `fed.instrumentObservation` (record shape not yet defined); curator review + confidence scoring. | +| **OpenAlex pub discovery** | ◐ | `pubs` | ✎ `planning/openalex-publication-discovery.md` | Mostly built; finish discovery/enrichment edges. Low risk. | +| **JSONB consolidation** | ✅ realized | mig 0002/0004 | (removed — done) | Realized in the Rust redesign (7 child tables → JSONB on parents). No action. | +| **PDS backfeed** | ➖ dropped | — | (removed — superseded) | Outbound-only mirror; general backfeed dropped. **Open carve-out:** a *limited* AppView→PDS write for member-claim (§3) — decide under D1/D4. | + +### Bucket B — collaboration / genealogy platform + +| Piece | Code | Schema | Existing proposal | Remaining design work | +| --- | --- | --- | --- | --- | +| **Group projects** | ☐ | `social` placeholder (mig 0009) | ✎ `proposals/group-project-system.md` | **Reconcile** with FTDNA on-ramp (§3); add admin-team membership + roles + ACL + audit; project = the scope boundary for assertions. | +| **ResearchSubject registry** | ☐ | — | ✶ none | **Net-new, PII-free.** Opaque subject node + **salted `id_hashes[]`** (not raw kit#/accession), cross-admin resolution on hashed/genetic signals (reuses IBD backbone, §3), `custody_did` for member-claim. No names/MDKA. | +| **Assertion store** | ☐ | — | ✶ none | **Net-new.** Attributed, scoped assertions, append-only + retract, conflict-with-provenance. 
**Split by `scope`:** non-PII → PDS records + du-jobs ingest + AppView `current_view`; **PII → encrypted P2P only, never stored in AppView.** | +| **Encrypted P2P exchange + broker** | ☐ | `ibd` (mig 0007) partial | ✶ none (crypto now spec'd in D1) | **Net-new, SHARED with IBD (§3).** The Edge-to-Edge channel (ECDH X25519 + AES-256-GCM) + AppView broker (discovery, consent, key-exchange relay, exchange attestation). Carries IBD comparison **and** genealogy PII. Build once. | +| **Messaging / feed** | ☐ | `social` placeholder | ✎ `proposals/Messaging_and_Feed_System.md` | Reconcile with assertion threads; the collaboration layer reuses messaging for discussion. Refresh to Rust schema. | +| **Reputation** | ☐ | `social` placeholder | ✎ `proposals/Reputation_System_Implementation.md` | Lower priority; depends on social being live. Refresh later. | + +## 5. Recommended sequencing (design order) + +Dependency-driven. Each `D#` is a doc to write (or refresh) before the matching +build work. + +1. **D1 — Encrypted Edge-to-Edge exchange + AppView broker** ✅ **DRAFTED: + `planning/d1-encrypted-edge-exchange.md`** *(net-new, SHARED foundation, gates + both tracks)* — X25519 ECDH (X3DH-lite, forward secrecy) + AES-256-GCM, identity- + bound via a published Ed25519-signed X25519 key (fixes the "Ed25519 can't ECDH" + gap), **blind store-and-forward relay** (recommended) so offline peers work, + generic `exchange.*` broker schema + `ExchangeEnvelope`, new shared `du-exchange` + crate. Lifts/generalizes the original IBD requirements (now folded into D3). Open: transport confirm, + relay host, generalize-now (§12). +2. 
**D2 — ResearchSubject + identity resolution** ✅ **DRAFTED: + `planning/d2-research-subject-registry.md`** *(net-new, PII-free)* — pseudonymous + `research_subject` registry (`{research_subject_id, custody_did}` + memberships, + **no ids/hashes**), exact match via **D1 id-list exchange** (corrected the + rejected AppView-hash idea), genetic match via D3, member-claim custody, + cross-project = claim-only. **Uses** D1; **depends on** D3's resolver. Open: + id-exchange-vs-PSI, cross-project policy, claim proof (§10). +3. **D3 — IBD matching impl spec** ✅ **DRAFTED: + `planning/d3-ibd-matching-impl.md`** *(implements the IBD requirements in Rust + on D1)* — candidate mining SQL over `fed.*` (haplogroup/population-overlap/shared- + match → `match_suggestion`), dual-consent reuses `exchange.*`, Edge handoff = a D1 + session (`purpose=IBD_*`), summary-only attestation indexing, **relationship + classification feeds D2's genetic resolver** (same-person → `subject_link`). + Closes the Match track. Open: phasing, N² gate, algo provenance (§12). +4. **D4 — Assertion store (split rails)** ✅ **DRAFTED: + `planning/d4-assertion-store.md`** *(net-new, the collaboration primitive)* — + attributed/scoped/append-only assertions over `research_subject_id`; **PII-class × + scope → three rails** (R1 non-PII public→PDS record/ingest; R2 non-PII project→ + AppView `research.assertion`+current_view, D5 ACL; R3 PII→D1 P2P-only, folded + locally, never server-side); `current_view` fold keeps disputes with provenance; + `SAME_PERSON_AS` drives D2 merge (D3 feeds it); branch assertions surfaced *against* + the curated tree. Open: NOTE PII-default, dispute authority (§11). +5. **D5 — Group-project reconciliation** ✅ **DRAFTED: + `planning/d5-group-project-reconciliation.md`** *(reconciles `group-project- + system.md` with D1–D4)* — **two memberships** disentangled (collaborator-team DIDs + +roles = the ACL/consent-circle vs. 
pseudonymous subject membership); adopts the + proposal's roles (ADMIN/CO_ADMIN/MODERATOR/CURATOR + perms) and binds each to what + it gates in D1/D4; `research.project`+`project_member` ACL gates PII exchange/R2/ + disputes; proposal's aggregate records map onto D4 R1/R2 (no duplication); + **stewarded→claim→sovereign lifecycle** (mixed subjects per project); PII durability + via consent-circle P2P replication + succession. **Platform track COMPLETE.** +6. **D6 — Haplogroup-discovery automation spec** *(refresh + `haplogroup-discovery-system.md`)* private-variant ingest→consensus engine. Mostly + independent of B; can run in parallel anytime after launch. +7. **D7 — Multi-test-type confidence** + **D8 — Sequencer-lab inference** — finish + the documented subsystems; both feed discovery quality. Parallelizable. +8. **Deferred:** messaging/reputation refresh, JSONB consolidation, backfeed + decision (revisit under D1/D4). + +**Two tracks** can run concurrently, joined at **D1 (the shared encrypted-exchange +substrate)**: **Platform track** D1→D2→D4→D5 (genealogy collaboration) and **Match +track** D1→D3 (IBD) share the channel; the **Catalog track** D6→D7/D8 (tree-science +quality) is independent. + +## 6. Cross-repo contracts to keep in sync + +Bucket B is inherently two-sided. Each net-new AppView doc must pin the +**Navigator-side contract** already drafted in DUNavigator (`ftdna-project-import.md` +§8, amended 2026-06-06 to the no-PII / P2P model): +- **Non-PII** record/NSID shapes (assertions, salted `id_hashes`, aggregate state) → + extend `du-domain::fed`; ingested via the existing **Jetstream → du-jobs** path. +- **PII** payloads (names, MDKA, kit↔subject map, raw STR/SNP) → the **encrypted P2P + channel (D1)**, never an AppView record. Navigator runs the Edge endpoint; AppView + only brokers + attests. 
+- `ResearchSubject` ↔ Navigator `biosample.guid`: AppView stores the **opaque** id + + hashes; the clear `external_id(source, id)` stays in Navigator's local store. +- **Sequencer-lab lookup + consensus (D8) — DONE 2026-06-12 (lookup + engine); + curator review UI remains.** Navigator's Rust rewrite **lost** the Scala lab association + (FGC/FTDNA/YSEQ/Dante/Nebula…) + read-name platform/instrument inference; it's being + restored Navigator-side (read-name scan → `instrument_id`/flowcell/model + a local + `labs` catalog). The **AppView lookup endpoint is now built**: + **`GET /api/v1/sequencer/lab?instrument_id=…`** (single lookup, 404 if unknown) and + **`GET /api/v1/sequencer/lab-instruments`** (bulk cache seed), resolving via the + **preseeded** `genomics.sequencer_instrument.lab_id` (mig 0025 re-adds it; the ETL + backfills the legacy tie that the 0004 redesign had dropped; `du_db::sequencer`). The + lookup uses the preseeded direct tie (memory `sequencer-lab-lookup`). The **consensus + engine is now also built**: `du_db::sequencer::recompute_consensus` derives + observations from `fed.sequencerun ⋈ fed.biosample.center_name` → per-instrument + `instrument_association_proposal` (dominant lab, confidence, threshold status, + conflict→PENDING), run by `du-jobs run-once sequencer-consensus` (+ hourly); the + curator **accept** (`/manage/instrument-proposals/:id/accept`, audited via + `du_db::audit::log`) sets `sequencer_instrument.lab_id` — the column the lookup reads. + Navigator publishes `instrument_id`/`center_name` on the `sequencerun`/`biosample` fed + records. **Remaining D8:** the curator HTMX review UI (API done), the + `instrumentObservation` lexicon, and recency/confidence-level scoring refinements. + +## 7. Open strategic questions + +1. 
~~PII in AppView~~ **RESOLVED (§3): no PII in AppView; PII moves via encrypted + P2P (D1).** Remaining sub-question: choose **transport** — direct P2P (NAT + traversal, both online) vs **blind relay** (store-and-forward ciphertext); D1 + decides. Relay is likely needed since admins are rarely online simultaneously. +2. **Backfeed in or out?** Member-claim likely needs AppView→PDS writes; STATUS + lists backfeed as dropped. Reconcile under D1/D4. +3. **Where do FTDNA branch/clade assignments live** vs the curated AppView + haplotree? Project `Sub Group` paths are *project assertions*, not catalog truth + — keep them in the assertion store, surface against (not merged into) the tree. +4. **Consent-flag enforcement** — the FTDNA roster's `Publicly Share DNA Results` + must gate federation at the AppView boundary; specify where it's checked. +5. **Sequencing vs launch** — none of this blocks the cutover; confirm it's all + post-launch so it doesn't pull focus from the two launch-critical items. + +## 8. Next step + +Draft **D1 — Encrypted Edge-to-Edge exchange + AppView broker** (the shared +foundation for both the genealogy and IBD tracks), pinning the transport choice +(Q1) and the no-PII data-classification policy. Then **D2 (ResearchSubject, PII-free)** +and **D3 (IBD impl spec)** build on it. diff --git a/documents/planning/haplogroup-discovery-system-overview.md b/documents/planning/haplogroup-discovery-system-overview.md index b1e856ec..d2f01930 100644 --- a/documents/planning/haplogroup-discovery-system-overview.md +++ b/documents/planning/haplogroup-discovery-system-overview.md @@ -1,5 +1,14 @@ # Haplogroup Discovery System +> **ℹ️ Rust status (2026-06-07).** This conceptual overview still holds. Two +> terminology notes: ingestion is via the outbound **Jetstream summary mirror** +> (`fed.*`), not a credential-holding "Firehose" (that inbound model was dropped); +> and **auto-promotion** is a future option — curator accept is the gate today. 
The +> curator review/promote half is built; the automated discovery engine is forward +> work (`design-roadmap-rust-rewrite.md` D6). Full technical design + status: +> [`haplogroup-discovery-system.md`](./haplogroup-discovery-system.md). Triage: +> `design-doc-triage-report.md` §7. + ## What It Does The Haplogroup Discovery System automatically finds new branches on the Y-DNA and mtDNA family trees by analyzing genetic samples from the community. diff --git a/documents/planning/haplogroup-discovery-system.md b/documents/planning/haplogroup-discovery-system.md index 261f8e04..1ca5c121 100644 --- a/documents/planning/haplogroup-discovery-system.md +++ b/documents/planning/haplogroup-discovery-system.md @@ -1,5 +1,34 @@ # Haplogroup Discovery System +> **⚖️ Rust status (2026-06-07).** Prerequisites (variant-schema simplification: +> universal JSONB coordinates, parallel-mutation handling, JSONB aliases) are +> **done**. Schema is present (`tree.proposed_branch` + `_evidence` + `_variant`, +> `tree.biosample_private_variant`, `tree.discovery_config`, `tree.wip_*`), and the +> **curator review/promote + proposal-pooling half is built** (`du_db::proposal`, +> `/curator/proposals`, `/manage/curation/proposals` intake). +> +> **Architecture evolved:** the Rust model is **Edge-submits** — +> Navigator extracts the private variants and the citizen publishes them; the AppView +> pools across submitters. There is **no AppView-side extraction from +> `HaplogroupResult.mismatchingSnps`** as the pipeline below describes (this aligns +> with the no-PII / edge-compute direction). +> +> **D6 DONE (2026-06-12).** Delivery is a **`com.decodingus.atmosphere.privateVariant` +> lexicon** record (one per biosample/DNA-arm: terminal + variant calls) mirrored via +> Jetstream into `fed.private_variant` (mig 0028).
The **discovery consensus engine** +> (`du_db::discovery`, mig 0029) materializes them into `tree.biosample_private_variant` +> and pools the per-sample variant sets into `tree.proposed_branch` by **variant-set +> Jaccard** — a declarative, idempotent recompute (stable proposal ids via a +> `cluster_key` partial-unique index, config thresholds from `tree.discovery_config`, +> confidence = count + distinct submitters + variant-set consistency, +> `READY_FOR_REVIEW`/`SPLIT_CANDIDATE` transitions, opt-in auto-promote off by +> default). Promotion reassigns + freezes the contributing samples +> (`discovery::reassign_after_promote`). Read API `GET /api/v1/discovery/proposals[/:id]`; +> the `/curator/proposals` UI surfaces defining variants + confidence + a split banner. +> The Scala/Slick/Tapir/"Firehose"/`mismatchingSnps`-extraction specifics below are +> **superseded** — kept for historical context only. Memory `discovery-consensus-engine`; +> triage `design-doc-triage-report.md` §8. + ## Executive Summary This document outlines a comprehensive system for evolving Y-DNA and mtDNA haplogroup trees based on discoveries from **all biosample sources**: both Citizen Biosamples (AT Protocol) and External/Publication Biosamples loaded by curators. The system manages "private branches" (proposed terminal variants), tracks consensus formation across multiple biosamples regardless of source, and provides curator oversight for tree modifications. @@ -16,7 +45,7 @@ Before implementing the discovery system, the variant schema must be migrated to 2. **Parallel mutation handling** - Same variant name can exist for different lineages 3. **JSONB aliases** - No separate `variant_alias` table -See: `documents/proposals/variant-schema-simplification.md` +See: realized in `core.variant` (mig 0002 — universal JSONB coordinates/aliases). **Key dependency**: The `tree.biosample_private_variant` and `tree.proposed_branch_variant` tables reference the variant table. 
The new schema changes how variants are identified: @@ -1448,7 +1477,7 @@ decodingus.discovery { - [ ] Remove `variant_alias` table and related code - [ ] Rename `variant_v2` to `variant` -**See:** `documents/proposals/variant-schema-simplification.md` +**See:** realized in `core.variant` (mig 0002). **Risk Mitigation:** - Dual-write period: write to both old and new schema during transition @@ -1683,7 +1712,7 @@ Foundation curator tools for manual tree management, independent of the automate 3. **Publication Integration**: Automatically create proposals from new publications 4. **Collaborative Curation**: Multi-curator review workflow with voting 5. **Geographic Correlation**: Analyze proposal evidence by geographic distribution -6. **DecodingUs Naming Authority**: Establish "DU" prefix for naming discovered variants; publish in format for YBrowse aggregation (see `documents/proposals/variant-schema-simplification.md`) +6. **DecodingUs Naming Authority**: Establish "DU" prefix for naming discovered variants; publish in format for YBrowse aggregation (see `planning/variant-naming-authority.md`) 7. **Pangenome Coordinates**: Extend variant coordinates JSONB to support graph-based pangenome references as they become available ### Scalability diff --git a/documents/planning/ibd-matching-system.md b/documents/planning/ibd-matching-system.md deleted file mode 100644 index 8e28c087..00000000 --- a/documents/planning/ibd-matching-system.md +++ /dev/null @@ -1,1381 +0,0 @@ -# IBD Matching and Relationship Discovery System - -## Executive Summary - -This document outlines a comprehensive system enabling Genetic Genealogists to discover and confirm IBD (Identity By Descent) relationships with other participating users. The system leverages the AT Protocol for decentralized consent management, coordinates with the Java-based Edge Computing Application for secure data exchange, and builds upon existing schema infrastructure (`ibd_discovery_index`, `ibd_pds_attestation`). 
- ---- - -## User Story - -> As a **Genetic Genealogist** -> I need to **be able to perform IBD relationship comparisons with participating Genetic Genealogists** -> So that I can **discover potential relatives and build my family tree** - ---- - -## Problem Statement - -Genetic genealogists need to: - -1. **Discover potential matches** - Find other users who may share DNA segments indicating common ancestry -2. **Prioritize comparisons** - Focus on matches likely to be meaningful (shared contacts, similar population breakdowns) -3. **Request consent** - Ask potential matches for permission to perform detailed IBD analysis -4. **Exchange data securely** - Share encrypted genetic data for comparison without exposing raw sequences -5. **Record confirmed relationships** - Persist validated matches for future discovery - -### Current Gap - -The existing system has: -- Database schema for IBD discovery (`ibd_discovery_index`, `ibd_pds_attestation`) - **not utilized** -- Ancestry analysis infrastructure (`ancestry_analysis`, `population`) - **not connected to matching** -- User/PDS infrastructure - **no consent workflow** -- No match list concept -- No Lexicon definitions for match requests or population breakdowns - ---- - -## System Architecture - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ User A's Environment │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ -│ │ Edge App │────▶│ User A's PDS │────▶│ AT Protocol │ │ -│ │ (Java) │ │ │ │ Network │ │ -│ │ │ │ • Biosample │ │ │ │ -│ │ • IBD Analysis │ │ • Match List │ │ • Firehose │ │ -│ │ • Encryption │ │ • Match Requests│ │ • XRPC │ │ -│ │ • Key Exchange │ │ • Population │ │ │ │ -│ └────────┬────────┘ └─────────────────┘ └────────┬────────┘ │ -│ │ │ │ -└───────────┼────────────────────────────────────────────────┼────────────────┘ - │ │ - │ Encrypted P2P Channel │ - │ (Edge App 
↔ Edge App) │ - ▼ ▼ -┌───────────┼────────────────────────────────────────────────┼────────────────┐ -│ │ │ │ -│ ┌────────┴────────┐ ┌─────────────────┐ ┌────────┴────────┐ │ -│ │ Edge App │◀────│ User B's PDS │◀────│ AT Protocol │ │ -│ │ (Java) │ │ │ │ Network │ │ -│ │ │ │ • Biosample │ │ │ │ -│ │ • IBD Analysis │ │ • Match List │ │ • Firehose │ │ -│ │ • Encryption │ │ • Match Requests│ │ • XRPC │ │ -│ │ • Key Exchange │ │ • Population │ │ │ │ -│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ -│ User B's Environment │ -└─────────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────────┐ -│ DecodingUs AppView │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ • Subscribes to Firehose for match confirmations │ -│ • Indexes confirmed matches in ibd_discovery_index │ -│ • Aggregates population data for discovery suggestions │ -│ • Provides match discovery API │ -│ • Tracks attestation consensus │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - ---- - -## Lexicon Extensions - -### Namespace: `com.decodingus.atmosphere` - -#### 1. Match List Record (`com.decodingus.atmosphere.matchList`) - -Stores a user's confirmed genetic matches in their PDS. - -**NSID:** `com.decodingus.atmosphere.matchList` - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.matchList", - "defs": { - "main": { - "type": "record", - "description": "A user's list of confirmed genetic matches stored in their PDS.", - "key": "tid", - "record": { - "type": "object", - "required": ["ownerDid", "matches"], - "properties": { - "ownerDid": { - "type": "string", - "description": "The DID of the user who owns this match list." 
- }, - "matches": { - "type": "array", - "description": "List of confirmed matches.", - "items": { - "type": "ref", - "ref": "#confirmedMatch" - } - }, - "lastUpdated": { - "type": "string", - "format": "datetime" - } - } - } - }, - "confirmedMatch": { - "type": "object", - "description": "A confirmed genetic match with another user.", - "required": ["matchedUserDid", "matchedBiosampleUri", "relationshipType", "confirmedAt"], - "properties": { - "matchedUserDid": { - "type": "string", - "description": "DID of the matched user." - }, - "matchedBiosampleUri": { - "type": "string", - "description": "AT URI of the matched user's biosample record." - }, - "relationshipType": { - "type": "string", - "description": "Type of genetic relationship.", - "knownValues": ["AUTOSOMAL", "Y_CHROMOSOME", "MT_DNA", "X_CHROMOSOME"] - }, - "totalSharedCm": { - "type": "float", - "description": "Total shared centimorgans (autosomal)." - }, - "numSharedSegments": { - "type": "integer", - "description": "Number of shared DNA segments." - }, - "largestSegmentCm": { - "type": "float", - "description": "Size of the largest shared segment in cM." - }, - "estimatedRelationship": { - "type": "string", - "description": "Estimated relationship (e.g., '2nd Cousin', '3rd-4th Cousin')." - }, - "sharedAncestors": { - "type": "array", - "description": "Known shared ancestors (if any).", - "items": { "type": "string" } - }, - "confirmedAt": { - "type": "string", - "format": "datetime" - }, - "matchSignature": { - "type": "string", - "description": "Cryptographic signature confirming both parties agreed to this match." - }, - "notes": { - "type": "string", - "description": "User notes about this match." - } - } - } - } -} -``` - ---- - -#### 2. Match Request Record (`com.decodingus.atmosphere.matchRequest`) - -A request from one user to another for IBD comparison (similar to Bluesky DMs). 
- -**NSID:** `com.decodingus.atmosphere.matchRequest` - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.matchRequest", - "defs": { - "main": { - "type": "record", - "description": "A request to perform IBD comparison with another user.", - "key": "tid", - "record": { - "type": "object", - "required": ["requesterDid", "requesterBiosampleUri", "targetDid", "requestType", "status", "createdAt"], - "properties": { - "requesterDid": { - "type": "string", - "description": "DID of the user initiating the match request." - }, - "requesterBiosampleUri": { - "type": "string", - "description": "AT URI of the requester's biosample." - }, - "targetDid": { - "type": "string", - "description": "DID of the user being requested for a match." - }, - "targetBiosampleUri": { - "type": "string", - "description": "AT URI of the target's biosample (if known)." - }, - "requestType": { - "type": "string", - "description": "Type of comparison requested.", - "knownValues": ["AUTOSOMAL", "Y_CHROMOSOME", "MT_DNA", "FULL"] - }, - "status": { - "type": "string", - "description": "Current status of the request.", - "knownValues": ["PENDING", "ACCEPTED", "REJECTED", "EXPIRED", "COMPLETED", "CANCELLED"] - }, - "discoveryReason": { - "type": "ref", - "ref": "#discoveryReason", - "description": "Why this match was suggested." - }, - "message": { - "type": "string", - "description": "Optional message from requester explaining interest." - }, - "createdAt": { - "type": "string", - "format": "datetime" - }, - "expiresAt": { - "type": "string", - "format": "datetime", - "description": "Request expiration (default 30 days)." - }, - "respondedAt": { - "type": "string", - "format": "datetime" - }, - "responseMessage": { - "type": "string", - "description": "Response message from target user." 
- } - } - } - }, - "discoveryReason": { - "type": "object", - "description": "Reason this match was suggested.", - "properties": { - "reasonType": { - "type": "string", - "knownValues": ["SHARED_MATCH", "POPULATION_OVERLAP", "HAPLOGROUP_MATCH", "MANUAL"] - }, - "sharedMatchDids": { - "type": "array", - "description": "DIDs of users both parties match with.", - "items": { "type": "string" } - }, - "populationOverlapScore": { - "type": "float", - "description": "Score indicating population breakdown similarity (0-1)." - }, - "sharedHaplogroup": { - "type": "string", - "description": "Shared terminal haplogroup (Y-DNA or mtDNA)." - } - } - } - } -} -``` - ---- - -#### 3. Population Breakdown Record (`com.decodingus.atmosphere.populationBreakdown`) - -Ancestry composition data stored in the user's PDS. - -**NSID:** `com.decodingus.atmosphere.populationBreakdown` - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.populationBreakdown", - "defs": { - "main": { - "type": "record", - "description": "Ancestry population breakdown for a biosample.", - "key": "tid", - "record": { - "type": "object", - "required": ["biosampleUri", "analysisMethod", "populations", "analyzedAt"], - "properties": { - "biosampleUri": { - "type": "string", - "description": "AT URI of the biosample this breakdown belongs to." - }, - "analysisMethod": { - "type": "string", - "description": "Method/algorithm used for analysis (e.g., 'ADMIXTURE_K12', 'PCA_REFERENCE')." - }, - "referencePanel": { - "type": "string", - "description": "Reference panel used (e.g., 'Human Origins', '1000 Genomes')." 
- }, - "populations": { - "type": "array", - "description": "Population percentages.", - "items": { - "type": "ref", - "ref": "#populationComponent" - } - }, - "analyzedAt": { - "type": "string", - "format": "datetime" - }, - "confidenceLevel": { - "type": "string", - "description": "Overall confidence in the breakdown.", - "knownValues": ["HIGH", "MEDIUM", "LOW"] - } - } - } - }, - "populationComponent": { - "type": "object", - "description": "A single population component in the breakdown.", - "required": ["populationName", "percentage"], - "properties": { - "populationName": { - "type": "string", - "description": "Name of the population (e.g., 'Northern European', 'East Asian')." - }, - "populationCode": { - "type": "string", - "description": "Standardized code for the population." - }, - "percentage": { - "type": "float", - "description": "Percentage of ancestry from this population (0-100)." - }, - "confidenceInterval": { - "type": "ref", - "ref": "#confidenceInterval" - }, - "parentPopulation": { - "type": "string", - "description": "Parent population category for hierarchical breakdowns." - } - } - }, - "confidenceInterval": { - "type": "object", - "properties": { - "lower": { "type": "float" }, - "upper": { "type": "float" } - } - } - } -} -``` - ---- - -#### 4. Match Consent Vote Record (`com.decodingus.atmosphere.matchConsent`) - -Records a user's consent decision for a match request. Both users must have matching consent records for a match to be confirmed. - -**NSID:** `com.decodingus.atmosphere.matchConsent` - -```json -{ - "lexicon": 1, - "id": "com.decodingus.atmosphere.matchConsent", - "defs": { - "main": { - "type": "record", - "description": "A user's consent vote for a match comparison.", - "key": "tid", - "record": { - "type": "object", - "required": ["matchRequestUri", "voterDid", "vote", "votedAt"], - "properties": { - "matchRequestUri": { - "type": "string", - "description": "AT URI of the match request this consent applies to." 
- }, - "voterDid": { - "type": "string", - "description": "DID of the user casting this vote." - }, - "voterBiosampleUri": { - "type": "string", - "description": "AT URI of the voter's biosample." - }, - "vote": { - "type": "string", - "description": "The consent decision.", - "knownValues": ["ACCEPT", "REJECT", "DEFER"] - }, - "votedAt": { - "type": "string", - "format": "datetime" - }, - "expiresAt": { - "type": "string", - "format": "datetime", - "description": "When this consent expires (requires renewal)." - }, - "scope": { - "type": "array", - "description": "What data can be shared.", - "items": { - "type": "string", - "knownValues": ["SEGMENT_POSITIONS", "SHARED_CM_TOTAL", "HAPLOGROUP", "POPULATION_OVERLAP"] - } - }, - "signature": { - "type": "string", - "description": "Cryptographic signature of the consent." - } - } - } - } - } -} -``` - ---- - -## Match Discovery Workflow - -### Phase 1: Discovery Suggestions - -Users can discover potential matches through several mechanisms: - -#### 1a. Shared Match Discovery - -``` -User A has matches: [M1, M2, M3, M4] -User B has matches: [M2, M3, M5, M6] - -Shared matches: [M2, M3] - -If |shared| >= threshold (configurable, default 2): - → Suggest A and B as potential matches - → Higher shared count = higher suggestion priority -``` - -#### 1b. Population Overlap Discovery - -``` -User A population: {Northern European: 45%, British Isles: 30%, Germanic: 15%, ...} -User B population: {Northern European: 50%, British Isles: 25%, Scandinavian: 15%, ...} - -Overlap Score = Σ min(A[pop], B[pop]) for all populations - = min(45,50) + min(30,25) + ... - = 45 + 25 + ... - -If overlapScore >= threshold (configurable, default 60%): - → Suggest A and B as potential matches -``` - -#### 1c. 
Haplogroup Match Discovery - -``` -User A: Y-DNA R-M269, mtDNA H1a -User B: Y-DNA R-M269, mtDNA J1c - -If A.yHaplogroup == B.yHaplogroup (for males): - → Suggest Y-DNA comparison - → Priority based on terminal depth match - -If A.mtHaplogroup == B.mtHaplogroup: - → Suggest mtDNA comparison -``` - -### Phase 2: Match Request Flow - -``` -┌─────────────┐ ┌─────────────┐ -│ User A │ │ User B │ -│ (Requester)│ │ (Target) │ -└──────┬──────┘ └──────┬──────┘ - │ │ - │ 1. Create matchRequest in A's PDS │ - │────────────────────────────────────────▶│ - │ (status: PENDING) │ - │ │ - │ 2. AT Protocol delivers to B's PDS │ - │ (B sees pending request) │ - │ │ - │ 3. B reviews request │ - │ (sees discovery reason) - │ │ - │ 4. B creates matchConsent in B's PDS │ - │◀────────────────────────────────────────│ - │ (vote: ACCEPT) │ - │ │ - │ 5. A creates matchConsent in A's PDS │ - │────────────────────────────────────────▶│ - │ (vote: ACCEPT) │ - │ │ - │ ═══════════════════════════════════════│ - │ Both consents present = Ready for IBD │ - │ ═══════════════════════════════════════│ - │ │ -``` - -### Phase 3: IBD Analysis (Edge App Coordination) - -Once both users consent, the Edge Apps coordinate the actual analysis: - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ EDGE APP COORDINATION PROTOCOL │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ 1. KEY EXCHANGE │ -│ ┌──────────────┐ ┌──────────────┐ │ -│ │ Edge App A │───── ECDH Key Exchange ──│ Edge App B │ │ -│ │ │ (via AT Protocol) │ │ │ -│ └──────────────┘ └──────────────┘ │ -│ │ -│ 2. ENCRYPTED DATA EXCHANGE │ -│ • App A encrypts variant positions with shared key │ -│ • App A sends encrypted payload to App B (P2P or relay) │ -│ • App B decrypts and performs local comparison │ -│ • App B encrypts results and sends back │ -│ │ -│ 3. 
RESULT VERIFICATION │ -│ • Both apps independently calculate shared segments │ -│ • Results are hashed and compared │ -│ • Matching hashes confirm valid analysis │ -│ │ -│ 4. ATTESTATION │ -│ • Both apps sign the match result │ -│ • Attestations written to respective PDS │ -│ • DecodingUs indexes confirmed match │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### Phase 4: Match Confirmation and Indexing - -``` -┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ -│ User A │ │ User B │ │ DecodingUs AppView │ -│ PDS │ │ PDS │ │ │ -└──────┬──────┘ └──────┬──────┘ └──────────┬──────────┘ - │ │ │ - │ attestation A │ attestation B │ - │───────────────────┼───────────────────────▶ - │ │ │ - │ │ Firehose events │ - │ │ │ - │ │ ┌───────────────────┤ - │ │ │ Verify signatures │ - │ │ │ Match attestations│ - │ │ │ Index in DB │ - │ │ └───────────────────┤ - │ │ │ - │ │ ibd_discovery_index │ - │ │ ibd_pds_attestation │ - │ │ │ -``` - ---- - -## Database Schema Extensions - -### New Tables (in `public` schema, or consider `matching` schema) - -```sql --- Evolution XX: IBD Matching System Extensions - --- ============================================================================ --- PART 1: Match Discovery Tables --- ============================================================================ - --- Match suggestions generated by the discovery engine -CREATE TABLE match_suggestion ( - id BIGSERIAL PRIMARY KEY, - suggester_sample_guid UUID NOT NULL, - suggested_sample_guid UUID NOT NULL, - suggestion_type VARCHAR(50) NOT NULL - CHECK (suggestion_type IN ('SHARED_MATCH', 'POPULATION_OVERLAP', 'HAPLOGROUP_MATCH')), - score DOUBLE PRECISION NOT NULL, - metadata JSONB, -- Stores reason details (shared match DIDs, overlap score, etc.) 
- created_at TIMESTAMP NOT NULL DEFAULT NOW(), - expires_at TIMESTAMP, - status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE' - CHECK (status IN ('ACTIVE', 'DISMISSED', 'CONVERTED', 'EXPIRED')), - UNIQUE(suggester_sample_guid, suggested_sample_guid, suggestion_type) -); - -CREATE INDEX idx_ms_suggester ON match_suggestion(suggester_sample_guid); -CREATE INDEX idx_ms_suggested ON match_suggestion(suggested_sample_guid); -CREATE INDEX idx_ms_type ON match_suggestion(suggestion_type); -CREATE INDEX idx_ms_score ON match_suggestion(score DESC); - --- ============================================================================ --- PART 2: Population Overlap Caching --- ============================================================================ - --- Cached population breakdowns for efficient overlap calculation -CREATE TABLE population_breakdown_cache ( - id BIGSERIAL PRIMARY KEY, - sample_guid UUID NOT NULL UNIQUE, - citizen_did VARCHAR(255), - analysis_method VARCHAR(100) NOT NULL, - breakdown JSONB NOT NULL, -- {populationCode: percentage, ...} - breakdown_hash VARCHAR(64) NOT NULL, -- For change detection - source_at_uri VARCHAR(500), - cached_at TIMESTAMP NOT NULL DEFAULT NOW(), - expires_at TIMESTAMP -); - -CREATE INDEX idx_pbc_sample ON population_breakdown_cache(sample_guid); -CREATE INDEX idx_pbc_did ON population_breakdown_cache(citizen_did); -CREATE INDEX idx_pbc_method ON population_breakdown_cache(analysis_method); - --- Pre-computed population overlap scores for discovery -CREATE TABLE population_overlap_score ( - id BIGSERIAL PRIMARY KEY, - sample_guid_1 UUID NOT NULL, - sample_guid_2 UUID NOT NULL, - overlap_score DOUBLE PRECISION NOT NULL, - analysis_method VARCHAR(100) NOT NULL, - computed_at TIMESTAMP NOT NULL DEFAULT NOW(), - UNIQUE(LEAST(sample_guid_1, sample_guid_2), GREATEST(sample_guid_1, sample_guid_2), analysis_method) -); - -CREATE INDEX idx_pos_sample1 ON population_overlap_score(sample_guid_1); -CREATE INDEX idx_pos_sample2 ON 
population_overlap_score(sample_guid_2); -CREATE INDEX idx_pos_score ON population_overlap_score(overlap_score DESC); - --- ============================================================================ --- PART 3: Match Request Tracking --- ============================================================================ - --- Local tracking of match requests (supplements PDS records) -CREATE TABLE match_request_tracking ( - id BIGSERIAL PRIMARY KEY, - request_at_uri VARCHAR(500) NOT NULL UNIQUE, - requester_did VARCHAR(255) NOT NULL, - requester_sample_guid UUID NOT NULL, - target_did VARCHAR(255) NOT NULL, - target_sample_guid UUID, - request_type VARCHAR(50) NOT NULL, - status VARCHAR(50) NOT NULL, - discovery_reason JSONB, - created_at TIMESTAMP NOT NULL DEFAULT NOW(), - updated_at TIMESTAMP NOT NULL DEFAULT NOW(), - expires_at TIMESTAMP, - completed_at TIMESTAMP -); - -CREATE INDEX idx_mrt_requester ON match_request_tracking(requester_did); -CREATE INDEX idx_mrt_target ON match_request_tracking(target_did); -CREATE INDEX idx_mrt_status ON match_request_tracking(status); - --- Consent votes (local index of PDS consent records) -CREATE TABLE match_consent_tracking ( - id BIGSERIAL PRIMARY KEY, - consent_at_uri VARCHAR(500) NOT NULL UNIQUE, - match_request_at_uri VARCHAR(500) NOT NULL, - voter_did VARCHAR(255) NOT NULL, - voter_sample_guid UUID NOT NULL, - vote VARCHAR(20) NOT NULL CHECK (vote IN ('ACCEPT', 'REJECT', 'DEFER')), - scope JSONB, - signature TEXT NOT NULL, - voted_at TIMESTAMP NOT NULL, - expires_at TIMESTAMP, - UNIQUE(match_request_at_uri, voter_did) -); - -CREATE INDEX idx_mct_request ON match_consent_tracking(match_request_at_uri); -CREATE INDEX idx_mct_voter ON match_consent_tracking(voter_did); - --- ============================================================================ --- PART 4: Extend existing IBD tables --- ============================================================================ - --- Add match request reference to ibd_discovery_index 
-ALTER TABLE ibd_discovery_index - ADD COLUMN match_request_at_uri VARCHAR(500), - ADD COLUMN requester_did VARCHAR(255), - ADD COLUMN target_did VARCHAR(255); - -CREATE INDEX idx_ibd_request_uri ON ibd_discovery_index(match_request_at_uri); -``` - ---- - -## Service Layer - -### 1. MatchDiscoveryService - -Generates match suggestions based on various criteria. - -```scala -trait MatchDiscoveryService { - /** - * Find potential matches for a user based on shared matches. - */ - def findSharedMatchSuggestions( - sampleGuid: UUID, - minSharedMatches: Int = 2 - ): Future[Seq[MatchSuggestion]] - - /** - * Find potential matches based on population overlap. - */ - def findPopulationOverlapSuggestions( - sampleGuid: UUID, - minOverlapScore: Double = 0.6 - ): Future[Seq[MatchSuggestion]] - - /** - * Find potential matches based on shared haplogroups. - */ - def findHaplogroupMatchSuggestions( - sampleGuid: UUID, - haplogroupType: HaplogroupType - ): Future[Seq[MatchSuggestion]] - - /** - * Get all suggestions for a user, ranked by score. - */ - def getSuggestionsForUser( - userDid: String, - limit: Int = 50 - ): Future[Seq[RankedMatchSuggestion]] - - /** - * Dismiss a suggestion (user not interested). - */ - def dismissSuggestion(suggestionId: Long, userDid: String): Future[Boolean] -} -``` - -### 2. MatchRequestService - -Manages match request lifecycle. - -```scala -trait MatchRequestService { - /** - * Create a new match request. - * Writes to requester's PDS and tracks locally. - */ - def createMatchRequest( - requesterDid: String, - requesterBiosampleUri: String, - targetDid: String, - requestType: MatchRequestType, - discoveryReason: Option[DiscoveryReason], - message: Option[String] - ): Future[MatchRequest] - - /** - * Get pending requests for a user (as target). - */ - def getPendingRequestsForUser(targetDid: String): Future[Seq[MatchRequest]] - - /** - * Get requests initiated by a user. 
- */ - def getRequestsByUser(requesterDid: String): Future[Seq[MatchRequest]] - - /** - * Record a consent vote for a request. - */ - def recordConsent( - matchRequestUri: String, - voterDid: String, - voterBiosampleUri: String, - vote: ConsentVote, - scope: Seq[ConsentScope] - ): Future[MatchConsent] - - /** - * Check if both parties have consented. - */ - def checkMutualConsent(matchRequestUri: String): Future[Option[MutualConsent]] - - /** - * Cancel a pending request. - */ - def cancelRequest(requestUri: String, requesterDid: String): Future[Boolean] -} -``` - -### 3. PopulationAnalysisService - -Manages population breakdown data and overlap calculations. - -```scala -trait PopulationAnalysisService { - /** - * Cache a population breakdown from PDS. - */ - def cachePopulationBreakdown( - sampleGuid: UUID, - citizenDid: String, - breakdown: PopulationBreakdown - ): Future[Unit] - - /** - * Calculate overlap score between two samples. - */ - def calculateOverlapScore( - sampleGuid1: UUID, - sampleGuid2: UUID - ): Future[Double] - - /** - * Batch compute overlap scores for a sample against all others. - */ - def computeOverlapScoresForSample(sampleGuid: UUID): Future[Int] - - /** - * Get population breakdown for a sample. - */ - def getBreakdown(sampleGuid: UUID): Future[Option[PopulationBreakdown]] -} -``` - -### 4. IbdMatchingService - -Handles IBD match indexing and querying. **Coordinates with Edge App for actual analysis.** - -```scala -trait IbdMatchingService { - /** - * Record a confirmed IBD match from Edge App attestations. - */ - def recordConfirmedMatch( - sampleGuid1: UUID, - sampleGuid2: UUID, - matchDetails: IbdMatchDetails, - attestation1: IbdAttestation, - attestation2: IbdAttestation - ): Future[IbdDiscoveryIndex] - - /** - * Get all matches for a sample. - */ - def getMatchesForSample(sampleGuid: UUID): Future[Seq[IbdMatch]] - - /** - * Get match details between two samples. 
- */ - def getMatchBetween(sampleGuid1: UUID, sampleGuid2: UUID): Future[Option[IbdMatch]] - - /** - * Verify attestation signatures. - */ - def verifyAttestations( - attestation1: IbdAttestation, - attestation2: IbdAttestation, - matchHash: String - ): Future[Boolean] - - /** - * Update consensus status based on attestations. - */ - def updateConsensusStatus(indexId: Long): Future[ConsensusStatus] -} -``` - ---- - -## Edge App Coordination Points - -### Interface Contract: DecodingUs ↔ Edge App - -The Edge App (Java) must implement these coordination points: - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ EDGE APP INTERFACE CONTRACT │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ 1. NOTIFICATION CHANNEL │ -│ ──────────────────── │ -│ DecodingUs → Edge App: Notify of mutual consent │ -│ │ -│ Endpoint: POST /api/edge/v1/match-ready │ -│ Payload: { │ -│ matchRequestUri: String, │ -│ partnerDid: String, │ -│ partnerPdsUrl: String, │ -│ requestType: "AUTOSOMAL" | "Y_CHROMOSOME" | "MT_DNA" | "FULL", │ -│ consentScope: ["SEGMENT_POSITIONS", "SHARED_CM_TOTAL", ...] │ -│ } │ -│ │ -│ 2. KEY EXCHANGE PROTOCOL │ -│ ───────────────────── │ -│ Edge App ↔ Edge App: ECDH key agreement │ -│ │ -│ • Use AT Protocol for key exchange messages │ -│ • Lexicon: com.decodingus.edge.keyExchange │ -│ • Keys rotated per comparison session │ -│ │ -│ 3. DATA EXCHANGE FORMAT │ -│ ───────────────────── │ -│ Edge App A → Edge App B: Encrypted variant data │ -│ │ -│ Format: { │ -│ sessionId: UUID, │ -│ encryptedPayload: Base64, // AES-256-GCM encrypted │ -│ iv: Base64, │ -│ authTag: Base64, │ -│ dataType: "VARIANT_POSITIONS" | "SEGMENT_BOUNDARIES" │ -│ } │ -│ │ -│ 4. 
RESULT ATTESTATION │ -│ ────────────────── │ -│ Edge App → DecodingUs: Submit match results │ -│ │ -│ Endpoint: POST /api/v1/ibd/attestation │ -│ Payload: { │ -│ matchRequestUri: String, │ -│ attestingDid: String, │ -│ attestingSampleGuid: UUID, │ -│ matchSummary: { │ -│ totalSharedCm: Double, │ -│ numSegments: Int, │ -│ largestSegmentCm: Double, │ -│ regionType: String │ -│ }, │ -│ matchSummaryHash: String, // SHA-256 of canonical summary │ -│ signature: String, // Ed25519 signature with PDS key │ -│ partnerSummaryHash: String // Hash received from partner │ -│ } │ -│ │ -│ 5. SECURITY REQUIREMENTS │ -│ ───────────────────── │ -│ • All data encrypted in transit (TLS 1.3+) │ -│ • All data encrypted at rest on Edge App │ -│ • Variant data never stored on DecodingUs servers │ -│ • Only match summaries (cM, segments) indexed │ -│ • Keys derived from PDS signing keys (verifiable) │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### Edge App Responsibilities - -| Responsibility | Description | -|---------------|-------------| -| **Variant Storage** | Securely store user's variant calls locally | -| **Key Management** | Generate/rotate session keys for P2P exchange | -| **IBD Algorithm** | Implement IBD segment detection algorithm | -| **Encryption** | Encrypt/decrypt variant data for exchange | -| **P2P Communication** | Establish direct connection with partner Edge App | -| **Result Signing** | Sign match results with user's PDS key | -| **UI/UX** | Present match requests, manage consent workflow | - -### DecodingUs Responsibilities - -| Responsibility | Description | -|---------------|-------------| -| **Discovery Engine** | Generate match suggestions | -| **Request Routing** | Track match requests across PDS | -| **Consent Verification** | Verify mutual consent before triggering Edge Apps | -| **Attestation Indexing** | Index confirmed matches in `ibd_discovery_index` | -| **Match Querying** | Provide API for match list 
queries | -| **Population Caching** | Cache population data for overlap calculations | - ---- - -## API Endpoints - -### Discovery API - -``` -# Match Suggestions -GET /api/v1/discovery/suggestions - ?type={SHARED_MATCH|POPULATION_OVERLAP|HAPLOGROUP_MATCH} - &limit={int} - → Seq[RankedMatchSuggestion] - -POST /api/v1/discovery/suggestions/{id}/dismiss - → { success: Boolean } - -# Population Analysis -GET /api/v1/discovery/population/{sampleGuid} - → PopulationBreakdown - -GET /api/v1/discovery/population/overlap/{sampleGuid1}/{sampleGuid2} - → { overlapScore: Double } -``` - -### Match Request API - -``` -# Match Requests -POST /api/v1/matches/request - Body: CreateMatchRequest - → MatchRequest - -GET /api/v1/matches/requests/pending - → Seq[MatchRequest] - -GET /api/v1/matches/requests/sent - → Seq[MatchRequest] - -POST /api/v1/matches/requests/{uri}/cancel - → { success: Boolean } - -# Consent -POST /api/v1/matches/consent - Body: CreateMatchConsent - → MatchConsent - -GET /api/v1/matches/consent/status/{requestUri} - → ConsentStatus -``` - -### IBD Results API - -``` -# Confirmed Matches -GET /api/v1/matches/confirmed - ?regionType={AUTOSOMAL|Y_CHROMOSOME|MT_DNA} - &minCm={double} - → Seq[IbdMatch] - -GET /api/v1/matches/confirmed/{sampleGuid} - → Seq[IbdMatch] - -# Attestation (called by Edge App) -POST /api/v1/ibd/attestation - Body: IbdAttestation - → { indexed: Boolean, consensusStatus: String } -``` - ---- - -## Tapir Endpoint Definitions - -```scala -object MatchDiscoveryEndpoints { - - val getSuggestions: Endpoint[String, SuggestionQuery, ApiError, Seq[RankedMatchSuggestion], Any] = - endpoint.get - .securityIn(auth.bearer[String]()) - .in("api" / "v1" / "discovery" / "suggestions") - .in(query[Option[String]]("type")) - .in(query[Int]("limit").default(50)) - .out(jsonBody[Seq[RankedMatchSuggestion]]) - .errorOut(jsonBody[ApiError]) - - val getPopulationOverlap: Endpoint[String, (UUID, UUID), ApiError, OverlapResult, Any] = - endpoint.get - 
.securityIn(auth.bearer[String]()) - .in("api" / "v1" / "discovery" / "population" / "overlap") - .in(path[UUID]("sampleGuid1") / path[UUID]("sampleGuid2")) - .out(jsonBody[OverlapResult]) - .errorOut(jsonBody[ApiError]) -} - -object MatchRequestEndpoints { - - val createRequest: Endpoint[String, CreateMatchRequest, ApiError, MatchRequest, Any] = - endpoint.post - .securityIn(auth.bearer[String]()) - .in("api" / "v1" / "matches" / "request") - .in(jsonBody[CreateMatchRequest]) - .out(jsonBody[MatchRequest]) - .errorOut(jsonBody[ApiError]) - - val recordConsent: Endpoint[String, CreateMatchConsent, ApiError, MatchConsent, Any] = - endpoint.post - .securityIn(auth.bearer[String]()) - .in("api" / "v1" / "matches" / "consent") - .in(jsonBody[CreateMatchConsent]) - .out(jsonBody[MatchConsent]) - .errorOut(jsonBody[ApiError]) -} - -object IbdEndpoints { - - val submitAttestation: Endpoint[String, IbdAttestationRequest, ApiError, AttestationResult, Any] = - endpoint.post - .securityIn(auth.bearer[String]()) // Edge App auth - .in("api" / "v1" / "ibd" / "attestation") - .in(jsonBody[IbdAttestationRequest]) - .out(jsonBody[AttestationResult]) - .errorOut(jsonBody[ApiError]) - - val getConfirmedMatches: Endpoint[String, MatchQuery, ApiError, Seq[IbdMatch], Any] = - endpoint.get - .securityIn(auth.bearer[String]()) - .in("api" / "v1" / "matches" / "confirmed") - .in(query[Option[String]]("regionType")) - .in(query[Option[Double]]("minCm")) - .out(jsonBody[Seq[IbdMatch]]) - .errorOut(jsonBody[ApiError]) -} -``` - ---- - -## Security Considerations - -### Data Privacy - -| Data Type | Storage Location | Encryption | -|-----------|-----------------|------------| -| Variant calls | Edge App only | AES-256 at rest | -| Match requests | User PDS | AT Protocol signing | -| Consent votes | User PDS | AT Protocol signing | -| Match summaries | DecodingUs DB | Standard DB encryption | -| Population breakdowns | User PDS + cache | AT Protocol + DB encryption | - -### Authentication & 
Authorization - -1. **User Authentication**: OAuth2/DID-based via AT Protocol -2. **Edge App Authentication**: API keys + request signing -3. **Consent Verification**: Dual-signature requirement before data exchange -4. **Rate Limiting**: Prevent discovery enumeration attacks - -### Cryptographic Requirements - -``` -Key Exchange: ECDH (X25519) -Data Encryption: AES-256-GCM -Signatures: Ed25519 (AT Protocol standard) -Hashing: SHA-256 for match summaries -``` - ---- - -## Implementation Phases - -### Phase 1: Lexicon & Schema - -**Scope:** -- Define and publish Lexicon extensions -- Database schema migration -- Repository layer for new tables - -**Deliverables:** -- [ ] Lexicon JSON files for matchList, matchRequest, populationBreakdown, matchConsent -- [ ] Database evolution script -- [ ] `MatchSuggestionRepository` -- [ ] `PopulationBreakdownCacheRepository` -- [ ] `MatchRequestTrackingRepository` -- [ ] `MatchConsentTrackingRepository` - -**Edge App Coordination:** -- Share Lexicon definitions with Edge App team -- Agree on key exchange protocol - -### Phase 2: Discovery Engine - -**Scope:** -- Implement match suggestion algorithms -- Population overlap calculation -- Suggestion ranking - -**Deliverables:** -- [ ] `MatchDiscoveryService` implementation -- [ ] `PopulationAnalysisService` implementation -- [ ] Background job for overlap score computation -- [ ] Discovery API endpoints - -**Edge App Coordination:** -- None required (server-side only) - -### Phase 3: Request & Consent Flow - -**Scope:** -- Match request lifecycle management -- Consent voting and verification -- PDS record creation via AT Protocol - -**Deliverables:** -- [ ] `MatchRequestService` implementation -- [ ] AT Protocol client for PDS writes -- [ ] Firehose listener for consent records -- [ ] Match request API endpoints - -**Edge App Coordination:** -- Coordinate on consent UI/UX -- Define notification webhook contract - -### Phase 4: IBD Integration - -**Scope:** -- Edge App notification 
on mutual consent -- Attestation submission endpoint -- Match indexing and consensus tracking - -**Deliverables:** -- [ ] `IbdMatchingService` implementation -- [ ] Extend existing `IbdDiscoveryIndexRepository` -- [ ] Extend existing `IbdPdsAttestationRepository` -- [ ] IBD API endpoints -- [ ] Attestation verification logic - -**Edge App Coordination:** -- **CRITICAL**: Define and test data exchange protocol -- Implement key exchange mechanism -- Test P2P encrypted communication -- Verify attestation signing/verification - -### Phase 5: UI & Notifications - -**Scope:** -- User-facing match discovery interface -- Request/consent management UI -- Match list visualization - -**Deliverables:** -- [ ] Twirl templates for discovery pages -- [ ] Match request notification system -- [ ] Match list dashboard -- [ ] Population breakdown visualization - -**Edge App Coordination:** -- Coordinate on consistent UX across platforms -- Define deep-linking for match requests - ---- - -## Configuration - -```hocon -decodingus.matching { - discovery { - shared-match-threshold = 2 # Minimum shared matches for suggestion - population-overlap-threshold = 0.6 # Minimum overlap score (0-1) - suggestion-expiry-days = 90 - max-suggestions-per-user = 100 - } - - requests { - default-expiry-days = 30 - max-pending-requests = 50 - consent-expiry-days = 365 - } - - ibd { - attestation-timeout-hours = 24 # Time for both attestations - min-shared-cm-to-index = 7.0 # Don't index tiny matches - } - - edge-app { - notification-webhook-timeout = 30.seconds - retry-attempts = 3 - } -} -``` - ---- - -## Testing Strategy - -### Unit Tests -- Overlap score calculation -- Suggestion ranking algorithm -- Consent verification logic -- Attestation signature verification - -### Integration Tests -- Full request → consent → attestation flow -- Firehose event processing -- Edge App webhook delivery - -### End-to-End Tests -- Complete match discovery workflow -- Cross-PDS consent synchronization - -### Edge App 
Integration Testing -- **Joint testing required** with Edge App team -- Key exchange protocol verification -- Encrypted data round-trip -- Attestation interoperability - ---- - -## Monitoring & Metrics - -### Key Metrics - -- Suggestions generated per day -- Request conversion rate (suggestion → request) -- Consent acceptance rate -- Average time to mutual consent -- Attestation success rate -- Match indexing rate - -### Alerts - -- Attestation verification failures -- Consent timeout rate spike -- Edge App webhook failures -- Population cache staleness - ---- - -## Future Considerations - -1. **Group Matching**: Support for family/surname project group comparisons -2. **Triangulation**: Automated triangulation detection across multiple matches -3. **Chromosome Browser**: Visual segment comparison (requires Edge App coordination) -4. **Match Notes Sync**: Synchronize match notes across users -5. **Relationship Prediction ML**: Machine learning for relationship estimation - ---- - -## Appendix: Existing Schema Reference - -### ibd_discovery_index (Evolution 7) - -```sql -CREATE TABLE public.ibd_discovery_index ( - id BIGSERIAL PRIMARY KEY, - sample_guid_1 UUID NOT NULL, - sample_guid_2 UUID NOT NULL, - pangenome_graph_id INTEGER NOT NULL, - match_region_type VARCHAR(50) NOT NULL, -- AUTOSOMAL, Y_CHROMOSOME, etc. 
- total_shared_cm_approx DOUBLE PRECISION, - num_shared_segments_approx INTEGER, - is_publicly_discoverable BOOLEAN DEFAULT FALSE, - consensus_status VARCHAR(50) DEFAULT 'INITIATED', - last_consensus_update TIMESTAMP DEFAULT NOW(), - validation_service_guid UUID, - validation_timestamp TIMESTAMP, - indexed_by_service VARCHAR(255), - indexed_date TIMESTAMP DEFAULT NOW() -); -``` - -### ibd_pds_attestation (Evolution 7) - -```sql -CREATE TABLE public.ibd_pds_attestation ( - id BIGSERIAL PRIMARY KEY, - ibd_discovery_index_id BIGINT NOT NULL, - attesting_pds_guid UUID NOT NULL, - attesting_sample_guid UUID NOT NULL, - attestation_timestamp TIMESTAMP DEFAULT NOW(), - attestation_signature TEXT NOT NULL, - match_summary_hash VARCHAR(255) NOT NULL, - attestation_type VARCHAR(50) NOT NULL, -- INITIAL_REPORT, CONFIRMATION, etc. - attestation_notes TEXT -); -``` - -### ancestry_analysis (Evolution 1) - -```sql -CREATE TABLE ancestry_analysis ( - ancestry_analysis_id SERIAL PRIMARY KEY, - sample_guid UUID NOT NULL, - analysis_method_id INT NOT NULL, - population_id INT NOT NULL, - probability DECIMAL(5, 4) -); -``` diff --git a/documents/planning/jsonb-consolidation-analysis.md b/documents/planning/jsonb-consolidation-analysis.md deleted file mode 100644 index 1f80f299..00000000 --- a/documents/planning/jsonb-consolidation-analysis.md +++ /dev/null @@ -1,553 +0,0 @@ -# JSONB Consolidation Analysis - -## Executive Summary - -This document analyzes the current database schema to identify tables that would be better served as JSONB columns on their parent tables. The analysis considers query patterns, reporting performance, cardinality relationships, and PostgreSQL JSONB capabilities. - -**Key Finding:** 7 tables are strong candidates for JSONB consolidation, potentially eliminating 5-7 tables while improving data locality and reducing JOIN overhead for common access patterns. 
- ---- - -## Evaluation Criteria - -Each table was evaluated against: - -| Criterion | Favors JSONB | Favors Separate Table | -|-----------|--------------|----------------------| -| Cardinality | 1:1 or 1:few | 1:many or many:many | -| Query pattern | Always with parent | Independent queries | -| Filtering/JOINs | Never filtered independently | Used in WHERE/JOIN | -| Aggregations | Never aggregated | SUM/AVG/COUNT queries | -| Update frequency | Set once, rarely changed | Frequently updated | -| Data size | Small, bounded | Large, unbounded | -| Constraints | Simple validation | Complex CHECK/UNIQUE | - ---- - -## Reporting Performance Implications - -### JSONB Advantages -- **Reduced JOINs**: Co-located data eliminates JOIN overhead for 1:1 relationships -- **Better locality**: Related data on same page reduces I/O -- **Flexible schema**: Easy to add optional fields without migrations -- **GIN indexing**: Efficient containment and existence queries - -### JSONB Disadvantages -- **Aggregation overhead**: `->>'field'` casting slower than typed columns -- **Full column updates**: No partial JSONB updates (entire value replaced) -- **Index size**: GIN indexes larger than B-tree on typed columns -- **Query complexity**: Path expressions less readable than column names - -### Mitigation Strategies -```sql --- For frequently aggregated JSONB fields, create expression indexes: -CREATE INDEX idx_coverage_mean ON alignment_metadata - USING BTREE ((coverage->>'mean_depth')::double precision); - --- For containment queries, use jsonb_path_ops: -CREATE INDEX idx_checksums ON sequence_file - USING GIN (checksums jsonb_path_ops); -``` - ---- - -## Strong Candidates for Consolidation - -### Tier 1: Clear Wins (Low Risk, High Reward) - -#### 1. 
sequence_file_checksum → sequence_file.checksums - -| Aspect | Current | Proposed | -|--------|---------|----------| -| Relationship | 1:few (1-2 per file) | JSONB array | -| Access pattern | Always with parent | Same | -| Independent queries | None | N/A | -| Rows eliminated | ~50-70% reduction | - | - -**Current Schema:** -```sql -CREATE TABLE sequence_file_checksum ( - id SERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL REFERENCES sequence_file(id), - checksum VARCHAR(255) NOT NULL, - algorithm VARCHAR(50) NOT NULL, - verified_at TIMESTAMP NOT NULL, - UNIQUE (sequence_file_id, algorithm) -); -``` - -**Proposed JSONB:** -```sql -ALTER TABLE sequence_file ADD COLUMN checksums JSONB DEFAULT '[]'::jsonb; - --- Structure: [{"algorithm": "MD5", "checksum": "abc123...", "verified_at": "2025-01-01T00:00:00Z"}, ...] --- Constraint moved to application layer -``` - -**Migration:** -```sql -UPDATE sequence_file sf -SET checksums = ( - SELECT COALESCE(jsonb_agg(jsonb_build_object( - 'algorithm', sfc.algorithm, - 'checksum', sfc.checksum, - 'verified_at', sfc.verified_at - )), '[]'::jsonb) - FROM sequence_file_checksum sfc - WHERE sfc.sequence_file_id = sf.id -); -``` - ---- - -#### 2. sequence_http_location → sequence_file.http_locations - -| Aspect | Current | Proposed | -|--------|---------|----------| -| Relationship | 1:few (1-3 per file) | JSONB array | -| Access pattern | Always with parent | Same | -| Independent queries | None | N/A | -| Rows eliminated | ~80% reduction | - | - -**Current Schema:** -```sql -CREATE TABLE sequence_http_location ( - id SERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL REFERENCES sequence_file(id), - file_url TEXT NOT NULL, - file_index_url TEXT -); -``` - -**Proposed JSONB:** -```sql -ALTER TABLE sequence_file ADD COLUMN http_locations JSONB DEFAULT '[]'::jsonb; - --- Structure: [{"file_url": "https://...", "file_index_url": "https://..."}, ...] -``` - ---- - -#### 3. 
sequence_atp_location → sequence_file.atp_location - -| Aspect | Current | Proposed | -|--------|---------|----------| -| Relationship | 1:1 (one per file max) | JSONB object | -| Access pattern | Always with parent | Same | -| Independent queries | None | N/A | -| Table eliminated | Yes | - | - -**Current Schema:** -```sql -CREATE TABLE sequence_atp_location ( - id SERIAL PRIMARY KEY, - sequence_file_id INT NOT NULL REFERENCES sequence_file(id), - repo_did VARCHAR(255) NOT NULL, - record_cid VARCHAR(255) NOT NULL, - record_path TEXT NOT NULL, - index_did VARCHAR(255), - index_cid VARCHAR(255) -); -``` - -**Proposed JSONB:** -```sql -ALTER TABLE sequence_file ADD COLUMN atp_location JSONB; - --- Structure: {"repo_did": "did:plc:...", "record_cid": "...", "record_path": "...", ...} --- NULL when no ATP location -``` - ---- - -### Tier 2: Strong Candidates (Medium Effort) - -#### 4. alignment_coverage → alignment_metadata.coverage - -| Aspect | Current | Proposed | -|--------|---------|----------| -| Relationship | 1:1 (strict FK) | JSONB object | -| Access pattern | Always JOINed | Same | -| Aggregations | Yes (CoverageBenchmark) | Requires expression indexes | -| Rows eliminated | 50% | - | - -**Current Schema:** -```sql -CREATE TABLE alignment_coverage ( - id SERIAL PRIMARY KEY, - alignment_metadata_id INT NOT NULL UNIQUE REFERENCES alignment_metadata(id), - mean_depth DOUBLE PRECISION, - median_depth DOUBLE PRECISION, - percent_coverage_at_1x DOUBLE PRECISION, - percent_coverage_at_5x DOUBLE PRECISION, - percent_coverage_at_10x DOUBLE PRECISION, - percent_coverage_at_20x DOUBLE PRECISION, - percent_coverage_at_30x DOUBLE PRECISION, - bases_no_coverage BIGINT, - bases_low_quality_mapping BIGINT, - bases_callable BIGINT, - mean_mapping_quality DOUBLE PRECISION -); -``` - -**Proposed JSONB:** -```sql -ALTER TABLE alignment_metadata ADD COLUMN coverage JSONB; - --- Structure: { --- "mean_depth": 30.5, --- "median_depth": 29.0, --- "percent_coverage_at_1x": 0.99, --- 
... --- } -``` - -**Required Index for Aggregations:** -```sql --- Support CoverageBenchmark queries -CREATE INDEX idx_am_coverage_mean_depth - ON alignment_metadata USING BTREE ((coverage->>'mean_depth')::double precision); -CREATE INDEX idx_am_coverage_median - ON alignment_metadata USING BTREE ((coverage->>'median_depth')::double precision); -``` - -**Impact:** Requires rewriting 4 aggregation queries in `CoverageBenchmarkRepository`: -- `getBenchmarksByLab` -- `getBenchmarksByLabAndTestType` -- `getBenchmarksByContig` -- `getOverallBenchmarks` - ---- - -#### 5. citizen_biosample_original_haplogroup → citizen_biosample.original_haplogroups - -| Aspect | Current | Proposed | -|--------|---------|----------| -| Relationship | 1:few (per publication) | JSONB array | -| Access pattern | With parent | Same | -| Constraint | UNIQUE(biosample_id, publication_id) | Application-level | -| Existing JSONB | y_haplogroup, mt_haplogroup already JSONB | Consistent pattern | - -**Current Schema:** -```sql -CREATE TABLE citizen_biosample_original_haplogroup ( - id SERIAL PRIMARY KEY, - citizen_biosample_id INT NOT NULL REFERENCES citizen_biosample(id), - publication_id INT NOT NULL REFERENCES publication(id), - y_haplogroup_result JSONB, - mt_haplogroup_result JSONB, - notes TEXT, - UNIQUE(citizen_biosample_id, publication_id) -); -``` - -**Proposed JSONB:** -```sql -ALTER TABLE citizen_biosample - ADD COLUMN original_haplogroups_by_publication JSONB DEFAULT '[]'::jsonb; - --- Structure: [ --- { --- "publication_id": 123, --- "y_haplogroup_result": {...}, --- "mt_haplogroup_result": {...}, --- "notes": "..." --- }, --- ... 
--- ] -``` - -**Uniqueness Enforcement:** -```scala -// Application-level validation -def addOriginalHaplogroup(biosample: CitizenBiosample, pubId: Int, data: HaplogroupData): Future[CitizenBiosample] = { - val existing = biosample.originalHaplogroupsByPublication.getOrElse(Seq.empty) - if (existing.exists(_.publicationId == pubId)) { - Future.failed(DuplicatePublicationHaplogroupError(pubId)) - } else { - val updated = existing :+ OriginalHaplogroup(pubId, data.y, data.mt, data.notes) - biosampleRepository.update(biosample.copy(originalHaplogroupsByPublication = Some(updated))) - } -} -``` - ---- - -#### 6. biosample_original_haplogroup → biosample.original_haplogroups - -Same pattern as #5, for publication (external) biosamples. - -**Current Schema:** -```sql -CREATE TABLE biosample_original_haplogroup ( - id SERIAL PRIMARY KEY, - biosample_id INT NOT NULL REFERENCES biosample(id), - publication_id INT NOT NULL REFERENCES publication(id), - y_haplogroup_result JSONB, - mt_haplogroup_result JSONB, - notes TEXT, - UNIQUE(biosample_id, publication_id) -); -``` - -**Proposed:** Same JSONB array pattern on `biosample` table. - ---- - -### Tier 3: Conditional Candidates (Trade-offs) - -#### 7. 
Revision Metadata Tables (haplogroup_variant_metadata, relationship_revision_metadata) - -| Aspect | Current | Consideration | -|--------|---------|---------------| -| Relationship | 1:many (revision history) | JSONB array for history | -| Access pattern | Recursive chain queries | Would need app-level recursion | -| Use case | Audit trail | Append-only log fits JSONB | - -**Recommendation:** Hybrid approach - keep current table for active revisions, add JSONB column for historical snapshot: - -```sql --- Add historical log to parent tables -ALTER TABLE haplogroup_variant ADD COLUMN revision_history JSONB DEFAULT '[]'::jsonb; -ALTER TABLE haplogroup_relationship ADD COLUMN revision_history JSONB DEFAULT '[]'::jsonb; - --- Structure: [ --- {"revision_id": 1, "author": "...", "timestamp": "...", "change_type": "CREATE", "comment": "..."}, --- {"revision_id": 2, "author": "...", "timestamp": "...", "change_type": "UPDATE", "comment": "..."} --- ] -``` - -**Trade-off:** Faster history reads, but complex recursive queries (getVariantRevisionChain) would need application logic. - ---- - -## Do NOT Consolidate - -### Many-to-Many Junction Tables -- `publication_biosample` -- `publication_citizen_biosample` -- `publication_ena_study` - -**Reason:** Junction tables with two FKs are the correct pattern for M:N relationships. JSONB arrays on both sides would require complex sync logic. - -### Core Entity Tables -- `biosample`, `citizen_biosample`, `specimen_donor` -- `haplogroup`, `variant`, `haplogroup_variant` -- `publication`, `genomic_studies` - -**Reason:** Independently queried entities with complex filtering, constraints, and relationships. - -### Graph/Tree Structures -- `pangenome_graph`, `pangenome_node`, `pangenome_edge`, `pangenome_path` -- `haplogroup_relationship` (tree structure) - -**Reason:** Graph algorithms require efficient traversal; JSONB would complicate recursive queries. 
- -### High-Volume Data Tables -- `reported_variant_pangenome` -- `quality_metrics` -- `ibd_discovery_index` - -**Reason:** High cardinality, independent queries, aggregation targets. - ---- - -## Implementation Plan - -### Phase 1: Sequence File Consolidation (Low Risk) [X] Completed - -**Evolution XX:** -```sql --- !Ups - --- 1. Add JSONB columns -ALTER TABLE sequence_file ADD COLUMN checksums JSONB DEFAULT '[]'::jsonb; -ALTER TABLE sequence_file ADD COLUMN http_locations JSONB DEFAULT '[]'::jsonb; -ALTER TABLE sequence_file ADD COLUMN atp_location JSONB; - --- 2. Migrate data -UPDATE sequence_file sf SET checksums = ( - SELECT COALESCE(jsonb_agg(jsonb_build_object( - 'algorithm', sfc.algorithm, - 'checksum', sfc.checksum, - 'verified_at', sfc.verified_at - ) ORDER BY sfc.algorithm), '[]'::jsonb) - FROM sequence_file_checksum sfc WHERE sfc.sequence_file_id = sf.id -); - -UPDATE sequence_file sf SET http_locations = ( - SELECT COALESCE(jsonb_agg(jsonb_build_object( - 'file_url', shl.file_url, - 'file_index_url', shl.file_index_url - )), '[]'::jsonb) - FROM sequence_http_location shl WHERE shl.sequence_file_id = sf.id -); - -UPDATE sequence_file sf SET atp_location = ( - SELECT jsonb_build_object( - 'repo_did', sal.repo_did, - 'record_cid', sal.record_cid, - 'record_path', sal.record_path, - 'index_did', sal.index_did, - 'index_cid', sal.index_cid - ) - FROM sequence_atp_location sal WHERE sal.sequence_file_id = sf.id -); - --- 3. Create indexes -CREATE INDEX idx_sf_checksums ON sequence_file USING GIN (checksums jsonb_path_ops); - --- 4. 
Drop old tables -DROP TABLE sequence_file_checksum; -DROP TABLE sequence_http_location; -DROP TABLE sequence_atp_location; - --- !Downs --- (reverse migration SQL) -``` - -**Code Changes:** -- Update `SequenceFile` domain model -- Remove `SequenceFileChecksum`, `SequenceHttpLocation`, `SequenceAtpLocation` models -- Update `SequenceFileRepository` to handle JSONB -- Update `BiosampleDataService` file creation logic - ---- - -### Phase 2: Alignment Coverage Consolidation (Medium Risk) - -**Evolution XX+1:** -```sql --- !Ups - -ALTER TABLE alignment_metadata ADD COLUMN coverage JSONB; - -UPDATE alignment_metadata am SET coverage = ( - SELECT jsonb_build_object( - 'mean_depth', ac.mean_depth, - 'median_depth', ac.median_depth, - 'percent_coverage_at_1x', ac.percent_coverage_at_1x, - 'percent_coverage_at_5x', ac.percent_coverage_at_5x, - 'percent_coverage_at_10x', ac.percent_coverage_at_10x, - 'percent_coverage_at_20x', ac.percent_coverage_at_20x, - 'percent_coverage_at_30x', ac.percent_coverage_at_30x, - 'bases_no_coverage', ac.bases_no_coverage, - 'bases_low_quality_mapping', ac.bases_low_quality_mapping, - 'bases_callable', ac.bases_callable, - 'mean_mapping_quality', ac.mean_mapping_quality - ) - FROM alignment_coverage ac WHERE ac.alignment_metadata_id = am.id -); - --- Indexes for aggregation queries -CREATE INDEX idx_am_cov_mean_depth ON alignment_metadata - USING BTREE (((coverage->>'mean_depth')::double precision)); -CREATE INDEX idx_am_cov_pct_30x ON alignment_metadata - USING BTREE (((coverage->>'percent_coverage_at_30x')::double precision)); - -DROP TABLE alignment_coverage; -``` - -**Code Changes:** -- Update `AlignmentMetadata` model to include `AlignmentCoverage` as embedded case class -- Rewrite `CoverageBenchmarkRepository` aggregation queries -- Update `AlignmentRepository` to handle embedded coverage - -**Query Migration Example:** -```sql --- Before (with JOIN) -SELECT sl.lab, sl.test_type, AVG(ac.mean_depth) -FROM sequence_library sl -JOIN 
sequence_file sf ON sf.library_id = sl.id -JOIN alignment_metadata am ON am.sequence_file_id = sf.id -JOIN alignment_coverage ac ON ac.alignment_metadata_id = am.id -GROUP BY sl.lab, sl.test_type; - --- After (JSONB) -SELECT sl.lab, sl.test_type, AVG((am.coverage->>'mean_depth')::double precision) -FROM sequence_library sl -JOIN sequence_file sf ON sf.library_id = sl.id -JOIN alignment_metadata am ON am.sequence_file_id = sf.id -WHERE am.coverage IS NOT NULL -GROUP BY sl.lab, sl.test_type; -``` - ---- - -### Phase 3: Haplogroup Tracking Consolidation (Medium Risk) - -**Evolution XX+2:** -```sql --- !Ups - -ALTER TABLE citizen_biosample - ADD COLUMN original_haplogroups_by_publication JSONB DEFAULT '[]'::jsonb; - -ALTER TABLE biosample - ADD COLUMN original_haplogroups_by_publication JSONB DEFAULT '[]'::jsonb; - --- Migrate citizen_biosample data -UPDATE citizen_biosample cb SET original_haplogroups_by_publication = ( - SELECT COALESCE(jsonb_agg(jsonb_build_object( - 'publication_id', cboh.publication_id, - 'y_haplogroup_result', cboh.y_haplogroup_result, - 'mt_haplogroup_result', cboh.mt_haplogroup_result, - 'notes', cboh.notes - )), '[]'::jsonb) - FROM citizen_biosample_original_haplogroup cboh - WHERE cboh.citizen_biosample_id = cb.id -); - --- Migrate biosample data -UPDATE biosample b SET original_haplogroups_by_publication = ( - SELECT COALESCE(jsonb_agg(jsonb_build_object( - 'publication_id', boh.publication_id, - 'y_haplogroup_result', boh.y_haplogroup_result, - 'mt_haplogroup_result', boh.mt_haplogroup_result, - 'notes', boh.notes - )), '[]'::jsonb) - FROM biosample_original_haplogroup boh - WHERE boh.biosample_id = b.id -); - --- Index for publication lookups -CREATE INDEX idx_cb_orig_hg_pub ON citizen_biosample - USING GIN (original_haplogroups_by_publication jsonb_path_ops); -CREATE INDEX idx_b_orig_hg_pub ON biosample - USING GIN (original_haplogroups_by_publication jsonb_path_ops); - -DROP TABLE citizen_biosample_original_haplogroup; -DROP TABLE 
biosample_original_haplogroup; -``` - ---- - -## Summary - -### Tables to Consolidate (7 total) - -| Current Table | Target Parent | Column Type | Priority | -|--------------|---------------|-------------|----------| -| sequence_file_checksum | sequence_file | JSONB array | P1 | -| sequence_http_location | sequence_file | JSONB array | P1 | -| sequence_atp_location | sequence_file | JSONB object | P1 | -| alignment_coverage | alignment_metadata | JSONB object | P2 | -| pangenome_alignment_coverage | pangenome_alignment_metadata | JSONB object | P2 | -| citizen_biosample_original_haplogroup | citizen_biosample | JSONB array | P3 | -| biosample_original_haplogroup | biosample | JSONB array | P3 | - -### Expected Outcomes - -- **Tables eliminated:** 7 -- **JOIN reduction:** 3-4 fewer JOINs per sequence file query -- **Code simplification:** Fewer repository classes, simpler data access -- **Performance:** Faster reads for 1:1/1:few relationships; requires monitoring for aggregations - -### Risks & Mitigations - -| Risk | Mitigation | -|------|------------| -| Aggregation slowdown | Expression indexes on frequently aggregated fields | -| Lost constraints | Application-level validation | -| Migration errors | Staged rollout with verification queries | -| Reporting impact | Benchmark before/after with production-like data | diff --git a/documents/planning/multi-test-type-roadmap.md b/documents/planning/multi-test-type-roadmap.md index 7fc76129..f9f6b152 100644 --- a/documents/planning/multi-test-type-roadmap.md +++ b/documents/planning/multi-test-type-roadmap.md @@ -1,5 +1,49 @@ # Multi-Test-Type Support Roadmap +> **⚖️ This doc conflates AppView and Navigator concerns (2026-06-12).** Most of it — +> per-test-type taxonomy/tracking, chip parsing, marker-coverage, file formats, the +> `genotyping_test_summary`/accuracy-tier machinery — is **Navigator's** (the Edge +> tracks data *by test*). 
The **AppView only cares that the calls are reliable enough +> to build the shared genealogy components** (tree, IBD, reports). That reliability +> has exactly two inputs: +> 1. **Coverage conformance** — is a run's depth in line with the norm? **DONE** (D7 +> below). +> 2. **Cross-technology consensus** — the per-biosample call reconciled across all +> its sequencing technologies (`fed.haplogroup_reconciliation`: consensus_haplogroup +> + confidence + snp_concordance + run_count). **In the AppView the *consensus* +> drives tree evolution + reporting, never the individual runs.** This is mirrored +> but **not yet wired in** — the remaining AppView piece. +> +> Cross-test-type IBD (Phase 6) is the separate D1/D3 Edge-to-Edge track. The +> haplogroup-marker-coverage / accuracy-tier machinery (Phases 4–5) is Navigator's. +> +> **⚖️ D7 coverage QA DONE (2026-06-12).** Built: +> `genomics.test_type_coverage_norm` (mig 0030) — the **empirically-derived** cohort +> norm per test type (median/quartile depth, pct tiers, typical Y/mt marker counts), +> recomputed from `fed.coverage_summary ⋈ fed.sequencerun` + `fed.genotype` by +> `du_db::coverage::recompute_norms` (`du-jobs run-once coverage-norms` + hourly); +> per-sample **conformance** on the report (actual depth vs cohort norm + advertised +> spec → BELOW/AT/ABOVE), baselined on the empirical norm because an advertised "30× +> WGS" is a ~90 Gb raw-yield spec and D2C labs don't target 30× aligned; vendor +> conformance on `coverage::benchmarks` (`meets_spec`/`depth_delta`); read API +> `GET /api/v1/test-types[/:code]`. Deferred: age-contribution wiring (typical SNP +> counts captured), raw-yield (Gbases) norm. Memory `test-type-coverage-norms`. +> The Phase-1 schema notes below remain accurate for `test_type_definition` / +> `coverage_expectation_profile`. 
+> +> **⚖️ Rust status (2026-06-07).** **Phase-1 schema is built**, leaner than below: +> `genomics.test_type_definition` (omits `expected_target_depth`, +> `expected_marker_count`, `version`, `release_date`, `deprecated_at`, +> `successor_test_type_id`, `documentation_url`), coverage thresholds in a separate +> `genomics.coverage_expectation_profile`, and a **native `sequence_library.test_type_id` +> FK** (no string column to migrate); `du-domain` `DataGenerationMethod` / +> `TargetType` enums exist. **Seed data is not yet loaded.** Phases 2–6 (target +> regions, `genotyping_test_summary`, marker-coverage reference, test-type-aware +> confidence, cross-test-type IBD) are **forward work = `design-roadmap-rust-rewrite.md` +> D7**. Read the Scala/Slick/Tapir/Pekko + removed `/api/private` specifics as +> illustrative — restate in Rust (axum/utoipa; the Jetstream mirror) when built. +> Triage: `design-doc-triage-report.md` §5. + ## Executive Summary This document outlines the roadmap for extending DecodingUs beyond Whole Genome Sequencing (WGS) to support: @@ -18,8 +62,8 @@ This roadmap integrates with other planning documents: | Document | Relationship | |----------|-------------| | `haplogroup-discovery-system.md` | **Primary integration point.** Y/mtDNA variants from all test types feed into the discovery system for tree building. This roadmap's chip and targeted sequencing services delegate to the discovery system's `PrivateVariantExtractionService`. | -| `ibd-matching-system.md` | IBD comparisons happen Edge-to-Edge using autosomal data. This roadmap's test type metadata helps determine comparison compatibility. | -| `jsonb-consolidation-analysis.md` | Some tables in this roadmap may be candidates for JSONB consolidation. | +| `d3-ibd-matching-impl.md` (on `d1-encrypted-edge-exchange.md`) | IBD comparisons happen Edge-to-Edge using autosomal data. This roadmap's test type metadata helps determine comparison compatibility. 
| +| JSONB consolidation (realized, mig 0002/0004) | The 1:1/1:few tables here were folded into JSONB on their parents in the redesign. | **Schema Note:** All haplogroup-related tables reside in the `tree` schema as defined in `haplogroup-discovery-system.md`. This includes: - `tree.haplogroup`, `tree.haplogroup_variant`, `tree.haplogroup_relationship` @@ -1261,7 +1305,7 @@ class ParserFtdna extends ChipDataParser { ... } 1. **Reference Data Download**: Edge App fetches marker coverage reference from DecodingUs to know which Y/mtDNA SNPs to extract 2. **Metadata Registration**: Edge App submits `GenotypingTestSummary` after local processing 3. **Haplogroup Variant Submission**: Edge App submits Y/mtDNA variants for tree building -4. **IBD Coordination**: Autosomal comparisons happen Edge-to-Edge per `ibd-matching-system.md` +4. **IBD Coordination**: Autosomal comparisons happen Edge-to-Edge per `d3-ibd-matching-impl.md` --- diff --git a/documents/planning/open-code-notes.md b/documents/planning/open-code-notes.md new file mode 100644 index 00000000..f05443d0 --- /dev/null +++ b/documents/planning/open-code-notes.md @@ -0,0 +1,30 @@ +# Tracked: open in-code notes (TODO / transitional) + +Created 2026-06-10. A small backlog of the deliberate forward-looking notes left in +the Rust source, surfaced by a TODO/hack sweep. Neither is a bug; both are +intentional and scoped. Tracked here so they don't get lost in code comments. + +## 1. Jobs: variant-export to a file artifact + +- **Where:** `rust/crates/du-jobs/src/main.rs` (`TODO(jobs)`) +- **What:** add a scheduled job that exports the variant catalog to a file artifact. +- **Context:** the live path already exists — `GET /api/v1/variants/export` streams CSV + on demand. This would be the batch/artifact equivalent (e.g. a periodic dump for + downstream consumers). Match-discovery is explicitly **out of scope** (IBD is not in + production — see the AppView-coordinator track in `collab-platform-d1-d5`). 
+- **Priority:** low. No consumer is blocked; the live endpoint covers current needs. + +## 2. Curation intake auth: X-API-Key → OAuth bearer + +- **Where:** `rust/crates/du-web/src/routes/curation.rs` (module doc) +- **What:** the Navigator → curation intake endpoint authenticates with a static + `X-API-Key` today; it should become the OAuth bearer once the Edge handshake is live. +- **Context:** gated on the encrypted Edge-exchange substrate — see + `documents/planning/d1-encrypted-edge-exchange.md`. Until that handshake exists, the + API key is the machine-auth stopgap. +- **Priority:** sequenced after D1. Functional and acceptable in the interim. + +--- + +*If/when `gh` is authenticated, these can be promoted to GitHub issues; for now they +follow the repo's `documents/planning/` issue convention.* diff --git a/documents/planning/openalex-publication-discovery.md b/documents/planning/openalex-publication-discovery.md index 67bcb8a9..14a1ad87 100644 --- a/documents/planning/openalex-publication-discovery.md +++ b/documents/planning/openalex-publication-discovery.md @@ -1,5 +1,17 @@ # OpenAlex Publication Auto-Discovery System +> **📝 Rust status (2026-06-07).** Substantially built: scheduled +> `publication-discovery` + `publication-update` jobs (tokio, **daily** — not a +> weekly cron), candidates in `pubs.publication_candidate` (dedupe by `openalex_id`), +> and the curator queue `/curator/publications` (accept→promote / reject / defer) — +> so Phase-1's "simple curator review UI" (shown `[ ]` below) is **done**. Added +> beyond the doc: the public on-ramp `/references/submit` (DOI → candidate). +> **Forward:** relevance scoring (`relevance_score` exists but isn't computed), smart +> discovery, biosample-extraction hints, and the `publication_search_run` table. +> Swap the Scala specifics (`OpenAlexService`/Pekko/`/api/private/*`/`SERIAL`/plural +> tables) for `du_external::openalex` + `du_db::publication` + +> `pubs.publication_candidate`. 
Triage: `design-doc-triage-report.md` §3. + ## Overview Automatically discover and surface relevant genomic publications using OpenAlex, reducing curator burden and ensuring the platform stays current with academic research. diff --git a/documents/planning/post-mvp-roadmap.md b/documents/planning/post-mvp-roadmap.md index c3918148..d21e6367 100644 --- a/documents/planning/post-mvp-roadmap.md +++ b/documents/planning/post-mvp-roadmap.md @@ -1,5 +1,16 @@ # Post-MVP Feature Roadmap +> **📝 Superseded for work sequencing (2026-06-07).** The authoritative roadmap is now +> [`design-roadmap-rust-rewrite.md`](./design-roadmap-rust-rewrite.md) — the gap +> catalog, the two-track **D1–D8** plan, and the no-PII reconciliation. This doc +> **predates and omits the entire collaboration/IBD-via-D1 platform (D1–D5)** and +> still lists IBD as a standalone system (now superseded by D1 + D3). Status +> drift below: tree versioning, the curator proposal/review half, and the multi-test +> + sequencer-lab **schema** are built beyond what's checked; OpenAlex candidate +> queue and the JSONB consolidation are done. Use the design-roadmap for current +> work ordering; keep this for the per-phase detail + success metrics. Triage: +> `design-doc-triage-report.md` §11. + ## Overview This document serves as the central planning reference for features targeted after MVP completion. Each feature has a detailed planning document; this roadmap provides the high-level view, dependencies, and sequencing. 
@@ -11,9 +22,10 @@ This document serves as the central planning reference for features targeted aft | Haplogroup Discovery System | [`haplogroup-discovery-system.md`](./haplogroup-discovery-system.md) | Planned | | Sequencer Lab Inference | [`sequencer-lab-inference-system.md`](./sequencer-lab-inference-system.md) | Planned | | Multi-Test-Type Support | [`multi-test-type-roadmap.md`](./multi-test-type-roadmap.md) | Planned | -| IBD Matching System | [`ibd-matching-system.md`](./ibd-matching-system.md) | Planned | +| IBD Matching System | superseded → [`d1-encrypted-edge-exchange.md`](./d1-encrypted-edge-exchange.md) + [`d3-ibd-matching-impl.md`](./d3-ibd-matching-impl.md) | Planned | | OpenAlex Publication Discovery | [`openalex-publication-discovery.md`](./openalex-publication-discovery.md) | In Progress | -| JSONB Consolidation | [`jsonb-consolidation-analysis.md`](./jsonb-consolidation-analysis.md) | Technical Debt | +| Tree-endpoint cache revalidation (ETag/version) | [`y-tree-cache-revalidation.md`](./y-tree-cache-revalidation.md) | Backlog | +| JSONB Consolidation | realized in the Rust redesign (mig 0002/0004) | Done | ### Non-Technical Summary @@ -351,7 +363,7 @@ The JSONB consolidation work is distributed across feature phases to minimize di - [Haplogroup Discovery Overview](./haplogroup-discovery-system-overview.md) - Non-technical summary - [Sequencer Lab Inference](./sequencer-lab-inference-system.md) - Full technical design - [Multi-Test-Type Roadmap](./multi-test-type-roadmap.md) - Full technical design -- [IBD Matching System](./ibd-matching-system.md) - Full technical design +- IBD Matching System — superseded; see [`d1-encrypted-edge-exchange.md`](./d1-encrypted-edge-exchange.md) + [`d3-ibd-matching-impl.md`](./d3-ibd-matching-impl.md) - [OpenAlex Publication Discovery](./openalex-publication-discovery.md) - Automated literature discovery -- [JSONB Consolidation Analysis](./jsonb-consolidation-analysis.md) - Technical debt analysis +- JSONB 
Consolidation — realized in the Rust redesign (migrations 0002/0004) - [Atmosphere Lexicon](../Atmosphere_Lexicon.md) - AT Protocol record definitions \ No newline at end of file diff --git a/documents/planning/sequencer-lab-inference-system.md b/documents/planning/sequencer-lab-inference-system.md index 2c188d6c..7e0d33db 100644 --- a/documents/planning/sequencer-lab-inference-system.md +++ b/documents/planning/sequencer-lab-inference-system.md @@ -1,5 +1,41 @@ # Sequencer Lab Inference System +> **⚖️ Rust status (2026-06-07).** The **full schema is built — including the +> consensus tables this doc marks NEW**: `genomics.sequencing_lab`, +> `genomics.sequencer_instrument`, `genomics.instrument_observation`, and +> `genomics.instrument_association_proposal` (mig 0004). Deltas: `genomics` schema + +> `BIGINT IDENTITY`; `sequencer_instrument` has **no `lab_id` FK** (and adds +> `model_name`/`manufacturer`/`year_introduced`/`estimated_max_throughput`) — +> instrument↔lab resolves via observation→proposal→accept, not a static FK; the +> proposed instrument confidence columns live in the proposal table instead. +> **DONE (2026-06-12): lookup API, consensus engine (production-hardened), curator +> HTMX review UI, AND the `instrumentObservation` lexicon end-to-end** — citizens +> publish `com.decodingus.atmosphere.instrumentObservation` records (real confidence +> level KNOWN/INFERRED/GUESSED + `observedAt`); the Jetstream consumer mirrors them +> into `fed.instrument_observation` (mig 0027), and `recompute_consensus` folds them +> in alongside the implicit `centerName` claims with **real confidence-level + +> recency scoring** (the score's `recency`/`level` terms are no longer constants). +> Nothing material remains; future items are the "Future Considerations" list below. 
+> The consensus engine +> (`du_db::sequencer::recompute_consensus`) derives observations from +> `fed.sequencerun ⋈ fed.biosample.center_name` into `genomics.instrument_observation`, +> aggregates per instrument into `instrument_association_proposal` (dominant lab, +> distinct-citizen counts, confidence, threshold status, conflict held at PENDING), +> and a curator **accept** (`/manage/instrument-proposals/:id/accept`) sets +> `sequencer_instrument.lab_id` — the same column the lookup resolves. Run by +> `du-jobs run-once sequencer-consensus` (+ hourly). Accept/reject are audited to +> `ident.audit_log`. Auto-accept is opt-in (off by default — curator-gated). The +> lookup serves the **preseeded direct** instrument→lab tie: +> mig 0025 re-adds a nullable `genomics.sequencer_instrument.lab_id`, the ETL +> backfills it from the legacy `lab_id`, and `du_db::sequencer::{lookup_lab, +> lab_instruments}` resolves through it. Endpoints: `GET /api/v1/sequencer/lab? +> instrument_id=…` (→ `SequencerLabDto`, 404 if unknown) and `GET /api/v1/sequencer/ +> lab-instruments` (bulk cache seed). **Stale (pre-2026-06-12 wording, kept for history; superseded by the consensus engine + lexicon described earlier in this note):** the proposal tables stay dormant; when +> consensus goes live, accepting a proposal will set `lab_id`. The consensus source +> will be `fed.sequencerun.instrument_id`; the `instrumentObservation` lexicon + its +> `fed.*` mirror are still undefined. The "Current State / existing API endpoints / +> domain models" below are **Scala-era**. Triage: `design-doc-triage-report.md` §6. 
+ ## Executive Summary This document outlines enhancements to the existing sequencer lab lookup system to support: diff --git a/documents/planning/tree-versioning-system.md b/documents/planning/tree-versioning-system.md index cd1efc02..34aceae7 100644 --- a/documents/planning/tree-versioning-system.md +++ b/documents/planning/tree-versioning-system.md @@ -1,5 +1,25 @@ # Tree Versioning System: Production and WIP Trees +> **📝 Rust status (2026-06-07).** Built — the doc's recommended **Option B (overlay +> change-sets)** is what shipped: `tree.change_set` (native enum +> `tree.change_set_status`) + `tree.tree_change`; there is **no `tree.tree_version` +> table** (Option A was not taken); audit in `tree.curator_action`. **Stale below:** +> the Scala/Slick/`*.scala.html` code, the `SERIAL`/`VARCHAR CHECK`/`TIMESTAMP` +> schema (reality uses `BIGINT IDENTITY` + native enums + the existing temporal +> `valid_from`/`valid_until` model, no version-id columns), the public +> `/api/v1/tree/change-sets` + `/api/v1/curator/changes/*` endpoints (reality: +> `/curator/change-sets/*` + `/curator/reviews/*` UI and `/manage/change-sets/*` +> machine), and the granular `tree.version.*` permissions (reality: the single +> **`Curator`** role). +> +> **Substantive evolution:** ambiguity handling moved from a file-based +> `ambiguity_report_path` to the **`tree.wip_*` staging tables + the `/curator/reviews` +> resolution flow** (REPARENT / MERGE_EXISTING / DEFER), enacted by the change-set +> apply engine — see the user guide +> [`../curator-guide-tree-versioning.md`](../curator-guide-tree-versioning.md). Treat +> the API/permissions/`.scala` sections below as historical. Triage: +> `design-doc-triage-report.md` §2. + ## Executive Summary This document outlines a system for managing multiple versions of the haplogroup tree: a **Production** (canonical, public-facing) version and a **WIP** (Work-In-Progress, staging) version. 
This enables large-scale tree merges (ISOGG, ytree.net, academic sources) to be ingested, reviewed, and validated before affecting production reporting. diff --git a/documents/planning/variant-naming-authority.md b/documents/planning/variant-naming-authority.md index 93a22c4e..78247c08 100644 --- a/documents/planning/variant-naming-authority.md +++ b/documents/planning/variant-naming-authority.md @@ -1,5 +1,16 @@ # DecodingUs Variant Naming Authority +> **Implementation status (2026-06, Rust):** Core BUILT. Migration 0016 makes +> `core.variant.canonical_name` nullable (NULL = unnamed, by coordinates) with a +> partial unique index, and adds `core.du_variant_name_seq` + `core.next_du_name()`. +> `du_db::naming` provides the naming queue, DU minting (old name → alias), the +> lifecycle (`UNNAMED`→`PENDING_REVIEW`→`NAMED`), and a local same-coordinate +> dedup check. Curator UI at `/curator/naming`; propagation feed at +> `GET /api/v1/variants/export.gff` (GFF3). **Not yet:** a live external-source +> (YBrowse/ISOGG/YFull) dedup lookup, and surfacing unnamed variants in the public +> API/domain (the shared `du-domain::Variant.canonical_name` is still `String` — +> a cross-repo change since Navigator shares it). + **Objective:** Establish DecodingUs as a recognized naming authority for Y-DNA variants, using the `DU` prefix. ## Naming Strategy diff --git a/documents/planning/y-preprint-hallast-2026-incorporation.md b/documents/planning/y-preprint-hallast-2026-incorporation.md new file mode 100644 index 00000000..1561c44e --- /dev/null +++ b/documents/planning/y-preprint-hallast-2026-incorporation.md @@ -0,0 +1,123 @@ +# TODO: Incorporate Hallast et al. 2026 (population-scale Y assemblies) + +Created 2026-06-11. Repo: decodingus (AppView), branch `rust-rewrite-foundation`. + +**Source:** Hallast, Rhie, Loftus, et al. 
*"Population-scale Y chromosome assemblies +reveal recurrent remodeling within constrained architectures."* bioRxiv +2026.06.03.729890v1, posted 2026-06-06. CC-BY-NC-ND 4.0. +DOI: https://doi.org/10.64898/2026.06.03.729890 +Local PDF: `~/Downloads/2026.06.03.729890v1.full.pdf` + +142 near-T2T de novo Y assemblies, 17 major haplogroups, dated phylogeny, full +T2T-CHM13v2Y annotation set, and a three-way callable-mask comparison. It's a +**resource paper**, and the resource lines up with three threads we already have +open. This note scopes the two actionable threads + records the out-of-scope bulk. + +Related docs: +- `documents/proposals/branch-age-estimation.md` (age framework — thread 2 lands here) +- `documents/planning/y-tree-hs1-coordinate-enrichment.md` (the hs1/CHM13 native-coord issue) +- memory: `yregions-ingest`, `y-tree-coords-recurrence`, `etl-cutover-verified` + +--- + +## Thread 1 — Region-flag + callable-mask refinement (PRIORITY: high) + +Lands on the in-flight `yregions` ingest. Our ingest already pulls from the **exact +bucket this paper is built on** (`human-pangenomics/T2T/CHM13/assemblies/annotation`, +the `chm13v2.0Y_*_v1.bed` files). The paper's Methods reference **v2** versions of two +of them and add features we don't yet load. + +- **Where:** `rust/crates/du-jobs/src/yregions.rs` (`SOURCES`, `classify_*`), + `rust/crates/du-db/src/genome_region.rs`, `du_db::variant::refresh_region_overlaps`. + +### Tasks +- [x] **v2 BEDs wired** (commit `d39b314`). Both v2 files exist; bumped the two + `SOURCES` entries. v2 changes: inverted-repeats adds `IR2` (10→12 inverted_repeat + rows); amplicon coords refined. The `(region_type, name)` orphan risk is handled — + `run` now has full-snapshot sync (fetch-all-first → upsert → `prune_source_orphans`). + Live v2 reload pruned 9 orphaned v1 rows, 0 leftovers. P9/Rep1 NOT literally in v2 + (still a separate hunt, below). +- [x] **AZFc color-blocks — already loaded** (no action needed). 
The v2 amplicons BED + (`chm13v2.0Y_amplicons_v2.bed`, wired in d39b314) already carries the full Teitz + colorblock set: blue1-4, gray1-2, green1-3, red1-4, teal1-2, yellow1-2, plus + P1/P3/P5-AZFb/c blocks — all classified `ampliconic` (a flag type), so AZFc variants + are already low-confidence-for-placement. Paper Fig 2a confirms these ARE the AZFc + amplicon repeat blocks (b/g/r/t/y, Teitz ref 6). +- [ ] **Add palindrome P9 — BLOCKED on coords.** Confirmed: v2 inverted-repeats has P1-P8 + only, not P9. Paper main text (lines 519-523) gives only median length 15.8 kb + the + hg38 "Rep1" (12 kb) lineage; **exact CHM13v2.0 arm coordinates are in Suppl. Tables + 28-29** (not in the main PDF) — harvest from there or the T2T-chrY repo, then add as a + `palindromic` source (or wait for a v3 inverted-repeats BED that includes it). Low + urgency: one ~15.8 kb region. +- [x] **Callable-mask justification — recorded** in `branch-age-estimation.md` (SNP rate + section) and `yregions.rs` module doc. Fig 5h-i numbers: phylogeny mask ~10.4 Mb = + XDR+AMPL+OTHER (excl. XTR/SAT/HET/DYZ19/CEN); XDR retained 8.111/8.341/7.437 Mb @ QV + 50.2/55.2/60.9; AMPL kept but QV ~46; SAT/HET/DYZ19 QV 35-44 or uncallable; no mask + calls centromere. de novo: 49/53 DNMs in Yq12, ~1 in euchromatin, 6/40 Yq12 SNVs are + gene conversion. → empirically validates the X-DEG denominator + HET_MASK. +- [ ] After P9 (if added), re-run `decodingus-jobs run-once yregions` and confirm + `refresh_region_overlaps` re-flags cleanly (idempotent; full-snapshot sync since d39b314). + +### Validation note +The paper empirically confirms variants in AMPL/SAT/CEN/DYZ17/DYZ19/HET are unreliable +and recurrent — exactly the classes `classify_sequence_class` already folds to flagged +types. This *validates* the existing `region_overlaps` design; cite it rather than +re-architecting. 
+ +--- + +## Thread 2 — Branch-age calibration (PRIORITY: medium; follows thread 1) + +Lands in the age framework (`du_db::age`, `documents/proposals/branch-age-estimation.md`). +Our model uses µ = 8.33×10⁻¹⁰ (Helgason). The paper provides an independent recent +calibration + ready-made anchor nodes. + +### Tasks +- [x] **Record the paper's clock rate as an alternative/cross-check** (commit `6544c6a`). + BEAST v1.10.4 strict molecular clock, **0.76×10⁻⁹ sub/site/yr (95% CI 0.67–0.86×10⁻⁹)** + — ~9% slower than our 0.833×10⁻⁹. Added as `du_db::age::HALLAST_RATE{,_LO,_HI}` next + to `SNP_RATE`; `recompute_combined_ages` keeps the Helgason default (no silent swap). + Both surfaced with provenance in `branch-age-estimation.md` (cross-check-clock table). +- [x] **Seed `tree.genealogical_anchor` from dated nodes** (commit `6544c6a`, partial). + `scripts/seed-hallast-anchors.sql` — name-keyed, idempotent, `anchor_type='MODEL_DATED'` + with BEAST clock + HPD provenance in `details` (round-tripped via `date_ce` + the + consumer's `uncertainty_years`). Seeded + verified live (dev du-pg): **D1** TMRCA + 19,450 ybp (HPD 16,360–22,880) → GENEALOGICAL=COMBINED=19,450, `tmrca_ybp` gap-filled. + PENDING nodes (no clean clade-name map yet, recorded in the script): HG00512⋂HG02056 + ~10,300 ybp (HPD 8,400–12,300, Suppl. Fig 61); HG00609-ref node 10,350 ybp (HPD + 8,540–12,330). Per-node TMRCAs aren't in extractable supplement text (Fig 1b / Suppl. + Fig 1 are figures) — harvest the rest from the Suppl. Tables workbook. + ⚠ Circularity caveat documented: these calibrate our SNP clock against another SNP + clock (intended, but flagged via `MODEL_DATED` so the term can be filtered). +- [x] **Note the de novo per-generation rate** (commit `6544c6a`). Recorded in + `branch-age-estimation.md` (cross-check-clock subsection): CEPH-pedigree DNMs (R1b, + Porubsky et al. 2025) are the matching per-generation empirical anchor for the clock. 
+ +--- + +## Out of scope (record, don't build) + +The bulk of the paper is deep sequence biology a haplogroup/genealogy platform won't +model: DAZ/RBMY/TSPY multicopy copy-number evolution, 5mC methylation profiles, +centromere DYZ3 α-satellite HOR / CDR, Yq12 (DYZ1/DYZ2) structural genomics, gene +conversion / G4 motifs, AZFc structural-haplotype cataloguing. The recurrence *principle* +("repeat-mediated variants arise independently → distrust for placement") is already +captured by our `defining_haplogroup_id` recurrence model + `region_overlaps` flag; the +paper is supporting evidence, not new schema. + +--- + +## Data sources (all public) + +- HPRC Data Release 2: https://humanpangenome.org/hprc-data-release-2/ +- T2T-chrY analysis repo (annotation/scaffolding code): https://github.com/arangrhie/T2T-chrY +- Annotation BEDs (what `yregions` already reads): + `s3://human-pangenomics/T2T/CHM13/assemblies/annotation/` +- GQC suspect-region BEDs: `s3://human-pangenomics/T2T/scratch/chrY/GQC/` +- Samples: 1kGP Diversity Panel cell lines (132/144) + GIAB + CEPH1463 — public, except + two CEPH samples (NA12883/NA12884) which are dbGaP-restricted. + +## Suggested first step +Verify the two v2 BED URLs resolve, diff v2-vs-v1 parsed output, then wire the v2 +sources + AZFc color-blocks + P9 into `SOURCES`. That's the smallest contained change +and it's on code already uncommitted on this branch. 
diff --git a/documents/planning/y-tree-cache-revalidation.md b/documents/planning/y-tree-cache-revalidation.md new file mode 100644 index 00000000..05f809ec --- /dev/null +++ b/documents/planning/y-tree-cache-revalidation.md @@ -0,0 +1,77 @@ +# Work Item: Tree-endpoint cache revalidation (ETag / version) + +**Status:** DONE (2026-06-12) — see "Implementation" below +**Surface:** `du-web` — `GET /api/v1/y-tree/full` (and `mt-tree/full`) +**Filed:** 2026-06-11 · **Origin:** Navigator (Edge) tree-cache staleness incident + +## Problem + +The Navigator caches the full tree JSON on disk and (historically) served it +**cache-first forever**, with no way to know the AppView had a newer tree. This bit +a real placement: a Navigator cached the `/y-tree/full` payload on 2026-06-10 when +only **28.9%** of variants carried `hs1` (CHM13) coordinates; the AppView was later +enriched to **91.7%**, but the Edge kept using the stale copy and **under-placed a +low-coverage HiFi sample** (R-FGC29071 → K2b) because the deep R1b tips had no +`hs1` coordinate to genotype. Only a manual refetch fixed it. + +The Edge has since added a **7-day TTL** (refetch weekly; fall back to the stale +copy if the AppView is unreachable). That bounds staleness but is coarse: it +re-downloads the full payload (~28 MB) on every expiry even when nothing changed, +and a curated tree update inside the 7-day window isn't seen until the window rolls. + +## Ask + +Let the Edge **revalidate cheaply** instead of blindly re-downloading on a timer: + +1. Emit a stable **`ETag`** (and/or `Last-Modified`) on `GET /api/v1/y-tree/full` + and `GET /api/v1/mt-tree/full`, derived from the tree's content/version — e.g. + the active tree revision id + a hash of the serialized payload. The same input + that changes the payload must change the ETag. +2. Honor **conditional GET**: `If-None-Match` (and `If-Modified-Since`) → return + **`304 Not Modified`** with no body when unchanged. +3. 
(Optional, nice-to-have) expose the tree **version/revision** as a small JSON + field (e.g. `GET /api/v1/y-tree/version` → `{ "revision": …, "etag": … }`) so + clients can check version without fetching the tree at all. + +Since the tree is temporal (bitemporal `tree.haplogroup_relationship`, per-revision +metadata), the ETag should key on the **current published revision** + the build's +coordinate-enrichment state, so a `hs1`-coordinate backfill (the exact thing that +caused the incident) bumps the ETag even if the topology is unchanged. + +## Acceptance criteria + +- `GET /api/v1/y-tree/full` returns an `ETag`; a subsequent request with a matching + `If-None-Match` returns `304` with an empty body. +- Any change that alters the served payload (topology, variant set, **coordinate + enrichment**, naming) changes the `ETag`. +- mt-tree parity. + +## Implementation (2026-06-12) + +A **persisted revision marker** (`tree.tree_revision`, migration 0024 — one global +row) is the ETag source, **not** a request-time payload hash. It is bumped (+1) +once by each tree-mutating operation: `change_set::apply` (in-txn), the coordinate +/alias bulk enrichers (`variant::set_coordinates_bulk` / `set_aliases_bulk` — the +hs1 backfill that caused the incident), `ybrowse::reconcile`, and the `tree-init` +build. (`du_db::tree_revision::{current,bump}`.) + +The tree handlers now do a **cheap conditional GET**: read the revision marker, +build the ETag, and short-circuit to **304** on a matching `If-None-Match` +*before* the ~28 MB query/serialization. ETag = `"{tree}-{shape}-{root}-r{revision}"` +(strong), so full-vs-plain, Y-vs-mt, and per-root payloads get distinct tokens; a +global bump revalidates both trees (safe over-invalidation, never a false 304). +Responses carry `ETag` + `Last-Modified` + `Cache-Control: no-cache`. Added +`GET /api/v1/y-tree/version` + `/mt-tree/version` → `{revision, etag, updated_at}`. 
+ +Verified over HTTP against the dev tree: full payload 28.67 MB → a matching +`If-None-Match` returns **304 / 0 bytes**; a revision bump flips the old validator +back to **200**. Tests: `du_db::tree_revision` integration, ETag-helper unit tests, +and a `du-web` oneshot 200→304→bump→200 + version cycle. + +## Edge-side counterpart (already done, for reference) + +Navigator now: 7-day cache TTL (`NAVIGATOR_TREE_TTL_DAYS`), graceful fallback to the +stale copy on fetch failure, and a **scoring cache keyed on the tree's content hash** +(re-scores only when the tree content actually changes). With an ETag, the Edge would +switch from "re-download weekly" to "revalidate weekly via `If-None-Match`, download +only on `200`" — cutting the bandwidth and letting the TTL drop without cost. diff --git a/documents/planning/y-tree-hs1-coordinate-enrichment.md b/documents/planning/y-tree-hs1-coordinate-enrichment.md new file mode 100644 index 00000000..93b00ffb --- /dev/null +++ b/documents/planning/y-tree-hs1-coordinate-enrichment.md @@ -0,0 +1,139 @@ +# Issue: Y-tree needs complete `hs1` (CHM13) coordinates for native CHM13 placement + +Created 2026-06-10. Repo: decodingus (AppView), branch `rust-rewrite-foundation`. Companion to +the Navigator (DUNavigator) DecodingUs Y-tree provider — see that repo's +`documents/design/DecodingUsTreeProvider.md` + `memory/decodingus-tree-provider.md`. + +## RESOLUTION (2026-06-10) + +Done in-place (no tree rebuild — the dev DB was already ISOGG + decoding-us, no FTDNA): + +1. **FTDNA descoped from code** — removed the Y-graft + mt-foundation paths from + `tree_init.rs` and the `reattach` path from `du_db::snp_graft` (FTDNA-only). +2. **Root cause of the coordinate gap found + fixed:** the YBrowse mirror had been + ingested **without chain files**, so GRCh37/hs1 were empty on all 3.1M rows. 
+ Re-ran `decodingus-jobs run-once ybrowse` with `YBROWSE_CHAIN_GRCH37` + (`hg38ToHg19`) + `YBROWSE_CHAIN_HS1` (`hg38ToHs1`), chains at + `~/Development/decodingus-data/chains/`. Reconcile enriched 2.98M variants. + **Live `/api/v1/y-tree/full` hs1 coverage: 29% → 88%** (GRCh37 28% → 88%). +3. **ISOGG name-only resolution** — the residual coordless tips were ISOGG + name-*decoration* mismatches, not missing SNPs (ybrowse has the base SNP): + `.1`/`.2` = recurrence (same site, different branch), `^^` = stability marker. + Modeled per the universal-variant design via a new + `core.variant.defining_haplogroup_id` (migration 0023) — each recurrence is a + sibling row sharing name+coordinate, scoped to its branch. Ran + `tree-init --resolve-recurrence --apply` → 2,590 recurrence-ized + 2 folded; + empty-coordless tree variants 6,256 → 3,665 (residue: 3,122 reconcile-flags for + the curator, ~580 no-base/compound, ~118 genuinely not in ybrowse). +4. **decoding-us multi-branch / back-mutation (forward/reverse) labeling** — 715 + variants link the same SNP to >1 branch. `tree-init --label-recurrence --apply` + classifies each link by topological parsimony (Dollo: even defining-ancestors = + forward, odd = reverse/back-mutation) and writes the migration-0021 per-link + `ancestral_allele`/`derived_allele`. 630 labeled (381 homoplasy, 251 + back-mutations). `scrub_recurrent_links` now **skips ASR-labeled variants**, so + genuine recurrence survives (scrub examined 715→85 after labeling). NOTE: purely + topological — no tip genotypes/character-state data exist for true ASR. + +Remaining: the 18 multi-link ISOGG-decorated variants (need one recurrence row per +branch), the reconcile-flag curator queue, and genotype-based ASR if/when sample +call data lands. + +## (original issue follows) + +## TL;DR + +The Navigator desktop app now places Y haplogroups against **our** DecodingUs tree (served by +`GET /api/v1/y-tree/full`, added in commit cd97864) instead of FTDNA. 
It uses each variant's +**native build coordinate** — for a CHM13 alignment, the `hs1` coordinate — so placement needs +**no liftover**. That's the intended architecture: the AppView owns multi-build coordinates; +Navigator stays liftover-free. + +**The gap:** `hs1` coordinates today cover only the **decoding-us backbone**, not the +FTDNA-grafted tips. So a CHM13 sample places correctly down the backbone but **stops at K2b +instead of reaching its terminal (R-FGC29071)**. The AppView needs to provide `hs1` coordinates +for **every** tree variant. + +## Evidence (live, GFX0457637 CHM13 HiFi BAM) + +Validated against a locally-running AppView (`/api/v1/y-tree/full`), Navigator test +`validate_gfx_decodingus_y`: + +| Path | Coords used | SNPs matched | Placement | +|------|-------------|-------------:|-----------| +| DecodingUs **native hs1** (no liftover) | `hs1` | 101 / 119 | **K2b** (backbone only) | +| FTDNA GRCh38 + liftover (reference) | GRCh38 | 1592 / 1919 | **R-FGC29071** (correct terminal) | + +Coordinate coverage across the **79,602** tree variant-links in `/api/v1/y-tree/full`: + +| Build | variant-links with this coordinate | % | +|-------|-----------------------------------:|---| +| GRCh38 | 70,294 | 88% | +| GRCh37 | 22,300 | 28% | +| **hs1 (CHM13)** | **22,988** | **29%** | + +So ~47k tree variants have a GRCh38 coordinate but **no hs1** — and those are the deeper +(FTDNA-grafted) tips. M207 (R root) *does* have hs1, but the R-subclade tips below K2b largely +don't, so descent halts. (Across all 3M `core.variant` rows, GRCh38-with-derived = ~2.99M, +hs1-with-derived = ~72.9k — `hs1` is sparse globally too.) + +## What's needed (the ask) + +Populate `hs1` (and ideally `GRCh37`) coordinates for **all** Y-tree variants, by lifting their +GRCh38 coordinate to `hs1`. Two viable shapes (the user is open to either): + +1. 
**Ingest / enrichment phase (persistent).** A `decodingus-tree-init` / enrichment step that, + for every `core.variant` with a GRCh38 coordinate but no `hs1`, lifts GRCh38→hs1 and writes + the `hs1` entry into `core.variant.coordinates`. The AppView already has GRCh38→hs1 liftover + infrastructure — see `rust/crates/du-jobs/src/ybrowse.rs` ("GRCh38 -> hs1 (T2T-CHM13) chain + file") and `rust/crates/du-migrate/src/bin/tree_init.rs` (`prod_build` maps `hs1`). Reuse the + same chain. **Scale/constraint:** the dev Postgres container is RAM-limited (1 GB) and was + OOM-killed by a single 3M-row `UPDATE` (migration 0021) — **batch** the enrichment (commit per + chunk) or scope it to tree-linked variants (~47k missing hs1) rather than all 3M. + +2. **On the fly (in the API).** In the `/api/v1/y-tree/full` handler, for any variant lacking an + `hs1` coordinate, lift its GRCh38 coordinate to `hs1` at response time (chain loaded at + startup). No DB mutation; pairs naturally with the existing `du-jobs` liftover. Heavier per + request, but the tree response is already cached on the Navigator side. + +Either way the goal is identical: every Y-tree variant carries a usable `hs1` coordinate (contig +`chrY`, position, ancestral, derived) so Navigator's native-CHM13 path reaches terminals. + +## Interaction with the FTDNA-merge descope decision + +The user is separately deciding to **descope the FTDNA merge and build the tree from only the +ISOGG + decoding-us prod trees**. That decision directly bears on this issue: + +- The `hs1` gap is concentrated in the **FTDNA-grafted** tips (decoding-us variants already carry + multi-build coords incl. `hs1`). If FTDNA tips are dropped, the remaining tree is + ISOGG + decoding-us — so check whether **ISOGG** coordinates include `hs1`/CHM13 (if ISOGG is + GRCh38-only, those nodes still need the GRCh38→hs1 lift). 
+- Net: the enrichment lift is still likely needed for ISOGG-sourced nodes, but the volume and the + "which tips exist at all" both change. **Sequence this issue after (or alongside) the + FTDNA-descope decision** so we don't lift coordinates for variants we're about to drop. + +## Verification + +Once `hs1` is complete for the Y tree: +- `GET /api/v1/y-tree/full` → most/all variants carry an `hs1` coordinate with `derived`. +- Navigator `validate_gfx_decodingus_y` (in DUNavigator, against the AppView) reaches + **R-FGC29071** via the native-hs1 path (no liftover), matching the FTDNA reference result. + +## Local dev-DB state note (for whoever picks this up) + +To get the AppView running locally on 2026-06-10, migration **0021_ancestral_state** was applied +**manually** to the dev DB (`du-pg` container, `postgres://postgres:dev@192.168.64.2:5432/ +decodingus`): its 3M-row `UPDATE` OOM-killed the 1 GB container, so the relabel was run in +**100k-id batches** (committing each), then the part-2 `ALTER` ran, then the row was recorded in +`_sqlx_migrations` (version 21, checksum `f78640156ad4…`) so `du-web` startup skips it. The DB is +now at migration 21, clean. If you `reset` the container you'll need to re-apply 0021 the same +batched way (or bump the container's memory). + +## Secondary (Navigator-side) observation — not blocking + +Navigator also has a GRCh38-coords + liftover fallback path for the DecodingUs tree. On the DU +tree it currently under-matches (the GFX sample's M207 came back as a no-call; carried SNPs +scattered → shallow placement) even though the DU GRCh38 coordinates match FTDNA's +(55,293/55,354 same position). This looks like a back-map collision when many DU variants share +or recur at the same lifted CHM13 position (the tree has recurrent-SNP/homoplasy structure — cf. +migration 0021's per-branch allele columns). 
It's not on the critical path (native-hs1 is the +intended route), but worth a look on the Navigator side if the GRCh38+lift fallback is kept. diff --git a/documents/proposals/Messaging_and_Feed_System.md b/documents/proposals/Messaging_and_Feed_System.md index f5406c13..4d89508f 100644 --- a/documents/proposals/Messaging_and_Feed_System.md +++ b/documents/proposals/Messaging_and_Feed_System.md @@ -1,5 +1,13 @@ # Messaging & Feed System Design +> **📝 Status (2026-06-07): forward (Bucket B).** Schema present +> (`social.{user_block, conversation, message, feed_post}`, mig 0009); **no logic**. +> Refresh Slick→Rust when the social layer is scheduled. **Reconcile with the no-PII +> direction:** DMs must NOT be central plaintext (`social.message`) — route them over +> the **D1 encrypted relay** (or AT-Proto records); the public feed (AT-Proto +> `feed.post` + AppView index) is fine. Reconcile threads with D4 assertions. +> Triage: `triage-report.md` §5. + ## 1. Overview This document outlines the design for a comprehensive messaging and social feed system for DecodingUs. The goal is to facilitate communication between: 1. **Java Edge Applications (PDS Managers):** Automated or semi-automated agents running on user hardware (The Navigator Workbench) that need to coordinate with other nodes or contact researchers. diff --git a/documents/proposals/Patronage_Donation_System.md b/documents/proposals/Patronage_Donation_System.md index fc160da7..70329b53 100644 --- a/documents/proposals/Patronage_Donation_System.md +++ b/documents/proposals/Patronage_Donation_System.md @@ -1,5 +1,11 @@ # Patronage Donation System Proposal +> **📝 Status (2026-06-07): deferred.** Not in scope for the current rewrite, but +> **deferred, not dropped** — patronage/billing will likely return to fund +> infrastructure once active users cross ~a few hundred (the `/faq` already names it +> as the sustainability path). Refresh the Scala/Play/Stripe specifics to the Rust +> stack when revived. 
Triage: `triage-report.md` §7. + ## 1. Overview DecodingUs is committed to operating as a free-to-use community service for genetic genealogy and population research. However, as the platform scales, the operational costs associated with hardware, hosting, and maintaining the DecodingUs Atmosphere will grow. To ensure long-term sustainability without compromising user privacy or monetizing user data, we propose implementing a Patronage Donation System. diff --git a/documents/proposals/Reputation_System_Implementation.md b/documents/proposals/Reputation_System_Implementation.md index b5da33e5..2cd17318 100644 --- a/documents/proposals/Reputation_System_Implementation.md +++ b/documents/proposals/Reputation_System_Implementation.md @@ -1,5 +1,11 @@ # Reputation System Implementation Plan +> **📝 Status (2026-06-07): forward (Bucket B).** Schema present +> (`social.{reputation_event_type, reputation_event, user_reputation_score}`, +> mig 0009); **no logic**. The public `/reputation` page already describes it. +> Refresh Slick→Rust when the social layer is scheduled (lower priority — depends on +> social being live). Triage: `triage-report.md` §6. + ## 1. Overview The Reputation System is a core mechanism to ensure quality interactions within the DecodingUs social features (Messaging, Feed, Lab Discovery). It rewards positive contributions and limits spam/abuse by gating features based on a user's `UserReputationScore`. 
diff --git a/documents/proposals/branch-age-estimation.md b/documents/proposals/branch-age-estimation.md index bf427843..54db073c 100644 --- a/documents/proposals/branch-age-estimation.md +++ b/documents/proposals/branch-age-estimation.md @@ -1,5 +1,16 @@ # Branch Age Estimation System +> **✅ Realized in the Rust build (status 2026-06-07).** The combined branch-age +> framework is implemented: `tree.haplogroup_ancestral_str` + the combined age +> (mig 0013/0014), `tree.genealogical_anchor`, `genomics.str_mutation_rate`, +> `genomics.biosample_callable_loci`, `du_db::age` (`combine` + +> `recompute_combined_ages`), `du_db::ystr`, the `branch-age-recompute` job, and +> `GET /api/v1/haplogroups/{name}/age`. Caveats: the Rust combine is +> **inverse-variance** (a simplification of the full PDF multiplication below) and +> genealogical-anchor wiring may be partial. **Kept as the scientific methodology +> reference** (mutation rates, multi-step STR frequencies, precision tables, the +> McDonald port) for future refinement. Triage: `triage-report.md` §3. + **Reference:** McDonald, I. (2021). "Improved Models of Coalescence Ages of Y-DNA Haplogroups." *Genes*, 12(6), 862. https://doi.org/10.3390/genes12060862 **Status:** Backlog @@ -18,7 +29,7 @@ This proposal integrates with other planning documents: |----------|-------------| | `../planning/haplogroup-discovery-system.md` | **Primary integration point.** SNP counts come from `tree.haplogroup_variant`. Private variants from `tree.biosample_private_variant` provide per-sample data for individual TMRCA calculations. Age recalculation should trigger when branches are promoted. | | `../planning/multi-test-type-roadmap.md` | **Test type coverage data.** Callable loci vary by test type (WGS ~3Gbp, BigY-700 ~15Mbp, Chip ~2000 SNPs). Uses `test_type_definition` table for platform characteristics. 
| -| `../planning/appview-pds-backfeed-system.md` | **PDS data flow.** STR profiles and private SNP counts flow from user PDS via firehose. Age estimates are NOT backfed (computed results, not user data). | +| Federation ingest (Jetstream → `fed.*`) | **PDS data flow.** STR profiles and private SNP counts flow from the user PDS via the Jetstream summary mirror. Age estimates are AppView-computed (not backfed). | | `group-project-system.md` | **Group TMRCA.** Group projects display TMRCA estimates in `projectTreeView`. Project-level modal haplotypes feed into STR-based age estimation. | **Schema Note:** All haplogroup-related tables reside in the `tree` schema. Branch age fields (`formed_ybp`, `tmrca_ybp`, etc.) were added to `tree.haplogroup` in evolution 48. @@ -79,10 +90,29 @@ Must account for: - **Parallel mutations** (independent lines mutate to same value) - **Multi-step mutations** (+2, -2, +3, etc.) -**Multi-step frequencies:** -- ω±1 ≈ 0.962 (single-step) +**Multi-step frequencies** (McDonald §2.5.3, from ref [8]): +- ω±1 ≈ 0.962 (single-step; adjusted to 0.96217 so Σω±n = 1) - ω±2 ≈ 0.032 (two-step) - ω±3 ≈ 0.004 (three-step) +- ω±n for n ≥ 4: each further repeat step divides the frequency by √10 (ω±4 ≈ ω±3/√10, and so on) + +**Implemented** (`du_db::ystr`): `P(g|m)` is McDonald's **Table 1**, embedded verbatim +over its published range (g,m ≤ 10) and extended by the signed-step convolution of +the ω above beyond it (deep-time, low-weight terms only — the convolution is the +*exact* all-orders sum, so it differs from the f_r-truncated Table 1 by up to ~0.1 at +a few cells; the embedded table is authoritative in-range). A marker's age term is +`P(t|g) = Σ_m P(t|m)·P(g|m)` — a mixture over the hidden mutation count `m` of Poisson +age PDFs (`du_db::pdf::Pdf::mixture`), rate per generation → years via +`GENERATION_YEARS = 33`.
STR ages **propagate up the tree** (`ystr::propagate_str`, +the §2.2 SNP strategy): ancestral motifs are reconstructed for internal nodes +(§2.5.2 up-pass modal-of-sub-clades + down-pass parent fill), then a node's TMRCA is +the product over children of (child TMRCA ⊛ the parent→child STR branch time) and +over direct tester tips — so internal nodes get ages from their descendants and a +parent stays older than its children. (`compute_str_age`'s per-clade star pooling is +retained as a utility but no longer drives the written ages.) Per-marker +`omega_plus`/`omega_minus`/`multi_step_rate` +(`genomics.str_mutation_rate`) build a marker-specific `P(g|m)` table when they depart +from the global symmetric single-step-dominated model. ### Confidence Intervals @@ -97,12 +127,86 @@ Must account for: ### 1. Reference Data (System-Level) -#### SNP Mutation Rate Table -| Region | Rate (SNPs/bp/yr) | 95% CI | Source | -|--------|-------------------|--------|--------| -| MSY Combined | 8.33 × 10⁻¹⁰ | 7.57–9.17 × 10⁻¹⁰ | Helgason 2015 | -| X-degenerate + Ampliconic | 8.71 × 10⁻¹⁰ | 8.03–9.43 × 10⁻¹⁰ | Helgason 2015 | -| Palindromic | 7.37 × 10⁻¹⁰ | 6.41–8.48 × 10⁻¹⁰ | Helgason 2015 | +#### SNP Mutation Rate + +**The method uses a *single* combined rate** (McDonald 2021 §2.2.1, Eq 2–3; §3: "the +combined Y-SNP mutation rate of Helgason et al. is used"): `µ_SNP = 8.33 × 10⁻¹⁰` +SNPs/bp/yr (95% CI 7.57–9.17 × 10⁻¹⁰). It is **not** applied per-region. + +The per-region figures below are **evidence that the rate is ~constant across the +MSY** (McDonald Appendix A.4, from Helgason 2015) — *not* a directive to apply +different rates to different regions. The paper's conclusion: "the mutation rate is +constant when sufficiently large regions of the MSY are considered." 
+ +| Region | Rate (SNPs/bp/yr) | 95% CI | Notes | +|--------|-------------------|--------|-------| +| MSY combined (used) | 8.33 × 10⁻¹⁰ | 7.57–9.17 × 10⁻¹⁰ | The rate the model applies | +| X-transposed + X-degenerate + ampliconic (15.2 Mbp) | 8.71 × 10⁻¹⁰ | 8.03–9.43 × 10⁻¹⁰ | ~constant evidence | +| Palindromic (6.1 Mbp) | 7.37 × 10⁻¹⁰ | 6.41–8.48 × 10⁻¹⁰ | slightly lower (gene conversion), P=0.04 | + +**Region handling is by self-consistent *masking*, not per-region rates** (McDonald +Appendix A.2): "As highly recurrent base pairs are excised from mutation-rate +estimations, they should also be self-consistently removed from TMRCA calculations +and excised from the subset of base pairs b̄." A.3 names the regions to mask +(centromere, DYZ19; palindromic arms depending on calling). Ampliconic sequence is +**kept** (same rate as X-degenerate). The implication for `b`: drop only the +recurrent/heterochromatic regions — *not* all of ampliconic/palindromic — and ensure +the SNP count `m` is excised over the same regions (`m ⊆ b`, McDonald §2.2.3). + +**Empirical validation (Hallast et al. 2026, 142 population-scale Y assemblies).** +This masking choice is confirmed independently by the paper's three-way callable-mask +comparison (their Fig 5h-i): +- Their phylogeny ran on a **~10.4 Mb mask (10,400,778 callable positions, 25,426 + polymorphic sites) = X-degenerate + ampliconic + "other"**, *excluding* X-transposed, + satellite, heterochromatin, DYZ19, and centromere — the same split as our denominator + (`y_xdegen + y_ampliconic + y_palindromic`, with `HET_MASK` dropping heterochromatic + SNPs). +- **X-degenerate is the agreed, high-QV core** across all three masks (GRCh37 / T2T / + pangenome): retained bp 8.111 / 8.341 / 7.437 Mb, mean QV 50.2 / 55.2 / 60.9. +- **Ampliconic is kept but lower quality** (QV 45.7 / 46.2 / 61.5) — consistent with + keeping it in `b` (same mutation rate) while flagging it low-confidence-for-*placement*. 
+- **Satellite / heterochromatin / DYZ19 are low-QV (35–44) or uncallable; no mask calls + centromeric sequence** — validating their exclusion from the age count. +- The de novo data underline this: 49/53 (92.5%) pedigree DNMs fall in Yq12, only ~1 SNV + in euchromatin, and 6/40 Yq12 SNVs trace to gene conversion (recurrent), not de novo — + i.e. the masked compartments are exactly where mutations are unreliable/recurrent. + +**Cross-check clock (Hallast et al. 2026).** The same paper provides an *independent* +recent calibration we record but **do not** substitute for Helgason: + +| Clock | Rate (sub/site/yr) | 95% CI | Role | +|-------|--------------------|--------|------| +| Helgason 2015 (used) | 0.833 × 10⁻⁹ | 0.757–0.917 × 10⁻⁹ | the rate the model applies | +| Hallast 2026 BEAST (cross-check) | 0.76 × 10⁻⁹ | 0.67–0.86 × 10⁻⁹ | sanity bound only | + +Method: BEAST v1.10.4 strict molecular clock, RAxML GTR+Γ start tree, constant-size +coalescent, 150 M MCMC (10% burn-in), TreeAnnotator MCC tree — run on the ~10.4 Mb +X-degenerate-style mask above. It is **~9% slower** than Helgason, so adopting it would +push every TMRCA ~9% older; the two CIs overlap, so it functions as a consistency check, +not a correction. Constants `HALLAST_RATE{,_LO,_HI}` live alongside `SNP_RATE` in +`du_db::age`; the default stays Helgason (do not silently swap — surface both with +provenance). The CEPH-pedigree de-novo rate (R1b lineages, Porubsky et al. 2025) is the +matching *per-generation* empirical anchor for the same clock. + +**Calibration anchors (dated nodes).** Hallast's time-calibrated phylogeny (their Suppl. +Fig. 1, ISOGG v15.73 labels; 95% HPD from BEAST) yields ready-made `tree.genealogical_anchor` +rows — model-dated TMRCAs, *not* radiocarbon, so they carry `anchor_type = MODEL_DATED` +and full provenance in `details` (source, clock, HPD) so a curator can down-weight or +exclude them. 
Seeded by `scripts/seed-hallast-anchors.sql` (name-keyed, idempotent, run +after the tree load). Currently mappable to our clade names: + +| Node | TMRCA (ybp) | 95% HPD | Source | +|------|-------------|---------|--------| +| D1 | 19,450 | 16,360–22,880 | Hallast 2026 Fig 1b / Suppl. Fig 1 | +| HG00512 ⋂ HG02056 | ~10,300 | 8,400–12,300 | Hallast 2026 Suppl. Fig 61 | + +> **Circularity caveat:** these are themselves molecular-clock estimates, so feeding them +> into the inverse-variance `COMBINED` term partly calibrates our SNP clock against another +> SNP clock. That is intended (a tight external constraint on deep nodes), but it is *not* +> independent evidence the way an aDNA C14 date is — hence `MODEL_DATED` and the recorded +> provenance, so the term can be filtered. Most of the dated phylogeny lives in figures +> (Suppl. Fig. 1 / Fig 1b) and Suppl. Tables, not extractable text; harvest more nodes from +> the tables workbook when mapping them to our haplogroup names. #### STR Mutation Rate Database Per-marker mutation rates needed for ~700+ Y-STR markers: @@ -201,7 +305,7 @@ case class StrMarkerValue( ### Edge Computing Model -**Critical Architecture Principle** (from `appview-pds-backfeed-system.md`): Raw genomic data (BAM/CRAM/VCF) **never** flows to DecodingUs. All raw data analysis happens locally in the Navigator Workbench. +**Critical Architecture Principle**: Raw genomic data (BAM/CRAM/VCF) **never** flows to DecodingUs. All raw data analysis happens locally in the Navigator Workbench. ``` ┌─────────────────────────────────────────────────────────────────────────┐ @@ -495,13 +599,25 @@ Group projects compute modal STR haplotypes (`projectModal`). These can feed int **Goal:** Add Y-STR data to improve precision. **Tasks:** -1. [ ] Create `genomics.str_mutation_rate` table -2. [ ] Import mutation rates from Ballantyne/Willems studies -3. [ ] Create `tree.haplogroup_ancestral_str` table -4. [ ] Implement ancestral STR motif calculation (modal values) -5. 
[ ] Implement P(g|m) mapping with multi-step mutations -6. [ ] Create `StrAgeService` for STR-based age calculation -7. [ ] Integrate STR PDFs into combined calculation +1. [x] Create `genomics.str_mutation_rate` table (migration `0014_str_age`) +2. [x] Import mutation rates from Ballantyne/Willems studies — + `scripts/seed-str-mutation-rates.sql` seeds 137 markers: Willems 2016 (1000G + MUTEA, 116 markers + 95% CIs) primary, YHRD combined rates gap-filling 11 + core markers Willems' short-read set misses (DYS393, DYS390, DYS449, …). + Only DYS447 among common single-copy markers still falls back to + `DEFAULT_STR_RATE = 0.0025`. (Ballantyne is McDonald's ref [8] — its + single:multi-step 25.23:1 already sets the global ω.) +3. [x] Create `tree.haplogroup_ancestral_str` table (migrations `0013`/`0014`) +4. [x] Implement ancestral STR motif calculation (modal values) — `ystr::compute_modal` +5. [x] Implement P(g|m) mapping with multi-step mutations — `ystr` (Table 1 + convolution) +6. [x] Create `StrAgeService` for STR-based age calculation — `ystr::compute_str_age` + (multi-step PDF model; supersedes the legacy linear ΣΔ/Σµ estimator) +7. [x] Integrate STR PDFs into combined calculation — `COMBINED` (`du_db::age`) is + the direct PDF product (Eq 1) of the SNP TMRCA PDF (propagation), the STR + TMRCA PDF (`ystr::str_tmrca_pdfs`), and the genealogical anchor PDF, all on + the shared TREE grid (50 yr / 350 ky) — preserving non-Gaussian shape instead + of inverse-variance-averaging medians. Disjoint terms fall back to the + Gaussian combine; a stored STR_VARIANCE row with no fresh PDF still contributes. 
**Data needed:** - Y-STR profiles from PDS (ensure Atmosphere capture) diff --git a/documents/proposals/denovo-tree-ingestion.md b/documents/proposals/denovo-tree-ingestion.md new file mode 100644 index 00000000..7beb1ddf --- /dev/null +++ b/documents/proposals/denovo-tree-ingestion.md @@ -0,0 +1,228 @@ +# De-novo Y / mtDNA tree ingestion + +**Status:** proposed (2026-06-17). Supersedes the ISOGG-import tree foundation. + +## Why + +The tree foundation is moving from **importing ISOGG** (a curated external +nomenclature) to **ingesting a tree we build ourselves** from genotypes. The +de-novo pipeline at `~/Genomics/ytree` joint-calls chrY + chrM across four +cohorts on CHM13v2 (hs1), builds an IQ-TREE ML tree, and runs marginal +ancestral-state reconstruction to derive the defining SNPs of every branch — +i.e. it produces an ISOGG-shaped tree (nodes + per-branch SNPs) but grounded in +real data on a modern reference. + +This retires a whole class of problems at once: the ISOGG `~`-fold corruption, +the cross-source SNP-anchor graft, the YCC-longhand rename, the legacy GRCh37 +coordinate frame, and the absence of an mt tree. The de-novo tree **is** the +foundation; nothing is grafted onto it. The clearing front-end is the +`du_db::haplogroup::reset_tree` / `tree-init --reset` work already in hand. + +The current artifacts are a **1,742 chrY (male) + 3,344 chrM** workflow-correctness +batch. A more complete tree (adding the remaining HGDP + SGDP samples) follows; +ingestion must therefore be **re-runnable as a clean replace**, not a merge. + +## Decisions (locked) + +1. **Seam = pipeline-side JSON export + Rust loader.** A new `68_export_ingest.py` + in `~/Genomics/ytree/bin` emits one normalized JSON per chromosome; a Rust + loader consumes it. Same architecture as the ISOGG `isogg_to_json.rb → json → + tree-init` seam — it decouples the phylo pipeline from our DB schema so neither + side breaks the other. 
(The historical bug was the import *logic*, not this + seam.) +2. **Node naming = ISOGG / PhyloTree label primary.** Display name is the + compare-mapped label (`R-M269`, `B-Z43718`) where one exists; synthesized from + the node's strongest defining SNP otherwise. The de-novo `NodeN` id and the + ISOGG-clade mapping are retained in provenance. +3. **Initial scope = topology + defining SNPs first** (Y, then mt). Sample-leaf + placement and the curation surface are follow-ups. +4. **Coordinate frame = CHM13v2 / hs1.** Variants link to `core.variant` by hs1 + coordinate, reusing the YBrowse-loaded catalog so known SNPs inherit their + names; novel de-novo SNPs are created. +5. **Greenfield replace.** Each ingest clears `tree.*` (`reset_tree`) and loads + the de-novo tree as the sole foundation. No ISOGG/decoding-us/FTDNA layers. + +## Source artifacts (per chromosome) + +All under `~/Genomics/ytree/`. Y shown; mt mirrors it (`chrM.asr.*`, `mt_*`). + +| File | Role | +|---|---| +| `results/chrY.asr.treefile` | Newick; internal `NodeN/`, tips = sample IDs, branch lengths. 
**The topology.** | +| `results/chrY.asr.branch_transitions.tsv` | per branch `parent→child`: `n_mut`, `n_reversion`, `chrY:pos anc>der` list | +| `results/chrY.asr.snp_assignments.tsv` | per (branch, SNP): `chrom,pos,ref,alt,ancestral,derived,parent,child,to_alt,reversion,anc_chimp,polarity` | +| `compare/internal_node_labels.tsv` | `our_node(NodeN) → isogg, label, markers_matched, markers_expected` — the **display name** + mapping | +| `compare/tip_haplogroup_calls.tsv` | `sample → terminal_isogg, terminal_label, balance, path_derived, path_ancestral, path` — leaf metadata | +| `compare/conflict_triage.tsv` | `isogg,label,n_tips,magnitude,members_away,foreign_in,home_node` — curation conflicts vs ISOGG | +| `results/chrY.callable_mask.chm13v2.bed` | Poznik-style call mask (region reliability) | +| `manifests/samples.tsv` | `sample, cohort, cram_path, sex` — tip → biosample provenance | + +We ingest the **publication tree** (`results/chrY.asr.publication.treefile`): the +full ML tree collapsed under the builder's **keep-set rule** and QC-failed tips +pruned (HG02772). A node survives iff **(UFBoot ≥ 95 AND it carries ≥1 defining +mutation) OR it is a primary best-clade haplogroup placement** (`compare/chrY.keepset.tsv`). +The keep-set is essential: rapid Y expansions give real, named macro-clades (R, R1, +R1b — all UFBoot ≈ 80) only moderate bootstrap, so a pure UFBoot ≥ 95 rule **gutted +the named backbone** (R-CTS4466 dangled directly under IJK). The keep-set preserves +named clades even at moderate support while still collapsing anonymous weak nodes; it +also dedupes recurrent placements to one node per haplogroup (e.g. spurious DF13 +`Node494` collapses; real DF13 `Node423` is kept). 
The **`n_mut ≥ 1` clause** (from +`*.asr.branch_transitions.tsv`) drops zero-mutation bifurcations that UFBoot +over-supported — the mtDNA "0 defining variant" placeholder nodes (`Node82`, `Node110`, +…); their named children reattach to the parent as polytomies, so no tips are lost and +every named clade survives. Because the exporter derives survival from the publication +treefile itself, this refinement needed **no loader/exporter change** — only a +re-export + reload once the builder regenerated the treefiles. The exporter reads the surviving `NodeN` +set **directly from the publication treefile** (all artifacts share the full-tree +`NodeN` namespace, so survivors keep their ids; SNPs/labels/tips still join by +`NodeN`) rather than re-deriving the collapse. For chrY: **1,203 internal nodes** +(from 1,740) + 1,741 tips; R-CTS4466 nests at depth 21. + +**Naming** (owned import-side; the keep-set/labels provide the clade identity, we +format the display name). Per node, in order: **backbone/macro clade verbatim** if +the clade (keep-set / `isogg`) has no lowercase — `A`, `IJK`, `BT`, `CT`, `NO`, +`K2` (the comparison's `<letter>-<SNP>` label mangles these: `IJK→I-M2696`); +else the `<letter>-<SNP>` `label` from `internal_node_labels` for ISOGG subclade +longhands (`R1b1a…→R-L389`); else the node's own catalog-matched defining SNP +(resolves `Node423→DF13`, `Node341→P312`); else a synthetic `chrY:<pos><anc>><der>` +coordinate name (≈106 genuinely novel de-novo clades with no known SNP); the root +keeps its `NodeN`. Result: 1,097/1,204 (91%) carry proper haplogroup/SNP names. + +**Collapsed-branch SNPs (the one subtle point).** Collapsing 682 weak nodes +orphans 8,919 defining SNPs whose true MRCA node no longer exists. Policy +(decided): a collapsed branch's SNPs **lift to the nearest surviving ancestor** as +a tagged *unresolved* block in that node's provenance — **not** strict defining +links (the ancestor's other children don't carry them; exact placement in the +subtree is unresolved).
Surviving nodes keep only their **own** branch SNPs as +defining links. This preserves every SNP exactly once (85,955 defining + 8,919 +unresolved = 94,874) with no link bloat and no invented homoplasy. + +## The contract — normalized ingest JSON + +`68_export_ingest.py` joins the treefile + the three result TSVs + the two +compare TSVs + the manifest into one file per chromosome: + +```jsonc +{ + "chromosome": "chrY", // | "chrM" + "haplogroupType": "Y_DNA", // | "MT_DNA" + "build": "chm13v2.0", + "source": "decodingus-denovo", + "root": "Node1408", + "run": { "tips": 1742, "model": "GTR+ASC", "rooting": "polarize", + "ufboot": 1000, "date": "2026-06-17" }, + "nodes": [ + { + "id": "Node1409", // stable de-novo NodeN id + "parent": "Node1408", // null at root + "support": 100, // UFBoot → confidence_level + "branchLength": 0.007374, + "label": "B-Z43718", // mapped display name (null ⇒ synthesize) + "isogg": "B3", // mapped clade (provenance), nullable + "markersMatched": 502, "markersExpected": 521, + "nMut": 1924, "nReversion": 205, + "definingVariants": [ // this node's OWN branch → haplogroup_variant links + { "chrom":"chrY","pos":2472503,"ref":"A","alt":"T", + "ancestral":"A","derived":"T","reversion":false,"polarity":"forward" } + ], + "unresolvedVariants": [ // collapsed sub-branch SNPs → provenance block, NOT links + { "chrom":"chrY","pos":2480028,"ref":"C","alt":"T", + "ancestral":"C","derived":"T","reversion":false,"polarity":"forward" } + ] + } + ], + "tips": [ // phase 3 (leaf placement) + { "sample":"Ale22","parentNode":"Node1640","cohort":"PRJEB9586","sex":"male", + "terminalLabel":"J-Y27554","terminalIsogg":"J2a1a2b1~", + "balance":388,"pathDerived":391,"pathAncestral":3 } + ], + "conflicts": [ + { "isogg":"A1b","label":"A-P108","nTips":1733,"magnitude":1, + "homeNode":"Node1404","foreignIn":1,"membersAway":0 } + ] +} +``` + +Notes for the exporter: +- A tip's **placement** is its parent `NodeN` in the Newick (not the ISOGG + `path` — that is 
naming/validation metadata only). +- `definingVariants` come from `branch_transitions` cross-joined with + `snp_assignments` (the latter supplies `reversion`/`polarity`/`anc_chimp`). +- Emit the full-tree node set; carry `support` so the AppView can collapse by + UFBoot for display rather than us discarding low-support structure. + +## Rust loader + +`decodingus-tree-init --denovo-y --apply` (and `--denovo-mt`), a new +foundation path beside the existing `--isogg`: + +1. **Clear** — `reset_tree(pool)` (handles the `core.variant.defining_haplogroup_id` + FK + derived recurrence rows). +2. **Nodes** → `tree.haplogroup`: `name` = `label` (disambiguated with `NodeN`/top + SNP on collision), `haplogroup_type`, `source='decodingus-denovo'`, + `confidence_level` from UFBoot, `provenance` = `{ node_id, isogg, markers_matched, + markers_expected, support, branch_length, n_mut, n_reversion }`. +3. **Edges** → `tree.haplogroup_relationship` (parent→child, `source`). +4. **Variants** → `core.variant` get-or-create **by hs1 coordinate** + (`coordinates` = `{build:'chm13v2.0', chrom, position, ancestral, derived}`): + reuse the YBrowse/ISOGG catalog row when (chrom,pos,ref,alt) matches so known + SNPs keep their `canonical_name`; else create a de-novo-named variant. + Link via `tree.haplogroup_variant` (`ancestral_allele`/`derived_allele`); + `reversion`/`polarity` → `annotations`. +5. **Post** — `recompute_backbone`; bump `tree_revision`. (No `reconcile_tilde_twins`, + no graft, no rename — all ISOGG-specific.) + +Reuses the existing engines: `tree_revision` (cache ETag), `recompute_backbone`, +and — for phase 3 — `tree_sample` (mig 0037) for the tip leaves. + +## Phasing + +1. **Y topology + SNPs — DONE.** exporter + loader; 1,204 nodes; validated against + `compare/summary.md` anchors (A–T at best-clade F1 ≈ 1.00; R-CTS4466 spine at depth 21). +2. **mt tree — DONE.** Exporter generalized to `build(chrom)` + `CONFIGS`. 
mt differs: + rooted at the human MRCA `Node1767` (RSRS), the **CHIMP outgroup tip is dropped**, + there is **no `internal_node_labels`** (mt clade names `L0`/`H1a1`/`U5b2a1` are + the display form, taken verbatim from `chrM.keepset.tsv`), and the tip/conflict + TSVs use `mt_haplogroup` columns. **1,765 nodes / 3,344 tips** (after the `n_mut ≥ 1` + keep-rule refinement dropped the empty 0-mutation placeholders; was 2,015); catalog has no mt + variants so all SNPs mint. Loader uses **`clear_dna(dna)`** (dna-scoped, FKs are + NO ACTION → delete dependents first) so **Y and mt coexist**; + `tree-init --denovo-mt --apply`. Verified: `H1→H→HV→R→N→L3→…→RSRS`, + served at `/api/v1/mt-tree`. +3. **Sample leaves — DONE.** `tips[]` → get-or-create `core.biosample` **by accession** + (deduped across lineages: a male is one biosample with a Y *and* an mt placement) + → `tree.haplogroup_sample` under the known `parentNode` (direct placement, not + call-resolution). `PRJEB*` cohorts EXTERNAL/public, own genome STANDARD/private. + 3,344 biosamples; Y 1,741 + mt 3,344 placements; reuses the mig-0037 leaf machinery + (`sample_count` + `…/node/{name}/samples`). WGS229 → `R-S1128`/`U5a1b1`; R-S1128 leaf + set = {NA20278, NA20279, WGS229} (matches the SCALEUP.md anchor). +4. **Curation — DONE (conflicts).** `conflicts[]` → `tree.denovo_conflict` (mig 0039), + populated by the loader and replaced per-lineage (cleared by `clear_dna`/`reset_tree`). + Read-only Curator queue at **`/curator/denovo-conflicts`** (page + HTMX fragment, + lineage filter, worst-magnitude first) via `du_db::denovo::list_conflicts`; dashboard + card + i18n (en/es/fr). 88 Y + 37 mt conflicts. + **Call mask — deferred.** The chrY Poznik mask is **12,986 fine intervals** — a poor + fit for `core.genome_region`'s ~85-row named-region model, and de-novo variants are all + *in-mask* by construction (redundant). The right home is a dedicated callable-interval + representation / coverage-norm, designed separately. 
+ +## Validation gates + +- Node + edge counts match the treefile; single root; fully reachable; no + multi-parent edges. +- Macro clades A–T present and monophyletic-ish (F1 ≈ 1.00 per `summary.md`). +- Spot anchors: `R-M269` clade size ≈ 289; `WGS229` terminal `R-FGC29076` on the + L21 path; mt `WGS229 → U5a1b1g`. +- Defining-SNP reuse rate against the hs1 catalog (how many known vs novel). + +## Deferred / open + +- **HGDP + SGDP scale-up** — re-export + re-ingest when the fuller tree lands + (`SCALEUP.md`); ingestion is a clean replace by design. +- **Node-name uniqueness policy** — when two de-novo nodes map to the same ISOGG + label (finer de-novo splits), the disambiguation rule (suffix `NodeN` vs top + SNP) needs a final call during phase 1. +- **Novel-SNP naming** — convention for de-novo SNPs absent from the catalog + (position-based vs node-anchored). +- **Branch lengths / ages** — `branchLength` is substitutions/site; feeding the + branch-age-estimation model is a later concern. diff --git a/documents/proposals/group-project-system.md b/documents/proposals/group-project-system.md index aaf8a342..ae935614 100644 --- a/documents/proposals/group-project-system.md +++ b/documents/proposals/group-project-system.md @@ -1,9 +1,18 @@ # Proposal: Privacy-First Group Project System +> **📝 Status (2026-06-07): forward; reconciled by D5.** This is the member-sovereign +> group-project proposal; `planning/d5-group-project-reconciliation.md` is the +> authoritative reconciliation with D1–D4 — it adopts this proposal's roles/policies/ +> succession, maps its aggregate records (`projectTreeView`/`projectModal`/ +> `strComparison`) onto D4's rails, makes governance/membership the AppView-enforced +> `research.project`/`project_member` ACL, and treats the member-sovereign visibility +> model as the post-claim state. Unbuilt (`social`/`research` placeholder schema). +> Triage: `triage-report.md` §4. 
+ **Status:** Draft **Author:** DecodingUs Team **Created:** 2025-12-07 -**Related:** [Atmosphere Lexicon](../Atmosphere_Lexicon.md), [IBD Matching System](../planning/ibd-matching-system.md) +**Related:** [Atmosphere Lexicon](../Atmosphere_Lexicon.md), IBD: [`d1-encrypted-edge-exchange.md`](../planning/d1-encrypted-edge-exchange.md) + [`d3-ibd-matching-impl.md`](../planning/d3-ibd-matching-impl.md) ## Overview diff --git a/documents/proposals/haplogroup-tree-merge-api-proposal.md b/documents/proposals/haplogroup-tree-merge-api-proposal.md deleted file mode 100644 index d666a3f4..00000000 --- a/documents/proposals/haplogroup-tree-merge-api-proposal.md +++ /dev/null @@ -1,346 +0,0 @@ -# Haplogroup Tree Merge API Proposal - -**Status**: Draft -**Created**: 2025-12-12 -**Author**: DecodingUs Team - ---- - -## Executive Summary - -DecodingUs maintains a comprehensive haplogroup tree that serves as a foundation for genetic genealogy research. As the field matures, multiple authoritative sources—ISOGG, ytree.net, academic researchers, and citizen scientists—independently develop and refine portions of the phylogenetic tree. Currently, integrating updates from these sources requires manual curation, which is time-consuming and error-prone. - -This proposal introduces an automated Tree Merge API that enables programmatic integration of external haplogroup trees into the DecodingUs baseline. The system is source-agnostic: any researcher or institution can submit tree data through a secured API endpoint, with configurable priority rules determining how conflicts are resolved. - -A key design decision is **variant-based matching**. Because different sources use different naming conventions (ytree.net uses "R-L21", ISOGG uses "R1b1a1a2a1a1", DecodingUs uses "R1b-L21"), the merge algorithm matches nodes by their defining genetic variants rather than names. This ensures accurate alignment regardless of nomenclature differences. 
- -The system tracks **multi-source provenance** through a JSONB column storing which sources contributed to each node and variant. ISOGG serves as the authoritative backbone and retains primary credit on existing nodes, while incoming sources receive credit for new discoveries—splits that reveal finer structure and new terminal branches they contribute. - -The API supports both full tree replacement and subtree merging under a designated anchor node, with dry-run capability for previewing changes before application. All endpoints are protected by API key authentication, ensuring only authorized integrations can modify tree data. - -This infrastructure positions DecodingUs as a collaborative hub for phylogenetic research while preserving attribution for original discoveries and maintaining data integrity through priority-based conflict resolution. - ---- - -## Overview - -Add API-key protected endpoints for automated haplogroup tree migration from external researcher sources into the DecodingUs baseline tree, with multi-source provenance tracking via JSONB column. - -## Design Philosophy - -- **DecodingUs is the baseline** - The existing internal tree that external sources merge into -- **Source-agnostic** - Any researcher or institution can submit trees (e.g., ISOGG, ytree.net, academic researchers, citizen scientists) -- **Priority ranking retained** - Configurable source priority for conflict resolution -- **Full attribution** - Track all contributing sources per node and variant - -## Credit Assignment Rules - -Merges are applied tree-by-tree from a designated anchor node. Credit follows a tiered model: - -1. **ISOGG is primary.** Existing nodes with ISOGG credit retain it. ISOGG serves as the authoritative backbone for haplogroup nomenclature. - -2. 
**Incoming source gets credit for new discoveries:** - - **New splits** - When incoming data reveals finer structure (new intermediate branches), the source gets credit for those split nodes - - **New terminal branches** - When incoming data adds leaf nodes not in the existing tree, the source gets credit - -This ensures ISOGG maintains credit for the established tree structure while researchers who discover new sub-branches or terminal clades receive attribution for their contributions. - -The `primaryCredit` field in provenance tracks which source gets discovery attribution, separate from `nodeProvenance` which tracks all contributors. - -## Requirements - -- **Attribution**: JSONB column for multi-source provenance tracking -- **Input Format**: Nested JSON tree structure (PhyloNode-like) -- **Conflict Resolution**: Priority-based (caller-specified source ordering) -- **Update Modes**: Both subtree anchor and full tree replacement - ---- - -## Technical Design - -### 1. Database Schema Changes - -Add `provenance JSONB` column to `tree.haplogroup` table with GIN index. - -```sql -ALTER TABLE tree.haplogroup ADD COLUMN provenance JSONB; -CREATE INDEX idx_haplogroup_provenance ON tree.haplogroup USING GIN (provenance); -``` - -### 2. Provenance Data Model - -```scala -case class HaplogroupProvenance( - primaryCredit: String, // Source with discovery credit (applying credit rules) - nodeProvenance: Set[String], // All sources contributing to node existence - variantProvenance: Map[String, Set[String]], // Per-variant source attribution - lastMergedAt: Option[LocalDateTime], - lastMergedFrom: Option[String] -) -``` - -**Credit assignment:** ISOGG credit is preserved on existing nodes. Incoming source gets `primaryCredit` for new splits and new terminal branches they contribute. - -### 3. 
Provenance JSONB Structure - -```json -{ - "primaryCredit": "ytree.net", - "nodeProvenance": ["ytree.net", "DecodingUs"], - "variantProvenance": { - "M269": ["ytree.net", "DecodingUs"], - "L21": ["ytree.net"] - }, - "lastMergedAt": "2025-12-12T10:30:00", - "lastMergedFrom": "ytree.net" -} -``` - ---- - -## API Design - -### Endpoints - -| Method | Endpoint | Description | -|--------|----------|-------------| -| POST | `/api/v1/manage/haplogroups/merge` | Merge full haplogroup tree | -| POST | `/api/v1/manage/haplogroups/merge/subtree` | Merge subtree under anchor node | -| POST | `/api/v1/manage/haplogroups/merge/preview` | Preview merge without applying | - -All endpoints are secured with X-API-Key authentication. - -### Request Models - -**PhyloNodeInput** - Input tree node structure (source-agnostic) -```scala -case class PhyloNodeInput( - name: String, - variants: List[String] = List.empty, - formedYbp: Option[Int] = None, - formedYbpLower: Option[Int] = None, - formedYbpUpper: Option[Int] = None, - tmrcaYbp: Option[Int] = None, - tmrcaYbpLower: Option[Int] = None, - tmrcaYbpUpper: Option[Int] = None, - children: List[PhyloNodeInput] = List.empty -) -``` - -**SourcePriorityConfig** - Dynamic priority ordering (caller specifies) -```scala -case class SourcePriorityConfig( - sourcePriorities: List[String], // First = highest priority - defaultPriority: Int = 100 -) -``` - -**ConflictStrategy** - Conflict resolution modes -- `HigherPriorityWins` - Higher priority source wins conflicts -- `KeepExisting` - Always keep existing values -- `AlwaysUpdate` - Always use incoming values - -**SubtreeMergeRequest** -```scala -case class SubtreeMergeRequest( - haplogroupType: HaplogroupType, // Y or MT - anchorHaplogroupName: String, // e.g., "R1b" - sourceTree: PhyloNodeInput, - sourceName: String, // Any identifier - priorityConfig: Option[SourcePriorityConfig] = None, - conflictStrategy: Option[ConflictStrategy] = None, - dryRun: Boolean = false -) -``` - -### Response 
Models - -**TreeMergeResponse** -```scala -case class TreeMergeResponse( - success: Boolean, - message: String, - statistics: MergeStatistics, - conflicts: List[MergeConflict] = List.empty, - errors: List[String] = List.empty -) - -case class MergeStatistics( - nodesProcessed: Int, - nodesCreated: Int, - nodesUpdated: Int, - nodesUnchanged: Int, - variantsAdded: Int, - variantsUpdated: Int, - relationshipsCreated: Int, - relationshipsUpdated: Int -) - -case class MergeConflict( - haplogroupName: String, - field: String, - existingValue: String, - newValue: String, - resolution: String, - existingSource: String, - newSource: String -) -``` - -### API Usage Example - -```bash -# Merge ytree.net tree under R1b anchor - ytree.net gets primary credit (default) -curl -X POST https://api.decodingus.com/api/v1/manage/haplogroups/merge/subtree \ - -H "X-API-Key: $API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "haplogroupType": "Y", - "anchorHaplogroupName": "R1b", - "sourceName": "ytree.net", - "sourceTree": { - "name": "R1b-L21", - "variants": ["L21", "S145"], - "children": [ - { - "name": "R1b-DF13", - "variants": ["DF13"], - "children": [] - } - ] - }, - "priorityConfig": { - "sourcePriorities": ["ytree.net", "DecodingUs"] - }, - "dryRun": false - }' -``` - ---- - -## Merge Algorithm - -### Node Matching Strategy - -**Match on variants, not names.** Different sources use different naming schemes: -- ytree.net: `R-L21` -- ISOGG: `R1b1a1a2a1a1` -- DecodingUs: `R1b-L21` - -All refer to the same haplogroup defined by variant `L21`. The merge algorithm matches nodes by their defining variants. - -### Process Flow - -Starting from anchor (e.g., R1b), walk down the tree: - -1. **Index existing tree by variant sets** - Build lookup from variant → haplogroup -2. 
**For each incoming node**, find matching existing node by variants: - - Exact match: Same defining variants → merge/update - - Partial overlap: Shared variants → potential match, check tree position - - No match: New branch → create -3. **Assign primary credit** - ISOGG preserved on existing nodes; incoming source credited for new splits and terminal branches -4. **Merge node data** based on priority config (age estimates, metadata) -5. **Recurse into children**, maintaining parent-child relationships -6. Return statistics and conflicts - -### Example: Merging ytree.net under R1b - -``` -Anchor: R1b (matched by variant M343) - └─ ytree.net sends: R-L21 [variants: L21, S145] - └─ Matches existing: R1b-L21 [variants: L21, S145] ✓ - └─ ytree.net sends: R-DF13 [variants: DF13] - └─ Matches existing: R1b-DF13 [variants: DF13] ✓ - └─ ytree.net sends: R-ZZ123 [variants: ZZ123] - └─ No match → CREATE new branch -``` - -### Handling Branch Splits - -As phylogenetic research advances, existing branches often need to be split into finer sub-branches. The merge algorithm detects and handles these splits automatically. - -**Split Detection:** -A split is detected when incoming data introduces intermediate nodes between an existing parent-child relationship. This occurs when: -1. Incoming tree has a node with variants that are a subset of an existing node's variants -2. The incoming node positions itself between the existing node and its parent -3. 
Some existing children should be reassigned to the new intermediate node - -**Split Process:** - -``` -BEFORE (DecodingUs tree): -R1b-L21 [variants: L21, S145, Z290] - └─ R1b-DF13 [variants: DF13] - └─ R1b-L513 [variants: L513] - -INCOMING (ytree.net): -R-L21 [variants: L21, S145] - └─ R-Z290 [variants: Z290] ← NEW intermediate branch - └─ R-DF13 [variants: DF13] - └─ R-L513 [variants: L513] - -AFTER (merged): -R1b-L21 [variants: L21, S145] ← Z290 removed, moved to child - └─ R1b-Z290 [variants: Z290] ← NEW intermediate node created - └─ R1b-DF13 [variants: DF13] ← Reassigned under Z290 - └─ R1b-L513 [variants: L513] ← Reassigned under Z290 -``` - -**Split Algorithm:** -1. **Identify variant redistribution** - Compare incoming node's variants against existing node -2. **Create intermediate node** - If incoming shows finer structure, create new branch with subset of variants -3. **Reassign children** - Move existing children under the new intermediate based on incoming tree structure -4. **Update parent node** - Remove variants that moved to the new intermediate -5. 
**Record provenance** - Credit the source that provided the split information - -**Conflict Handling:** -- If split conflicts with existing structure (e.g., would orphan branches), flag for manual review -- Priority config determines whether to apply split or preserve existing structure -- Dry-run mode shows proposed splits before application - -**Helper methods:** -- `findByVariants(variants: Set[String]): Option[Haplogroup]` - Lookup existing haplogroup by defining variants -- `variantOverlap(a: Set[String], b: Set[String]): Double` - Calculate Jaccard similarity for fuzzy matching - ---- - -## Implementation Files - -| File | Action | -|------|--------| -| `conf/evolutions/default/52.sql` | CREATE - Schema migration | -| `app/models/domain/haplogroups/HaplogroupProvenance.scala` | CREATE - Provenance model | -| `app/models/domain/haplogroups/Haplogroup.scala` | MODIFY - Add provenance field | -| `app/models/dal/domain/haplogroups/HaplogroupsTable.scala` | MODIFY - Add column + projection | -| `app/models/dal/MyPostgresProfile.scala` | MODIFY - Add JSONB type mapper | -| `app/models/api/haplogroups/TreeMergeModels.scala` | CREATE - API DTOs | -| `app/repositories/HaplogroupCoreRepository.scala` | MODIFY - Add provenance methods | -| `app/services/HaplogroupTreeMergeService.scala` | CREATE - Merge service | -| `app/controllers/HaplogroupTreeMergeController.scala` | CREATE - API controller | -| `conf/routes` | MODIFY - Add 3 routes | -| `app/modules/ServicesModule.scala` | MODIFY - Add service binding | -| `app/api/TreeMergeEndpoints.scala` | CREATE (optional) - Swagger docs | - -## Implementation Order - -1. Evolution (52.sql) -2. HaplogroupProvenance.scala -3. Haplogroup.scala update -4. HaplogroupsTable.scala update -5. MyPostgresProfile.scala type mapper -6. TreeMergeModels.scala -7. HaplogroupCoreRepository.scala updates -8. HaplogroupTreeMergeService.scala -9. HaplogroupTreeMergeController.scala -10. Routes update -11. ServicesModule.scala binding -12. 
Tapir endpoints (optional) - ---- - -## Notes - -- `sourceName` field accepts any string identifier (institution, researcher name, project name) -- `nodeProvenance` in input is optional - defaults to `sourceName` -- The service applies credit rules automatically based on variant prefixes and ancestry -- Dry-run mode available for testing merges without applying changes diff --git a/documents/proposals/pds-workbench-biosample-flow.md b/documents/proposals/pds-workbench-biosample-flow.md deleted file mode 100644 index 9c8fab4d..00000000 --- a/documents/proposals/pds-workbench-biosample-flow.md +++ /dev/null @@ -1,1149 +0,0 @@ -# PDS Workbench Biosample Flow Design - -## Overview - -This proposal describes a redesigned biosample management flow where researchers use the **Decoding-Us Navigator** desktop application as their primary interface for managing external biosamples, with data flowing naturally through their Personal Data Store (PDS) to the DecodingUs AppView. - -### Current State - -Today, researchers submit external biosamples via dedicated REST APIs: -- `POST /api/private/external/biosamples` (traditional biosample API) -- `POST /api/external-biosamples` (citizen/firehose-aware API) - -These APIs require: -1. Manual JSON payload construction -2. Direct API authentication -3. No local preview or validation -4. No workspace organization -5. Disconnect between local analysis and remote submission - -### Proposed State - -Researchers use Navigator's workspace to: -1. Organize biosamples into projects locally -2. Import and analyze BAM/CRAM files with full GATK pipeline -3. Compose biosample metadata with publication linkage -4. Sync biosamples to their PDS (creating Atmosphere Lexicon records) -5. 
DecodingUs AppView automatically ingests via Firehose subscription - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ RESEARCHER WORKFLOW │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────────────┐ ┌─────────────────────┐ │ -│ │ BAM/CRAM Files │────▶│ Navigator Desktop │ │ -│ │ (Local Analysis) │ │ Application │ │ -│ └─────────────────────┘ └──────────┬──────────┘ │ -│ │ │ -│ ┌──────────▼──────────┐ │ -│ │ Local Workspace │ │ -│ │ - Projects │ │ -│ │ - Biosamples │ │ -│ │ - Analysis Cache │ │ -│ └──────────┬──────────┘ │ -│ │ │ -│ ┌──────────▼──────────┐ │ -│ │ PDS Sync Engine │ │ -│ │ (AT Protocol) │ │ -│ └──────────┬──────────┘ │ -│ │ │ -└─────────────────────────────────────────┼────────────────────────────────┘ - │ - ┌──────────▼──────────┐ - │ Researcher's PDS │ - │ - workspace │ - │ - biosample(s) │ - │ - sequencerun(s) │ - │ - alignment(s) │ - │ - strProfile(s) │ - └──────────┬──────────┘ - │ - ┌──────────▼──────────┐ - │ AT Protocol │ - │ Firehose │ - └──────────┬──────────┘ - │ - ┌──────────▼──────────┐ - │ DecodingUs AppView │ - │ (Backend) │ - └──────────────────────┘ -``` - ---- - -## Record Flow Mapping - -### From Navigator Analysis to Atmosphere Lexicon Records - -| Navigator Concept | Atmosphere Record | Notes | -|:---|:---|:---| -| Workspace | `workspace` | Root container in PDS | -| Project | `project` | Aggregates biosamples for research | -| Biosample | `biosample` | Core sample with donor metadata | -| Library Analysis | `sequencerun` | From BAM/CRAM header parsing | -| WGS Metrics | `alignment` | Coverage stats, callable loci | -| Haplogroup Results | `biosample.haplogroups` | Y-DNA and mtDNA assignments | -| STR Extraction | `strProfile` | If STR calling enabled | -| Publication Link | External reference | Via `publication` field in request | - -### Analysis-to-Record Mapping - -``` -Navigator Analysis Pipeline Atmosphere Records Created 
-───────────────────────────── ───────────────────────────── - -┌─────────────────────────┐ -│ Import BAM/CRAM │ -│ (drag-drop or picker) │ -└───────────┬─────────────┘ - │ - ▼ -┌─────────────────────────┐ ┌─────────────────────────┐ -│ Library Statistics │────────▶│ sequencerun │ -│ - Platform detection │ │ - platformName │ -│ - Read length │ │ - instrumentModel │ -│ - Insert size │ │ - instrumentId │ -│ - @RG header parsing │ │ - testType │ -└───────────┬─────────────┘ │ - files[] │ - │ └─────────────────────────┘ - ▼ -┌─────────────────────────┐ ┌─────────────────────────┐ -│ WGS Metrics │────────▶│ alignment │ -│ - Mean coverage │ │ - referenceBuild │ -│ - Depth thresholds │ │ - aligner │ -│ - Per-contig stats │ │ - metrics.meanCoverage │ -│ - Callable loci │ │ - metrics.contigs[] │ -└───────────┬─────────────┘ └─────────────────────────┘ - │ - ▼ -┌─────────────────────────┐ ┌─────────────────────────┐ -│ Haplogroup Analysis │────────▶│ biosample.haplogroups │ -│ - Y-DNA tree matching │ │ - yDna.haplogroupName │ -│ - mtDNA tree matching │ │ - yDna.lineagePath[] │ -│ - Private SNP detection │ │ - yDna.privateVariants │ -└───────────┬─────────────┘ │ - mtDna.* │ - │ └─────────────────────────┘ - ▼ -┌─────────────────────────┐ ┌─────────────────────────┐ -│ STR Extraction │────────▶│ strProfile │ -│ (Optional, from WGS) │ │ - markers[] │ -│ - HipSTR/GangSTR │ │ - derivationMethod │ -└─────────────────────────┘ │ - source: WGS_DERIVED │ - └─────────────────────────┘ -``` - ---- - -## Data Model Extensions - -### Local Workspace State (Navigator) - -Navigator needs to track sync state for each local entity: - -```scala -case class SyncState( - atUri: Option[String], // AT URI if synced to PDS - atCid: Option[String], // Content ID for versioning - syncStatus: SyncStatus, // Pending, Synced, Modified, Conflict - lastSyncedAt: Option[Instant], - localVersion: Int, // Local modification counter - remoteVersion: Option[Int] // PDS meta.version -) - -enum SyncStatus: - 
case NotSynced // Never pushed to PDS - case Pending // Queued for sync - case Syncing // Currently uploading - case Synced // Up to date with PDS - case Modified // Local changes since last sync - case Conflict // Both local and remote changed - case Error // Sync failed -``` - -### Biosample Composition Model - -Navigator needs a richer model for composing biosamples before sync: - -```scala -case class ComposedBiosample( - // Core identity - localId: UUID, - sampleAccession: String, - donorIdentifier: Option[String], - - // Donor metadata - description: Option[String], - sex: Option[BiologicalSex], - location: Option[GeoCoordinate], - - // Analysis results (from Navigator pipeline) - analysisResults: Option[AnalysisResults], - - // Publication linkage - publication: Option[PublicationInfo], - - // Sync state - syncState: SyncState, - - // Project membership (local organization) - projectIds: Set[UUID] -) - -case class AnalysisResults( - libraryStats: Option[LibraryStatistics], - wgsMetrics: Option[WgsMetrics], - callableLoci: Option[CallableLociSummary], - yDnaHaplogroup: Option[HaplogroupResult], - mtDnaHaplogroup: Option[HaplogroupResult], - strProfile: Option[StrProfile], - privateSnps: Option[PrivateSnpReport] -) - -case class PublicationInfo( - doi: Option[String], - pubmedId: Option[String], - title: Option[String], - authors: Option[String], - year: Option[Int], - originalHaplogroups: Option[OriginalHaplogroupInfo] -) -``` - ---- - -## Navigator UI Modifications - -### 1. 
Enhanced Workspace View - -**Current**: Simple list of projects and biosamples -**Proposed**: Rich workspace with sync status indicators - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Workspace [↻ Sync All] │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ 🔵 PDS: did:plc:researcher123 Connected ✓ │ -│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ │ -│ │ -│ 📁 Viking Age Study (12 samples) [⬆ 3 pending] │ -│ │ │ -│ ├── 🧬 VIK-001 R-Z284 ✓ Synced │ -│ ├── 🧬 VIK-002 I-M253 ⬆ Modified (haplogroup updated) │ -│ ├── 🧬 VIK-003 R-U106 ○ Not synced │ -│ └── ... │ -│ │ -│ 📁 Iron Age Britain (8 samples) [✓ All synced] │ -│ │ │ -│ └── ... │ -│ │ -│ 📁 Unpublished Analysis (draft) [○ Local only] │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### 2. Biosample Composition Panel - -New panel for composing biosample metadata before sync: - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Biosample: VIK-003 [Save] [Sync ⬆] │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─ Identity ─────────────────────────────────────────────────────────┐ │ -│ │ Sample Accession: [VIK-003____________] │ │ -│ │ Donor Identifier: [DONOR-VIK-003______] (optional) │ │ -│ │ Description: [Ancient DNA from Birka burial site_________] │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Donor Metadata ───────────────────────────────────────────────────┐ │ -│ │ Biological Sex: (•) Male ( ) Female ( ) Unknown │ │ -│ │ Location: [59.3369°N, 17.5544°E] 📍 │ │ -│ │ Date Range: [750] to [850] CE │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Analysis Results (from Navigator) ────────────────────────────────┐ │ -│ │ ✓ Library Stats Platform: Illumina NovaSeq │ │ -│ │ ✓ WGS Metrics Coverage: 32.5x │ │ -│ │ ✓ Y-DNA 
Haplogroup R-U106 (score: 0.97) │ │ -│ │ ✓ mtDNA Haplogroup H1a (score: 0.99) │ │ -│ │ ○ STR Profile [Run STR Extraction] │ │ -│ │ ✓ Private SNPs 3 novel variants detected │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Publication Link (optional) ──────────────────────────────────────┐ │ -│ │ DOI: [10.1038/s41586-024-00001-1] [🔍 Lookup] │ │ -│ │ PubMed: [39012345] │ │ -│ │ Title: Ancient Genomics of Viking Age Scandinavia │ │ -│ │ Authors: Smith et al. │ │ -│ │ │ │ -│ │ Original Haplogroups (from paper): │ │ -│ │ Y-DNA: [R1a1a1_______] mtDNA: [H1a__________] │ │ -│ │ Notes: [Supplementary Table S2, Sample ID: BKA-003] │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Sync Status ──────────────────────────────────────────────────────┐ │ -│ │ Status: ○ Not yet synced to PDS │ │ -│ │ [ Sync to PDS ] [ Preview JSON ] │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### 3. Bulk Import Wizard - -For researchers importing multiple samples from a publication: - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Bulk Import Wizard Step 2/4 │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ Publication: 10.1038/s41586-024-00001-1 │ -│ "Ancient Genomics of Viking Age Scandinavia" │ -│ │ -│ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ │ -│ │ -│ Import CSV with sample metadata: │ -│ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ [sample_metadata.csv] [Browse...] 
│ │ -│ └─────────────────────────────────────────────────────────────────┘ │ -│ │ -│ Column Mapping: │ -│ ┌────────────────────┬────────────────────────────────────────────┐ │ -│ │ CSV Column │ Maps To │ │ -│ ├────────────────────┼────────────────────────────────────────────┤ │ -│ │ sample_id │ [Sample Accession ▼] │ │ -│ │ sex │ [Biological Sex ▼] │ │ -│ │ lat │ [Latitude ▼] │ │ -│ │ lon │ [Longitude ▼] │ │ -│ │ y_haplogroup │ [Original Y-DNA ▼] │ │ -│ │ mt_haplogroup │ [Original mtDNA ▼] │ │ -│ │ bam_path │ [BAM File Path ▼] │ │ -│ └────────────────────┴────────────────────────────────────────────┘ │ -│ │ -│ Preview (first 5 rows): │ -│ ┌────────┬─────┬─────────┬──────────┬────────────────────────────┐ │ -│ │ ID │ Sex │ Y-Hg │ mt-Hg │ BAM │ │ -│ ├────────┼─────┼─────────┼──────────┼────────────────────────────┤ │ -│ │ VIK-01 │ M │ R-Z284 │ H1a │ /data/viking/VIK-01.bam │ │ -│ │ VIK-02 │ M │ I-M253 │ U5b │ /data/viking/VIK-02.bam │ │ -│ │ VIK-03 │ F │ - │ H1c │ /data/viking/VIK-03.bam │ │ -│ │ VIK-04 │ M │ R-U106 │ K1a │ /data/viking/VIK-04.bam │ │ -│ │ VIK-05 │ M │ N-L550 │ H6a │ /data/viking/VIK-05.bam │ │ -│ └────────┴─────┴─────────┴──────────┴────────────────────────────┘ │ -│ │ -│ [◀ Back] [Next: Analyze ▶]│ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### 4. 
Sync Status Dashboard - -Global view of PDS sync state: - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ PDS Sync Dashboard │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ Connection: did:plc:researcher123 @ bsky.social ✓ Connected │ -│ │ -│ ┌─ Sync Summary ─────────────────────────────────────────────────────┐ │ -│ │ │ │ -│ │ Total Biosamples: 156 │ │ -│ │ ├── ✓ Synced: 142 (91%) │ │ -│ │ ├── ⬆ Pending: 8 (5%) │ │ -│ │ ├── ⚠ Conflicts: 2 (1%) │ │ -│ │ └── ○ Local only: 4 (3%) │ │ -│ │ │ │ -│ │ Last sync: 2025-12-07 14:30:22 UTC │ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Pending Changes ──────────────────────────────────────────────────┐ │ -│ │ │ │ -│ │ ☑ VIK-002 Modified: Haplogroup refined R-Z284 → R-Z284>BY3456 │ │ -│ │ ☑ VIK-015 New: Ready for initial sync │ │ -│ │ ☑ VIK-016 New: Ready for initial sync │ │ -│ │ ☐ IAB-003 Modified: Coverage updated (re-analysis) │ │ -│ │ ... │ │ -│ │ │ │ -│ │ [Select All] [Deselect All] [Sync Selected (3) ⬆] │ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Conflicts (require resolution) ───────────────────────────────────┐ │ -│ │ │ │ -│ │ ⚠ ANC-007 Local: mtDNA H1a Remote: mtDNA H1a1 (updated by │ │ -│ │ AppView haplogroup refinement) │ │ -│ │ [Keep Local] [Accept Remote] [View Diff] │ │ -│ │ │ │ -│ │ ⚠ ANC-012 Local: deleted Remote: still exists │ │ -│ │ [Confirm Delete] [Restore Local] │ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### 5. 
Publication Lookup Integration - -DOI/PubMed lookup with auto-population: - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Publication Lookup │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ Enter DOI or PubMed ID: [10.1038/s41586-024-00001-1____] [🔍 Search] │ -│ │ -│ ┌─ Found Publication ────────────────────────────────────────────────┐ │ -│ │ │ │ -│ │ Title: Ancient Genomics of Viking Age Scandinavia │ │ -│ │ Authors: Smith J, Jones A, Brown B, et al. │ │ -│ │ Journal: Nature (2024) │ │ -│ │ DOI: 10.1038/s41586-024-00001-1 │ │ -│ │ PubMed: 39012345 │ │ -│ │ │ │ -│ │ Abstract: (truncated) │ │ -│ │ We present genome-wide data from 150 ancient individuals from │ │ -│ │ Viking Age Scandinavia, revealing complex patterns of... │ │ -│ │ │ │ -│ │ ┌─ Already in DecodingUs ──────────────────────────────────────┐ │ │ -│ │ │ ✓ This publication exists in our database │ │ │ -│ │ │ Current samples linked: 127 │ │ │ -│ │ │ [View Publication Page] │ │ │ -│ │ └──────────────────────────────────────────────────────────────┘ │ │ -│ │ │ │ -│ │ [Use This Publication] [Cancel] │ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - ---- - -## PDS Sync Protocol - -### Record Creation Flow - -When syncing a new biosample to PDS: - -``` -Navigator PDS AppView -──────── ─── ─────── - │ │ │ - │ 1. Build Atmosphere records │ │ - │ ────────────────────────▶ │ │ - │ │ │ - │ POST com.atproto.repo.createRecord │ - │ collection: com.decodingus.atmosphere.biosample │ - │ ─────────────────────────────▶ │ - │ │ │ - │ ◀─ { uri, cid } │ │ - │ │ │ - │ 2. 
Store atUri/atCid locally │ │ - │ │ │ - │ │ Firehose event │ - │ │ ─────────────────────────▶ │ - │ │ │ - │ │ Process biosample │ - │ │ Create DB records │ - │ │ Link to publication │ - │ │ Queue haplogroup work │ - │ │ │ -``` - -### Multi-Record Transaction - -A complete biosample with sequence data requires multiple records: - -```scala -// Pseudo-code for sync operation -def syncBiosampleToPds(biosample: ComposedBiosample): Future[SyncResult] = { - for { - // 1. Create sequence run record first (child) - sequenceRunUri <- createSequenceRunRecord(biosample.analysisResults) - - // 2. Create alignment record (grandchild) - alignmentUri <- createAlignmentRecord(biosample.analysisResults, sequenceRunUri) - - // 3. Create STR profile if available - strProfileUri <- biosample.analysisResults.strProfile match { - case Some(str) => createStrProfileRecord(str).map(Some(_)) - case None => Future.successful(None) - } - - // 4. Create biosample record with references - biosampleUri <- createBiosampleRecord( - biosample, - sequenceRunRefs = List(sequenceRunUri), - strProfileRef = strProfileUri - ) - - // 5. 
Update workspace record to include new biosample - _ <- updateWorkspaceRecord(biosampleUri) - - } yield SyncResult.Success(biosampleUri) -} -``` - -### Conflict Resolution Strategy - -```scala -enum ConflictResolution: - case KeepLocal // Overwrite PDS with local version - case AcceptRemote // Discard local changes, pull from PDS - case Merge // Attempt automatic merge (field-level) - case Manual // Require user intervention - -def resolveConflict( - local: ComposedBiosample, - remote: AtmosphereBiosample -): ConflictResolution = { - - // AppView-computed fields always win (haplogroup refinement) - val appViewFields = Set("haplogroups.yDna", "haplogroups.mtDna") - - // If only AppView fields changed remotely, merge - if (remote.meta.lastModifiedField.exists(appViewFields.contains)) { - ConflictResolution.Merge - } - // If local has newer analysis results, prefer local - else if (local.analysisResults.isDefined && - local.syncState.localVersion > remote.meta.version) { - ConflictResolution.KeepLocal - } - // Otherwise require manual resolution - else { - ConflictResolution.Manual - } -} -``` - ---- - -## API Integration - -### DecodingUs Backend Changes - -The existing `CitizenBiosampleController` and Firehose handler already support this flow. Minor enhancements needed: - -1. **Publication Lookup Endpoint** (new) - ``` - GET /api/publications/lookup?doi={doi}&pubmed={pubmedId} - ``` - Returns publication metadata for Navigator's lookup feature. - -2. **Batch Validation Endpoint** (new) - ``` - POST /api/external-biosamples/validate - ``` - Validates a batch of biosample records without creating them. - -3. **Sync Status Endpoint** (new) - ``` - GET /api/external-biosamples/sync-status?atUris[]={uri1}&atUris[]={uri2} - ``` - Returns current state of biosamples in AppView (for conflict detection). 
- -### Navigator API Client - -New module for AT Protocol and DecodingUs API integration: - -```scala -// AT Protocol client for PDS operations -trait PdsClient { - def createRecord[T](collection: String, record: T): Future[CreateRecordResponse] - def updateRecord[T](uri: String, record: T): Future[UpdateRecordResponse] - def deleteRecord(uri: String): Future[Unit] - def getRecord[T](uri: String): Future[Option[T]] - def listRecords[T](collection: String, cursor: Option[String]): Future[ListRecordsResponse[T]] -} - -// DecodingUs API client for auxiliary operations -trait DecodingUsClient { - def lookupPublication(doi: Option[String], pubmedId: Option[String]): Future[Option[Publication]] - def validateBiosamples(biosamples: Seq[BiosampleValidation]): Future[ValidationResult] - def getSyncStatus(atUris: Seq[String]): Future[Map[String, SyncStatus]] -} -``` - ---- - -## Implementation Phases - -### Phase 1: Local Composition (MVP) -- Biosample composition panel in Navigator -- Publication lookup integration -- Local-only save (no PDS sync yet) -- Export to JSON for manual API submission - -### Phase 2: PDS Sync -- AT Protocol authentication in Navigator -- Single-record sync (biosample only) -- Basic conflict detection -- Sync status indicators in UI - -### Phase 3: Full Record Graph -- Multi-record sync (sequencerun, alignment, strProfile) -- Workspace record management -- Bulk sync operations -- Background sync with retry - -### Phase 4: Bidirectional Sync -- Pull changes from PDS (AppView updates) -- Automatic conflict resolution for AppView-computed fields -- Real-time sync status updates -- Offline queue with eventual consistency - ---- - -## Security Considerations - -### Authentication Flow - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ OAuth 2.0 + DPoP Flow │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ 1. User clicks "Connect PDS" in Navigator │ -│ 2. 
Navigator opens browser to PDS authorization URL │ -│ 3. User authenticates with PDS (handle + password or passkey) │ -│ 4. PDS redirects back to Navigator with auth code │ -│ 5. Navigator exchanges code for access token + DPoP key │ -│ 6. Navigator stores refresh token securely (OS keychain) │ -│ 7. Navigator uses access token for API calls │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### Data Privacy - -- All genomic data stays local until explicit sync -- Only Atmosphere record metadata synced to PDS -- File locations can be local paths (not synced) or remote URLs -- User controls what gets published to their PDS - ---- - -## Benefits - -### For Researchers -1. **Unified workflow**: Analysis and submission in one tool -2. **Local preview**: Review and validate before publishing -3. **Batch operations**: Import and sync multiple samples efficiently -4. **Offline capable**: Work without internet, sync later -5. **Version control**: Track changes, resolve conflicts - -### For DecodingUs -1. **Reduced API complexity**: Firehose handles all ingestion -2. **Better data quality**: Navigator validates before sync -3. **Richer metadata**: Full analysis results included -4. **Provenance tracking**: Clear audit trail via AT Protocol - -### For the Ecosystem -1. **Data sovereignty**: Researchers own their PDS data -2. **Interoperability**: Standard AT Protocol records -3. **Decentralization**: No single point of failure -4. 
**Transparency**: Public record of contributions - ---- - -## Cross-Researcher Deduplication - -### The Problem - -Many researchers work with the same canonical datasets: -- **1000 Genomes Project**: ~3,200 samples widely used in population genetics -- **Human Genome Diversity Project (HGDP)**: ~900 samples -- **Simons Genome Diversity Project**: ~300 samples -- **Ancient DNA publications**: Shared samples across meta-analyses - -When multiple researchers sync these samples to their PDS, the AppView receives duplicate records for the same biological sample from different sources. - -### Deduplication Model - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ CANONICAL SAMPLE REGISTRY │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ Canonical Sample: HG00096 (1000 Genomes) │ -│ ══════════════════════════════════════════ │ -│ │ -│ ┌─ Authoritative Identity ─────────────────────────────────────────┐ │ -│ │ Canonical Accession: HG00096 │ │ -│ │ Registry: 1000GENOMES │ │ -│ │ ENA Accession: SAMEA3302682 │ │ -│ │ BioSample: SAMN00001598 │ │ -│ └──────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Researcher Contributions ───────────────────────────────────────┐ │ -│ │ │ │ -│ │ did:plc:alice → at://did:plc:alice/.../biosample/hg00096 │ │ -│ │ Analysis: 32x coverage, haplogroup R-L21 │ │ -│ │ Files: local analysis only │ │ -│ │ │ │ -│ │ did:plc:bob → at://did:plc:bob/.../biosample/1kg-hg00096 │ │ -│ │ Analysis: 45x coverage (deep WGS) │ │ -│ │ Files: s3://bob-lab/HG00096.cram │ │ -│ │ │ │ -│ │ did:plc:carol → at://did:plc:carol/.../biosample/hg00096 │ │ -│ │ Analysis: haplogroup R-L21>FT12345 (novel) │ │ -│ │ STR Profile: Y-111 │ │ -│ │ │ │ -│ └───────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─ Merged View (AppView Computed) ─────────────────────────────────┐ │ -│ │ Best Coverage: 45x (from did:plc:bob) │ │ -│ │ Refined Haplogroup: 
R-L21>FT12345 (from did:plc:carol) │ │ -│ │ STR Profile: Y-111 markers (from did:plc:carol) │ │ -│ │ Contributing Researchers: 3 │ │ -│ └──────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### Canonical Accession Resolution - -The AppView maintains a registry of known canonical sample identifiers: - -```scala -case class CanonicalSampleRegistry( - registryCode: String, // "1000GENOMES", "HGDP", "SGDP", "ENA", "NCBI" - pattern: Regex, // Pattern to match accessions - normalizeFn: String => String // Normalize variations (HG00096 vs hg00096) -) - -val knownRegistries = Seq( - CanonicalSampleRegistry( - "1000GENOMES", - """^(HG|NA)\d{5}$""".r, - _.toUpperCase - ), - CanonicalSampleRegistry( - "HGDP", - """^HGDP\d{5}$""".r, - _.toUpperCase - ), - CanonicalSampleRegistry( - "ENA", - """^SAM[END]A?\d+$""".r, - _.toUpperCase - ), - CanonicalSampleRegistry( - "NCBI_BIOSAMPLE", - """^SAMN\d+$""".r, - _.toUpperCase - ) -) - -def resolveCanonicalId(sampleAccession: String): Option[CanonicalIdentity] = { - knownRegistries.collectFirst { - case reg if reg.pattern.matches(sampleAccession) => - CanonicalIdentity( - registry = reg.registryCode, - canonicalAccession = reg.normalizeFn(sampleAccession) - ) - } -} -``` - -### Database Schema for Deduplication - -```sql --- Canonical sample identity (one per biological sample) -CREATE TABLE canonical_sample ( - id SERIAL PRIMARY KEY, - registry VARCHAR(50) NOT NULL, -- '1000GENOMES', 'HGDP', 'ENA' - canonical_accession VARCHAR(255) NOT NULL, - - -- Cross-references to other registries - ena_accession VARCHAR(50), - ncbi_biosample VARCHAR(50), - - -- Merged/computed best values - best_coverage FLOAT, - best_coverage_source_at_uri TEXT, - refined_y_haplogroup TEXT, - refined_y_haplogroup_source_at_uri TEXT, - refined_mt_haplogroup TEXT, - refined_mt_haplogroup_source_at_uri TEXT, - - -- Tracking - contributor_count INT DEFAULT 0, - 
created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW(), - - UNIQUE(registry, canonical_accession) -); - --- Link between canonical samples and researcher contributions -CREATE TABLE canonical_sample_contribution ( - id SERIAL PRIMARY KEY, - canonical_sample_id INT REFERENCES canonical_sample(id), - - -- The researcher's PDS record - contributor_did TEXT NOT NULL, - biosample_at_uri TEXT NOT NULL, - biosample_at_cid TEXT, - - -- What this contribution provides - coverage FLOAT, - y_haplogroup TEXT, - mt_haplogroup TEXT, - has_str_profile BOOLEAN DEFAULT FALSE, - has_private_variants BOOLEAN DEFAULT FALSE, - - -- File availability - files_accessible BOOLEAN DEFAULT FALSE, -- Can AppView access the files? - - created_at TIMESTAMP DEFAULT NOW(), - updated_at TIMESTAMP DEFAULT NOW(), - - UNIQUE(canonical_sample_id, contributor_did) -); - --- Index for fast lookup during Firehose processing -CREATE INDEX idx_canonical_sample_accession - ON canonical_sample(registry, canonical_accession); -``` - -### Firehose Event Handling with Deduplication - -```scala -def handleBiosampleCreate(event: BiosampleCreateEvent): Future[ProcessingResult] = { - val biosample = event.record - - // 1. Check if this matches a canonical registry - val canonicalId = resolveCanonicalId(biosample.sampleAccession) - - canonicalId match { - case Some(canonical) => - // This is a known canonical sample (1KG, HGDP, etc.) 
- handleCanonicalSampleContribution(canonical, biosample, event.citizenDid) - - case None => - // Novel sample - check for cross-researcher duplicates by other means - handleNovelSample(biosample, event.citizenDid) - } -} - -def handleCanonicalSampleContribution( - canonical: CanonicalIdentity, - biosample: AtmosphereBiosample, - contributorDid: String -): Future[ProcessingResult] = { - - for { - // Find or create canonical sample record - canonicalSample <- canonicalSampleRepo.findOrCreate( - canonical.registry, - canonical.canonicalAccession - ) - - // Record this researcher's contribution - contribution <- contributionRepo.upsert( - CanonicalSampleContribution( - canonicalSampleId = canonicalSample.id, - contributorDid = contributorDid, - biosampleAtUri = biosample.atUri, - biosampleAtCid = biosample.meta.atCid, - coverage = biosample.extractCoverage(), - yHaplogroup = biosample.haplogroups.flatMap(_.yDna.map(_.haplogroupName)), - mtHaplogroup = biosample.haplogroups.flatMap(_.mtDna.map(_.haplogroupName)), - hasStrProfile = biosample.strProfileRef.isDefined, - hasPrivateVariants = biosample.hasPrivateVariants() - ) - ) - - // Recompute merged "best" values - _ <- recomputeCanonicalSampleMergedValues(canonicalSample.id) - - } yield ProcessingResult.CanonicalContribution( - canonicalSampleId = canonicalSample.id, - isNewContributor = contribution.isNew, - improvedFields = contribution.improvements - ) -} -``` - -### Merged Value Computation - -When multiple researchers contribute data for the same canonical sample: - -```scala -def recomputeCanonicalSampleMergedValues(canonicalSampleId: Int): Future[Unit] = { - for { - contributions <- contributionRepo.findByCanonicalSample(canonicalSampleId) - - // Best coverage = highest value - bestCoverage = contributions - .filter(_.coverage.isDefined) - .maxByOption(_.coverage.get) - - // Best haplogroup = most refined (deepest tree depth) - bestYHaplogroup = contributions - .flatMap(c => c.yHaplogroup.map(h => (c, h))) - 
.maxByOption { case (_, hg) => haplogroupTreeDepth(hg) } - - bestMtHaplogroup = contributions - .flatMap(c => c.mtHaplogroup.map(h => (c, h))) - .maxByOption { case (_, hg) => haplogroupTreeDepth(hg) } - - // Update canonical sample with merged values - _ <- canonicalSampleRepo.update( - canonicalSampleId, - CanonicalSampleUpdate( - bestCoverage = bestCoverage.flatMap(_.coverage), - bestCoverageSourceAtUri = bestCoverage.map(_.biosampleAtUri), - refinedYHaplogroup = bestYHaplogroup.map(_._2), - refinedYHaplogroupSourceAtUri = bestYHaplogroup.map(_._1.biosampleAtUri), - refinedMtHaplogroup = bestMtHaplogroup.map(_._2), - refinedMtHaplogroupSourceAtUri = bestMtHaplogroup.map(_._1.biosampleAtUri), - contributorCount = contributions.map(_.contributorDid).distinct.size - ) - ) - } yield () -} -``` - -### Navigator UI: Duplicate Detection - -When a researcher imports a sample, Navigator checks for existing canonical samples: - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Import Sample: HG00096 │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─ Canonical Sample Detected ────────────────────────────────────────┐ │ -│ │ │ │ -│ │ ⚠ This sample exists in the 1000 Genomes Project registry │ │ -│ │ │ │ -│ │ Canonical ID: HG00096 │ │ -│ │ Registry: 1000 Genomes Project (Phase 3) │ │ -│ │ ENA Accession: SAMEA3302682 │ │ -│ │ Population: GBR (British) │ │ -│ │ │ │ -│ │ ┌─ Existing Contributions in DecodingUs ───────────────────────┐ │ │ -│ │ │ 3 researchers have contributed analysis for this sample: │ │ │ -│ │ │ │ │ │ -│ │ │ • Best coverage: 45x (from did:plc:bob) │ │ │ -│ │ │ • Y-DNA: R-L21 (2 contributors agree) │ │ │ -│ │ │ • mtDNA: H1a (3 contributors agree) │ │ │ -│ │ │ • STR Profile: Available (Y-111) │ │ │ -│ │ └───────────────────────────────────────────────────────────────┘ │ │ -│ │ │ │ -│ │ Your contribution will be added to the merged record. 
│ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ What would you like to do? │ -│ │ -│ (•) Add my analysis as a new contribution │ -│ Your haplogroup calls and coverage will be compared with │ -│ existing data. Novel findings (deeper haplogroups, private │ -│ SNPs) will be highlighted. │ -│ │ -│ ( ) Skip this sample (already well-characterized) │ -│ │ -│ ( ) Import anyway as a separate local sample │ -│ (Will not sync to PDS) │ -│ │ -│ [Continue Import] [Cancel] │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### Contribution Value Indicator - -Show researchers what value their contribution adds: - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Contribution Analysis: HG00096 │ -├─────────────────────────────────────────────────────────────────────────┤ -│ │ -│ Your Analysis Results: │ -│ ┌─────────────────────┬──────────────────┬────────────────────────┐ │ -│ │ Field │ Your Value │ Current Best │ │ -│ ├─────────────────────┼──────────────────┼────────────────────────┤ │ -│ │ Coverage │ 32x │ 45x (did:plc:bob) │ │ -│ │ Y-DNA Haplogroup │ R-L21>FT54321 🆕 │ R-L21 │ │ -│ │ mtDNA Haplogroup │ H1a │ H1a (same) │ │ -│ │ STR Profile │ Y-67 │ Y-111 (more markers) │ │ -│ │ Private Variants │ 2 novel SNPs 🆕 │ None detected │ │ -│ └─────────────────────┴──────────────────┴────────────────────────┘ │ -│ │ -│ ┌─ Contribution Value ───────────────────────────────────────────────┐ │ -│ │ │ │ -│ │ ✓ Your Y-DNA haplogroup is MORE REFINED than current best │ │ -│ │ R-L21 → R-L21>FT54321 (new terminal SNP!) 
│ │ -│ │ │ │ -│ │ ✓ You discovered 2 NOVEL PRIVATE VARIANTS │ │ -│ │ These will be submitted to the Haplogroup Discovery System │ │ -│ │ │ │ -│ │ ○ Your coverage (32x) does not improve on current best (45x) │ │ -│ │ │ │ -│ │ ○ Your STR profile (Y-67) has fewer markers than current (Y-111) │ │ -│ │ │ │ -│ └────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ [Sync Contribution] [Cancel] │ -│ │ -└─────────────────────────────────────────────────────────────────────────┘ -``` - -### API Endpoints for Deduplication - -``` -# Check if a sample accession is canonical -GET /api/canonical-samples/lookup?accession={accession} -Response: { - "isCanonical": true, - "registry": "1000GENOMES", - "canonicalAccession": "HG00096", - "crossReferences": { - "ena": "SAMEA3302682", - "ncbiBiosample": "SAMN00001598" - }, - "contributorCount": 3, - "mergedValues": { - "bestCoverage": 45.0, - "refinedYHaplogroup": "R-L21", - "refinedMtHaplogroup": "H1a", - "hasStrProfile": true - } -} - -# Get all contributions for a canonical sample -GET /api/canonical-samples/{registry}/{accession}/contributions -Response: { - "canonicalAccession": "HG00096", - "contributions": [ - { - "contributorDid": "did:plc:alice", - "biosampleAtUri": "at://did:plc:alice/.../biosample/hg00096", - "coverage": 32.0, - "yHaplogroup": "R-L21>FT54321", - "mtHaplogroup": "H1a", - "hasStrProfile": true, - "hasPrivateVariants": true, - "contributedAt": "2025-12-07T10:30:00Z" - }, - // ... 
- ] -} - -# Preview contribution value before sync -POST /api/canonical-samples/preview-contribution -Request: { - "sampleAccession": "HG00096", - "coverage": 32.0, - "yHaplogroup": "R-L21>FT54321", - "mtHaplogroup": "H1a", - "strMarkerCount": 67, - "privateVariantCount": 2 -} -Response: { - "isCanonical": true, - "improvements": [ - { "field": "yHaplogroup", "current": "R-L21", "yours": "R-L21>FT54321", "isImprovement": true }, - { "field": "privateVariants", "current": 0, "yours": 2, "isImprovement": true } - ], - "noChange": [ - { "field": "mtHaplogroup", "value": "H1a" } - ], - "notBest": [ - { "field": "coverage", "current": 45.0, "yours": 32.0 }, - { "field": "strMarkerCount", "current": 111, "yours": 67 } - ] -} -``` - -### Conflict Resolution for Canonical Samples - -When contributions disagree: - -```scala -case class HaplogroupDisagreement( - canonicalSampleId: Int, - field: String, // "yHaplogroup" or "mtHaplogroup" - values: Map[String, Set[String]], // haplogroup -> Set of contributor DIDs - suggestedResolution: Option[String], - resolutionReason: Option[String] -) - -def detectHaplogroupDisagreements(canonicalSampleId: Int): Future[Seq[HaplogroupDisagreement]] = { - for { - contributions <- contributionRepo.findByCanonicalSample(canonicalSampleId) - - yHaplogroupGroups = contributions - .flatMap(c => c.yHaplogroup.map(h => (h, c.contributorDid))) - .groupBy(_._1) - .view.mapValues(_.map(_._2).toSet).toMap - - yDisagreement = if (yHaplogroupGroups.size > 1) { - // Check if disagreements are just refinement levels - val baseHaplogroups = yHaplogroupGroups.keys.map(extractBaseHaplogroup).toSet - if (baseHaplogroups.size == 1) { - // All agree on base, just different refinement levels - val mostRefined = yHaplogroupGroups.keys.maxBy(haplogroupTreeDepth) - Some(HaplogroupDisagreement( - canonicalSampleId, - "yHaplogroup", - yHaplogroupGroups, - suggestedResolution = Some(mostRefined), - resolutionReason = Some("Most refined call, compatible with others") - 
)) - } else { - // True disagreement - needs manual review - Some(HaplogroupDisagreement( - canonicalSampleId, - "yHaplogroup", - yHaplogroupGroups, - suggestedResolution = None, - resolutionReason = Some("Conflicting base haplogroups - curator review needed") - )) - } - } else None - - } yield Seq(yDisagreement, mtDisagreement).flatten -} -``` - ---- - -## Open Questions - -1. **File storage**: Should sequence files (BAM/CRAM) be referenced by local path, remote URL, or uploaded to blob storage? - -2. **Project visibility**: Should `project` records be public or private by default? - -3. **Batch limits**: What's the maximum number of records to sync in one operation? - -4. **Offline duration**: How long should Navigator queue changes before warning about potential conflicts? - -5. **AppView authority**: Should AppView-computed updates (haplogroup refinement) automatically overwrite local values? - -6. **Canonical registry maintenance**: Who maintains the list of known canonical registries (1KG, HGDP, etc.) and their accession patterns? - -7. **Contribution attribution**: How should we display multi-researcher contributions on the public biosample page? - -8. **Disagreement handling**: When researchers disagree on haplogroup calls, should the AppView auto-resolve or flag for curator review? - ---- - -## Related Documents - -- [Atmosphere Lexicon Design](../Atmosphere_Lexicon.md) - Record schemas -- [Group Project System](./group-project-system.md) - Project membership model -- [Haplogroup Discovery System](../planning/haplogroup-discovery-system.md) - Private variant flow diff --git a/documents/proposals/triage-report.md b/documents/proposals/triage-report.md new file mode 100644 index 00000000..9812a765 --- /dev/null +++ b/documents/proposals/triage-report.md @@ -0,0 +1,220 @@ +# Proposals ↔ Rust Triage Report + +**Started:** 2026-06-07. 
Companion to `../planning/design-doc-triage-report.md`, +same method: compare each `documents/proposals/` doc against the Rust +implementation and record a verdict + recommended action **for later action**. + +**Verdict legend:** ✅ current · 📝 update doc · 🔧 make code compliant · ⚖️ split · +🗑️ remove (superseded/realized/dropped). + +## Execution log (2026-06-07) + +- **Removed:** #1 variant-schema-simplification, #2 haplogroup-tree-merge-api, + #8 pds-workbench-biosample-flow. Inbound refs rewired in `planning/` + (haplogroup-discovery-system → "realized in `core.variant`"; design-roadmap + Bucket-B list drops pds-workbench). +- **Headers added** to the kept proposals: #3 (realized; kept for methodology), + #4 (forward; reconciled by D5), #5/#6 (forward Bucket B), #7 (deferred). #3's + dead `appview-pds-backfeed-system.md` refs and #4's dead `ibd-matching-system.md` + ref were rewired. +- **#7 Patronage kept as deferred** (revive past ~a few hundred active users). + +## Status index + +| # | Proposal | Verdict | Action | +|---|----------|---------|--------| +| 1 | variant-schema-simplification.md | 🗑️ REMOVED (realized) | done | +| 2 | haplogroup-tree-merge-api-proposal.md | 🗑️ REMOVED (realized + extended) | done | +| 3 | branch-age-estimation.md | 📝 Keep (realized; methodology ref) | header + ref fix ✓ | +| 4 | group-project-system.md | 📝 Keep + reconcile via D5 | header + ref fix ✓ | +| 5 | Messaging_and_Feed_System.md | 📝 Keep + reconcile (forward; no-PII) | header ✓ | +| 6 | Reputation_System_Implementation.md | 📝 Keep + reconcile (forward) | header ✓ | +| 7 | Patronage_Donation_System.md | 📝 Keep (deferred — revive at scale) | header ✓ | +| 8 | pds-workbench-biosample-flow.md | 🗑️ REMOVED (Navigator-side) | done | + +--- + +## 1. variant-schema-simplification.md — 🗑️ Remove (realized) + +**Compared against:** `core.variant` (mig 0002, universal JSONB coordinates/aliases), +`du_db::variant`, the YBrowse ingestion pipeline. 
+ +**Finding:** the doc is already marked **"✅ Implemented" (2025-12-14)** and is just a +thin documentation index for the Scala `variant_v2` model. The Rust rewrite realized +it as `core.variant` (single row per site, JSONB `coordinates`/`aliases`, +`ON CONFLICT` batch upserts in the YBrowse pipeline). Nothing to keep — it's a +realized-status pointer with the Scala table name. + +**Inbound ref to fix on removal:** `planning/haplogroup-discovery-system.md` cites it +as the variant-schema prerequisite ("See: documents/proposals/variant-schema-simplification.md"). +Its other links point at `schema/` + `deployment/` guides (separate doc sets, their +own passes). + +**Recommended action:** **remove**; reword the discovery-system prereq line to +"realized in `core.variant` (mig 0002)" instead of linking this file. + +--- + +## 2. haplogroup-tree-merge-api-proposal.md — 🗑️ Remove (realized + extended) + +**Compared against:** `/manage/haplogroups/merge` + `/merge/preview` +(`routes/versioning.rs`), `du_domain::merge` (Identify-Match-Graft), +`du_db::snp_graft`, `tree.haplogroup.provenance` JSONB, change-sets + `/curator/reviews`. + +**Finding:** the proposal's core is **realized**: variant/SNP-based node matching, +the `provenance` JSONB on `tree.haplogroup` (used for backbone/aliases/credit), the +`/manage/haplogroups/merge[/preview]` endpoints with dry-run, and split detection. +The Rust impl **went further**: merges materialize into reviewable **change-sets** +(not direct writes), and external source trees graft into the **ISOGG foundation** +via **SNP-anchored grafting** (`du_db::snp_graft`, `--graft`/`--reattach`) with the +`/curator/reviews` (`wip_*`) resolution flow — documented in `rust/README.md` and +the reconciled `planning/tree-versioning-system.md` + the curator guide. The +proposal itself is Scala (Tapir, `evolutions/52.sql`, `app/...`). 
+ +**Recommended action:** **remove** — realized and better-documented by the rust +README (SNP-graft) + tree-versioning-system.md + the curator guide. (Check inbound +refs on removal.) + +--- + +## 3. branch-age-estimation.md — 📝 Kept (realized; methodology reference) + +**Compared against:** mig 0013 (`tree.haplogroup_ancestral_str`) + 0014 (combined +age), `tree.genealogical_anchor`, `genomics.str_mutation_rate`, +`genomics.biosample_callable_loci`, `du_db::age` (`combine`, +`recompute_combined_ages`), `du_db::ystr`, the `branch-age-recompute` job, and +`/api/v1/haplogroups/{name}/age`. + +**Finding:** essentially **fully realized**. **Every** table the proposal designs +exists (ancestral STR, genealogical anchor, STR mutation rate, per-sample callable +loci), plus the SNP+STR combined-age compute, the weekly→daily recompute job, and +the age API. Caveats: the combination is **inverse-variance** (a simplification of +the doc's full PDF-multiplication `P(t|e)=k∏P(t|eᵢ)`), and genealogical-anchor +wiring into the combine may be partial. The doc is Scala (`evolutions/48.sql`, +`BranchAgeEstimationService`, etc.) and **references the now-removed +`appview-pds-backfeed-system.md`** (lines 21, 204). + +**Recommended action:** **remove** (built; the McDonald framework lives in +`du_db::age`/`ystr` + mig 0013/0014) — **but confirm**, since the doc carries +non-trivial *scientific methodology reference* (mutation rates, multi-step STR +frequencies, expected-precision tables, the McDonald port pointers) that isn't +fully captured in code. If kept instead: add a Rust-status header and drop the two +backfeed references. + +--- + +## 4. group-project-system.md — 📝 Keep + reconcile via D5 + +**Compared against:** `d5-group-project-reconciliation.md` (read in full), +`social` placeholder schema (mig 0009). Unbuilt. + +**Finding:** this is the member-sovereign group-project proposal that **D5 exists to +reconcile** with D1–D4. 
Per D5: it **supersedes the proposal's governance/membership +sections** (now the AppView-enforced `research.project`/`project_member` ACL), +**adopts** its roles/policies/succession, **maps** its aggregate records +(`projectTreeView`/`projectModal`/`strComparison`) onto D4's R1/R2 rails, and treats +its member-sovereign visibility model as the **post-claim** state. The platform is +**forward** (`social` placeholder only). The proposal is AT-Proto/lexicon-focused +(no Scala). It **links the now-removed `ibd-matching-system.md`** (line 6). + +**Recommended action:** **keep** (D5 builds on it; platform unbuilt) — add a header +pointing to D5 as the authoritative reconciliation and noting it's forward; fix the +dangling `ibd-matching-system.md` link → D1/D3. + +--- + +## 5. Messaging_and_Feed_System.md — 📝 Keep + reconcile (forward) + +**Compared against:** `social.{user_block, conversation, message, feed_post}` +(mig 0009). Schema present; **zero logic** (only the static `/reputation` page exists). + +**Finding:** forward Bucket-B social design (Scala/Slick). The schema is in place but +unbuilt. **Reconciliation needed with the no-PII direction:** the proposal stores +**DMs centrally as plaintext** (`social.message.content`) — that conflicts with the +"AppView holds no PII" invariant. Under the new model, DMs should ride the **D1 +encrypted relay** (or AT-Proto records), not a central plaintext mailbox; the public +feed (AT-Proto `feed.post` records + AppView index) is consistent. Also reuses the +Reputation system (#6) and should reconcile with D4 assertion threads (roadmap). + +**Recommended action:** **keep** — add a forward/Bucket-B header noting: schema +exists (mig 0009), logic unbuilt, refresh Slick→Rust, and **rework DM transport to +D1/AT-Proto (no central plaintext)**. Build after the social layer is scheduled. + +--- + +## 6. 
Reputation_System_Implementation.md — 📝 Keep + reconcile (forward) + +**Compared against:** `social.{reputation_event_type, reputation_event, +user_reputation_score}` (mig 0009), the public `/reputation` page (static content, +no backend). + +**Finding:** forward Bucket-B design (Scala/Slick). The schema matches (singularized +table names) and is in place; the service/guard logic is **unbuilt**. The +user-facing `/reputation` page already describes the system. Lower priority +(roadmap: "depends on social being live"). + +**Recommended action:** **keep** — add a forward/Bucket-B header (schema exists +mig 0009; logic unbuilt; refresh Slick→Rust). No code action now. + +--- + +## 7. Patronage_Donation_System.md — 📝 Keep (deferred — revive at scale) + +**Compared against:** `rust/README.md` (billing not in production today) and the FAQ +(`/faq` lists a "Patronage Donation System" under sustainability). + +**Finding:** a Scala/Play + Stripe donation-tier design. Not in scope for the +current rewrite, **but explicitly deferred, not dead** — per the owner, patronage/ +billing will likely return to fund infrastructure once the platform crosses ~a few +hundred active users. The FAQ already names it as the sustainability path. Only +Scala/payment specifics are stale. + +**Recommended action:** **keep** as a deferred proposal — add a light header: +"deferred; revive when active users cross ~a few hundred; refresh the +Scala/Play/Stripe specifics to the Rust stack at that time." No code action. + +--- + +## 8. pds-workbench-biosample-flow.md — 🗑️ REMOVED (Navigator-side) + +**Compared against:** the Jetstream ingest reality (`fed.biosample` + the du-jobs +Jetstream consumer), D2 (`research_subject` model), biosample consolidation. +*(Triaged from the overview + the known ingest model — 65 KB, not read in full.)* + +**Finding:** this is **predominantly a Navigator (Edge / DUNavigator) design** — +the desktop workspace, local GATK pipeline, project organization, and PDS sync. 
Its +AppView-relevant slice (researchers' biosamples reaching the AppView) is the +ingest, and that's **realized differently**: the "Current State" REST APIs it cites +(`POST /api/private/external/biosamples`, `/api/external-biosamples`) are Scala-era +and **don't exist** — ingest is now the outbound **Jetstream → `fed.biosample`** +mirror. It also **predates the D2 ResearchSubject / consolidation model**. + +**Recommended action:** **remove from this repo** (Navigator-side + superseded for +the AppView) — **but confirm**: if its Edge-workflow detail still has value, it +belongs in the **DUNavigator** repo, so consider relocating rather than deleting. + +--- + +## Summary + +| Verdict | Proposals | +|---------|-----------| +| 🗑️ Removed (realized) | #1 variant-schema-simplification, #2 tree-merge-api | +| 🗑️ Removed (Navigator-side) | #8 pds-workbench | +| 📝 Kept (realized; methodology reference) | #3 branch-age-estimation | +| 📝 Kept + reconcile (forward Bucket B) | #4 group-project (via D5), #5 messaging, #6 reputation | +| 📝 Kept (deferred — revive at scale) | #7 Patronage | + +**Themes** +- As in the planning set, **no code is wrong** — verdicts are remove (realized/ + dropped) or keep-and-reconcile (forward). +- **Realized & removable:** the variant model, the tree-merge API, and branch age + are all built (schema + compute + endpoints); their proposals are historical. +- **Forward Bucket B (keep):** group-project (D5 reconciles it), messaging, + reputation — all have `social`/`research` placeholder schema (mig 0009) but no + logic. Headers should mark them forward + reconcile to Rust and the **no-PII / + D1–D5** model (esp. messaging: DMs must not be central plaintext). +- **Confirm before deleting:** #3 (scientific methodology reference) and #8 (large, + Navigator-side — relocate vs delete). +- **Refs to fix on removal:** `planning/haplogroup-discovery-system.md` cites #1; + `#4` links the removed `ibd-matching-system.md`; check inbound refs for #2/#3/#8. 
diff --git a/documents/proposals/variant-schema-simplification.md b/documents/proposals/variant-schema-simplification.md deleted file mode 100644 index 48b56737..00000000 --- a/documents/proposals/variant-schema-simplification.md +++ /dev/null @@ -1,26 +0,0 @@ -# Proposal: Variant Schema Simplification (Implemented) - -**Status:** ✅ Implemented -**Date:** 2025-12-14 - -This proposal has been fully implemented. The documentation has been split into focused guides for operational use. - -## Documentation Index - -| Topic | Document | Description | -|-------|----------|-------------| -| **Schema Design** | [Universal Variant Schema](../schema/universal-variant-schema.md) | Technical reference for the `variant_v2` table, JSONB structures, and multi-reference model. | -| **Migration** | [Migration Guide](../deployment/variant-migration-guide.md) | Instructions for migrating legacy data to the new schema and dropping old tables. | -| **Performance** | [Performance Tuning](../deployment/performance-tuning-variant-ingestion.md) | Lessons learned optimizing the GFF ingestion pipeline (batch upserts, indexing). | -| **Naming** | [Naming Authority](../planning/variant-naming-authority.md) | Workflows for assigning `DU` names to novel variants. | - ---- - -## Executive Summary of Changes - -The project successfully migrated from a "row-per-reference" model to a **Universal Variant Model** using `variant_v2`. - -* **Unified Storage**: A single database row now represents a variant, with coordinates for multiple assemblies (GRCh37, GRCh38, hs1) stored in a `coordinates` JSONB column. -* **Flexible Aliases**: `aliases` JSONB column replaces the rigid `variant_alias` table. -* **High-Performance Ingestion**: The `YBrowseVariantIngestionService` uses optimized batch upserts (`INSERT ... ON CONFLICT`) to handle millions of variants efficiently. -* **Pangenome Ready**: The JSONB coordinate structure allows for future addition of graph-based coordinates without schema changes. 
\ No newline at end of file diff --git a/documents/register.mermaid b/documents/register.mermaid deleted file mode 100755 index e42590ea..00000000 --- a/documents/register.mermaid +++ /dev/null @@ -1,38 +0,0 @@ -sequenceDiagram - participant R_Edge as "Researcher (JVM Edge App)" - participant R_PDS as "Researcher's PDS" - participant ScalaApp as "App Server (Scala/Play)" - participant MetadataDB as "T4 Metadata DB (DID Registry)" - - title PDS Registration and Sync Setup - - R_Edge->>R_PDS: 1. Login: com.atproto.server.createSession(handle, password) - activate R_PDS - R_PDS-->>R_Edge: 2. Response: Auth Token (R_Token), DID (did:plc:XYZ) - deactivate R_PDS - - R_Edge->>R_PDS: 3. Verify Identity: com.atproto.identity.resolveHandle - activate R_PDS - R_PDS-->>R_Edge: 4. Response: DID Document (Confirms PDS Endpoint) - deactivate R_PDS - - R_Edge->>ScalaApp: 5. Registration Request: POST /api/registerPDS(DID, R_Token, PDS_URL) - activate ScalaApp - - ScalaApp->>R_PDS: 6. *Server-Side Verification*: com.atproto.repo.getLatestCommit (Using R_Token) - activate R_PDS - R_PDS-->>ScalaApp: 7. Response: Latest Commit CID, Repo Root - deactivate R_PDS - - ScalaApp->>ScalaApp: 8. Validation: Confirm DID is valid and PDS is responsive - - ScalaApp->>MetadataDB: 9. Write New DID Record: INSERT(DID, PDS_URL, Initial_Cursor=0) - activate MetadataDB - MetadataDB-->>ScalaApp: 10. Success - deactivate MetadataDB - - ScalaApp-->>R_Edge: 11. Final Response: Registration Success - deactivate ScalaApp - - ScalaApp->>ScalaApp: 12. Trigger Internal Notification (e.g., Pekko Pub/Sub) - Note over ScalaApp, MetadataDB: Rust Sync Cluster detects new entry in Metadata DB (next poll) and begins monitoring. 
diff --git a/project/build.properties b/project/build.properties deleted file mode 100644 index cc68b53f..00000000 --- a/project/build.properties +++ /dev/null @@ -1 +0,0 @@ -sbt.version=1.10.11 diff --git a/project/plugins.sbt b/project/plugins.sbt deleted file mode 100644 index 520ba80a..00000000 --- a/project/plugins.sbt +++ /dev/null @@ -1,2 +0,0 @@ -addSbtPlugin("org.playframework" % "sbt-plugin" % "3.0.9") -addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.2.2") diff --git a/public/images/decoding_us_logo_placeholder.png b/public/images/decoding_us_logo_placeholder.png deleted file mode 100644 index f711e6e8..00000000 Binary files a/public/images/decoding_us_logo_placeholder.png and /dev/null differ diff --git a/public/javascripts/main.js b/public/javascripts/main.js deleted file mode 100644 index e69de29b..00000000 diff --git a/public/stylesheets/main.css b/public/stylesheets/main.css deleted file mode 100644 index 6dbd5ef1..00000000 --- a/public/stylesheets/main.css +++ /dev/null @@ -1,21 +0,0 @@ -html { - scroll-behavior: smooth; -} - -body { - padding-top: 56px; - padding-bottom: 56px; -} - -/* Override Bootstrap's default pink/cyan code styling to use gray */ -code { - color: #495057; - background-color: #f8f9fa; -} - -.logo-placeholder { - max-width: 900px; /* Increased for more horizontal span */ - display: block; - margin: 0 auto; - padding: 20px 0; /* Add some vertical spacing */ -} \ No newline at end of file diff --git a/rust/.env.example b/rust/.env.example new file mode 100644 index 00000000..54e8d2fe --- /dev/null +++ b/rust/.env.example @@ -0,0 +1,83 @@ +# DecodingUs AppView (Rust) — example environment. +# +# The app does NOT auto-load this file; it reads variables from the shell. Copy to +# `.env` (gitignored) and source it, or export the ones you need before running: +# +# set -a; . 
./.env; set +a +# cargo run -p du-web +# +# Only DATABASE_URL is strictly required to boot; everything else has a safe default +# or degrades gracefully (the dependent feature is simply disabled when unset). + +# ── Core (du-web) ───────────────────────────────────────────────────────────── + +# Postgres + PostGIS connection. Required. The dev DB runs via `./scripts/test-db.sh up`. +DATABASE_URL=postgres://postgres:dev@localhost:5432/decodingus?sslmode=disable + +# Cookie/session signing secret. Optional in dev (an insecure default is used), but +# MUST be set to a strong random value (32+ chars) in production. It is padded/repeated +# to 64 bytes, so longer is fine. +APP_SECRET=change-me-to-a-long-random-string-32-plus-chars + +# HTTP listen port. Default: 9000. +PORT=9000 + +# tracing/log filter (RUST_LOG-style). Default: "info,du_web=debug". +RUST_LOG=info,du_web=debug + +# Public base URL, used to build absolute links. Default: https://decoding-us.com +DU_BASE_URL=http://localhost:9000 + +# Directory of vendored static assets. Default: the du-web crate's `assets/` dir +# (the container image sets this to /app/assets). +# DU_ASSETS_DIR=/app/assets + +# ── AT Protocol OAuth (du-web sign-in) ──────────────────────────────────────── +# Leave OAUTH_BASE_URL unset to disable OAuth entirely (the rest are then ignored). + +# Public base URL advertised in the client metadata document. +# OAUTH_BASE_URL=https://your-appview.example +# Requested scope. Default: "atproto transition:generic" +# OAUTH_SCOPE=atproto transition:generic +# EC private key (PEM) for DPoP / client-assertion signing. +# OAUTH_EC_KEY= + +# Local-dev OAuth helpers (only for testing against a dev PDS): +# DU_OAUTH_LOOPBACK=http://127.0.0.1:9000/oauth/callback +# DU_OAUTH_DEV_PDS=https://localhost:2583 +# DU_OAUTH_DEV_RESOLVE=handle.example=did:plc:... 
# force handle->DID resolution +# DU_OAUTH_DEV_CA=/path/to/dev-ca.pem # extra CA for the dev PDS + +# ── Curator / public forms ──────────────────────────────────────────────────── + +# API key gating the machine curation-intake endpoint. Unset -> that endpoint is off. +# DU_CURATION_API_KEY= + +# reCAPTCHA for public forms (e.g. suggest-a-paper). Both unset -> captcha disabled. +# RECAPTCHA_SITE_KEY= +# RECAPTCHA_SECRET= + +# ── Background jobs (du-jobs) ───────────────────────────────────────────────── + +# AT Protocol Jetstream firehose for the federated reporting mirror. Unset -> mirror off. +# JETSTREAM_URL=wss://jetstream2.us-east.bsky.network/subscribe +# Comma-separated lexicon collections to subscribe to. +# JETSTREAM_COLLECTIONS=com.decodingus.atmosphere.haplogroupReconciliation,com.decodingus.atmosphere.populationBreakdown + +# Variant catalog ingest: path to the YBrowse GFF3 snapshot. +# YBROWSE_GFF=/path/to/ybrowse.gff3 +# Base URL/dir for the T2T-CHM13 Y-region BED files. Has a built-in default. +# YREGIONS_BASE= + +# ── External APIs (politeness pool + keys) ──────────────────────────────────── + +# Contact email for the OpenAlex "polite pool" (publication discovery). +# OPENALEX_MAILTO=you@example.com +# NCBI E-utilities contact + optional API key (publication enrichment). +# NCBI_EMAIL=you@example.com +# NCBI_API_KEY= + +# Secret store (du-external): secrets are read from SECRET_ env vars by default +# (e.g. the secret "api/token" -> SECRET_API_TOKEN); the optional `aws` feature reads +# AWS Secrets Manager instead. +# SECRET_API_TOKEN= diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 00000000..5093373c --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1,7 @@ +/target +.env +# SQLx offline query cache is committed once a dev DB exists; ignore until then. 
+.DS_Store + +# Local OAuth dev stack state (CA, env, PDS data) +.oauth-dev/ diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 index 00000000..3522827e --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,5029 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + +[[package]] +name = "argon2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072" +dependencies = [ + "base64ct", + "blake2", + "cpufeatures 0.2.17", + "password-hash", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "askama" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b79091df18a97caea757e28cd2d5fda49c6cd4bd01ddffd7ff01ace0c0ad2c28" +dependencies = [ + "askama_derive", + "askama_escape", + "humansize", + "num-traits", + "percent-encoding", +] + +[[package]] +name = "askama_derive" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19fe8d6cb13c4714962c072ea496f3392015f0989b1a2847bb4b2d9effd71d83" +dependencies = [ + "askama_parser", + "basic-toml", + "mime", + "mime_guess", + "proc-macro2", + "quote", + "serde", + "syn 2.0.117", +] + +[[package]] +name = "askama_escape" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341" + +[[package]] +name = "askama_parser" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acb1161c6b64d1c3d83108213c2a2533a342ac225aabd0bda218278c2ddb00c0" +dependencies = [ + "nom", +] + +[[package]] +name = "async-compression" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" +dependencies = [ + "compression-codecs", + "compression-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" 
+version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + +[[package]] +name = "aws-config" +version = "1.8.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "517aa062d8bd9015ee23d6daa5e1c1372328412fdae4e6c4c1be9b69c6ad37a2" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 1.4.1", + "sha1", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-lc-rs" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ec2f1fc3ec205783a5da9a7e6c1509cc69dedf09a1949e412c1e18469326d00" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a2f9779ce85b93ab6170dd940ad0169b5766ff848247aff13bb788b832fe3f4" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "aws-runtime" +version = "1.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"77ed8e8c52d2dc2390ad9f15647fe663f71e9780b4262c190fbb823a32721566" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 1.4.1", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-secretsmanager" +version = "1.106.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6fa2aa029a7298bc3d863c253fe6745dac677620f20c337b6ca7cc208f7201" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.1", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sesv2" +version = "1.121.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8e944a491795ff4c590f16147fa0b05fb118bf3e7983570b04bb4cf5136fefd" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.1", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.100.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee2719d4a5e5e147bb9e9b77490df6ece750df1094968aa857b09b618a1881a" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + 
"http 1.4.1", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.102.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b30d254992d56ef19f430396e5765b11e0f5bd21a7a557cb12fca1c8c18b9636" +dependencies = [ + "arc-swap", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.1", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.105.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f4f8065fe615dbed9096458ba98dda6d641553ffd5aedd27e37e65211aca9f" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "http 1.4.1", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7083fb918b38474ac65ffbf8a69fc8792d36879f4ac5f1667b43aec61efe9a5" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "crypto-bigint", + "form_urlencoded", + "hex", + "hmac 0.13.0", + "http 0.2.12", + "http 1.4.1", + "p256", + "percent-encoding", + "sha2 0.11.0", + "subtle", + "time", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = 
"aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.1", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3ef8931ad1c98aa6a55b4256f847f3116090819844e0dd41ea682cac5dd2d3" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.14", + "http 0.2.12", + "http 1.4.1", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.10.1", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.9", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.40", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "701a947f4797e52a911e114a898667c746c39feea467bbd1abd7b3721f702ffa" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = 
"1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e6f5caf6fea86f8c2206541ab5857cfcda9013426cdbe8fa0098b9e2d32182" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.1", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9db177daa6ba8afb9ee1aefcf548c907abcf52065e394ee11a92780057fe0e8c" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api-macros", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.1", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-runtime-api-macros" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7396fd9500589e62e460e987ecb671bad374934e55ec3b5f498cc7a8a8a7b7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "aws-smithy-schema" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7442cb268338f0eb8278140a107c046756aa01093d8ef5e99628d34ae09c94f5" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "http 1.4.1", +] + +[[package]] +name = "aws-smithy-types" +version = "1.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53f93074121a1be41317b9aa607143ae17900631f7f59a99f2b905d519d6783b" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.4.1", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + 
"time", + "tokio", + "tokio-util", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d16bf10b03a3c01e6b3b7d47cd964e873ffe9e7d4e80fad16bd4c077cb068531" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-schema", + "aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "axum-macros", + "bytes", + "futures-util", + "http 1.4.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.10.1", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.4.1", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-macros" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d123550fa8d071b7255cb0cc04dc302baa6c8c4a79f55701552684d8399bce" +dependencies = [ + "proc-macro2", + "quote", + "syn 
2.0.117", +] + +[[package]] +name = "base-x" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270" + +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + +[[package]] +name = "base256emoji" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e9430d9a245a77c92176e649af6e275f20839a48389859d1661e9a128d077c" +dependencies = [ + "const-str", + "match-lookup", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + +[[package]] +name = "basic-toml" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba62675e8242a4c4e806d12f11d136e626e6c8361d6b829310732241652a178a" +dependencies = [ + "serde", +] + +[[package]] +name = "bcrypt" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b1866ecef4f2d06a0bb77880015fdf2b89e25a1c2e5addacb87e459c86dc67e" +dependencies = [ + "base64", + "blowfish", + "getrandom 0.2.17", + "subtle", + "zeroize", +] + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +dependencies = [ + "serde_core", +] + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "blowfish" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e412e2cd0f2b2d93e02543ceae7917b3c70331573df19ee046bcbc35e45e87d7" +dependencies = [ + "byteorder", + "cipher", +] + +[[package]] +name = "borsh" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd1e3f8955a5d7de9fab72fc8373fade9fb8a703968cb200ae3dc6cf08e185a" +dependencies = [ + "borsh-derive", + "bytes", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfcfdc083699101d5a7965e49925975f2f55060f94f9a05e7187be95d530ca59" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "bumpalo" +version = "3.20.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" + +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + +[[package]] +name = "cc" +version = "1.2.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + 
+[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common 0.1.7", + "inout", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + +[[package]] +name = "cmov" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c9ea0ac24bc397ab3c98583a3c9ba74fa56b09a4449bbe172b9b1ddb016027a" + +[[package]] 
+name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "compression-codecs" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + +[[package]] +name = "const-str" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f421161cb492475f1661ddc9815a745a1c894592070661180fdec3d4872e9c3" + +[[package]] +name = "cookie" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +dependencies = [ + "base64", + "hmac 0.12.1", + "percent-encoding", + "rand 0.8.6", + "sha2 0.10.9", + "subtle", + "time", + "version_check", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "217698eaf96b4a3f0bc4f3662aaa55bdf913cd54d7204591faa790070c6d0853" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "crypto-common" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + +[[package]] +name = "curve25519-dalek" +version = "4.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "curve25519-dalek-derive", + "digest 0.10.7", + "fiat-crypto", + "rustc_version", + "subtle", + "zeroize", +] + +[[package]] +name = "curve25519-dalek-derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "data-encoding" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" + +[[package]] +name = "data-encoding-macro" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3259c913752a86488b501ed8680446a5ed2d5aeac6e596cb23ba3800768ea32c" +dependencies = [ + "data-encoding", + "data-encoding-macro-internal", +] + +[[package]] +name = "data-encoding-macro-internal" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccc2776f0c61eca1ca32528f85548abd1a4be8fb53d1b21c013e4f18da1e7090" +dependencies = [ + "data-encoding", + "syn 2.0.117", +] + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid 0.9.6", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer 0.10.4", + "const-oid 0.9.6", + "crypto-common 0.1.7", + "subtle", +] + +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.0", + "const-oid 0.10.2", + "crypto-common 0.2.2", + "ctutils", +] + +[[package]] +name = "displaydoc" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" 
+dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "du-atproto" +version = "0.1.0" +source = "git+https://github.com/JamesKane/decodingus-shared.git?rev=f975a0835fe67b1e86d919fcb085fca4db1cb1f4#f975a0835fe67b1e86d919fcb085fca4db1cb1f4" +dependencies = [ + "base64", + "du-domain", + "ed25519-dalek", + "multibase", + "p256", + "rand_core 0.6.4", + "reqwest", + "serde", + "serde_json", + "sha2 0.10.9", + "thiserror 2.0.18", +] + +[[package]] +name = "du-bio" +version = "0.1.0" +source = "git+https://github.com/JamesKane/decodingus-shared.git?rev=f975a0835fe67b1e86d919fcb085fca4db1cb1f4#f975a0835fe67b1e86d919fcb085fca4db1cb1f4" +dependencies = [ + "du-domain", + "thiserror 2.0.18", +] + +[[package]] +name = "du-db" +version = "0.1.0" +dependencies = [ + "chrono", + "du-domain", + "serde", + "serde_json", + "sqlx", + "thiserror 2.0.18", + "tokio", + "tracing", + "uuid", +] + +[[package]] +name = "du-domain" +version = "0.1.0" +source = "git+https://github.com/JamesKane/decodingus-shared.git?rev=f975a0835fe67b1e86d919fcb085fca4db1cb1f4#f975a0835fe67b1e86d919fcb085fca4db1cb1f4" +dependencies = [ + "chrono", + "rust_decimal", + "serde", + "serde_json", + "thiserror 2.0.18", + "uuid", +] + +[[package]] +name = "du-external" +version = "0.1.0" +dependencies = [ + "aws-config", + "aws-sdk-secretsmanager", + "aws-sdk-sesv2", + "chrono", + "du-domain", + "reqwest", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tracing", +] + +[[package]] +name = "du-jobs" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "du-bio", + "du-db", + "du-domain", + "du-external", + "futures-util", + "reqwest", + "serde", + "serde_json", + "tokio", + "tokio-tungstenite", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = 
"du-migrate" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "du-db", + "du-domain", + "reqwest", + "serde", + "serde_json", + "sqlx", + "tokio", + "tracing", + "tracing-subscriber", + "uuid", +] + +[[package]] +name = "du-web" +version = "0.1.0" +dependencies = [ + "anyhow", + "argon2", + "askama", + "axum", + "base64", + "bcrypt", + "chrono", + "du-atproto", + "du-db", + "du-domain", + "du-external", + "ed25519-dalek", + "percent-encoding", + "reqwest", + "serde", + "serde_json", + "sha2 0.10.9", + "sqlx", + "tokio", + "tower", + "tower-cookies", + "tower-http", + "tracing", + "tracing-subscriber", + "utoipa", + "utoipa-swagger-ui", + "uuid", +] + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "ecdsa" +version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der", + "digest 0.10.7", + "elliptic-curve", + "rfc6979", + "signature", + "spki", +] + +[[package]] +name = "ed25519" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" +dependencies = [ + "pkcs8", + "signature", +] + +[[package]] +name = "ed25519-dalek" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e796c081cee67dc755e1a36a0a172b897fab85fc3f6bc48307991f64e4eca9" +dependencies = [ + "curve25519-dalek", + "ed25519", + "serde", + "sha2 0.10.9", + "subtle", + "zeroize", +] + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" +dependencies = [ + "serde", +] + +[[package]] +name = "elliptic-curve" 
+version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct", + "crypto-bigint", + "digest 0.10.7", + "ff", + "generic-array", + "group", + "pem-rfc7468", + "pkcs8", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "ff" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b50bfb653653f9ca9095b427bed08ab8d75a137839d9ad64eb11810d5b6393" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "fiat-crypto" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + 
"futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", + "zeroize", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.14" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.1", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac 0.12.1", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest 0.10.7", +] + +[[package]] +name = "hmac" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" +dependencies = [ + "digest 0.11.3", +] + +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be7462df143984c4598a256ef469b251d7d7f9e271135073e78fc535414f3d0" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.1", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.1", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "http-range-header" +version = "0.4.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c" + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm", +] + +[[package]] +name = "hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", +] + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2 0.4.14", + "http 1.4.1", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" +dependencies = [ + "http 1.4.1", + "hyper 1.10.1", + "hyper-util", + "rustls 0.23.40", + "rustls-native-certs", + "tokio", + "tokio-rustls 0.26.4", + "tower-service", + "webpki-roots 1.0.7", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http 1.4.1", + "http-body 1.0.1", + "hyper 1.10.1", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.4", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + 
"potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] 
+name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.99" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libredox" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f02ab6bace2054fb888a3c16f990117b579d14a3088e472d63c6011fa185c9d3" +dependencies = [ + "bitflags", + "libc", + "plain", + "redox_syscall 0.8.0", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "pkg-config", + "vcpkg", +] + +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + 
"scopeguard", +] + +[[package]] +name = "log" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5" + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "match-lookup" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "757aee279b8bdbb9f9e676796fd459e4207a1f986e87886700abf589f5abf771" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest 0.10.7", +] + +[[package]] +name = "memchr" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + 
+[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "multibase" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8694bb4835f452b0e3bb06dbebb1d6fc5385b6ca1caf2e55fd165c042390ec77" +dependencies = [ + "base-x", + "base256emoji", + "data-encoding", + "data-encoding-macro", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.6", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-conv" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"521739c6d2bac4aa25192232afe6841231376b2b26d4d9fae5ecf8ca5772e441" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + +[[package]] +name = "p256" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b" +dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2 0.10.9", +] + +[[package]] +name = "parking" +version = "2.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.18", + "smallvec", + "windows-link", +] + +[[package]] +name = "password-hash" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" +dependencies = [ + "base64ct", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "primeorder" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6" +dependencies = [ + "elliptic-curve", +] + +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls 0.23.40", + "socket2 0.6.4", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.4", + "ring", + "rustc-hash", + "rustls 0.23.40", + "rustls-pki-types", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", 
+ "libc", + "once_cell", + "socket2 0.6.4", + "tracing", + "windows-sys 0.52.0", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = 
"0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_syscall" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c7591fa2c6b601dfcfe5f043f65a1c39fcdf50efefcd7f1572e538c1f4b398d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "futures-core", + "http 1.4.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.10.1", + "hyper-rustls 0.27.9", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.40", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots 1.0.7", +] + +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac 0.12.1", + "subtle", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rkyv" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" +dependencies = [ + "proc-macro2", + 
"quote", + "syn 1.0.109", +] + +[[package]] +name = "rsa" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid 0.9.6", + "digest 0.10.7", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + +[[package]] +name = "rust-embed" +version = "8.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04113cb9355a377d83f06ef1f0a45b8ab8cd7d8b1288160717d66df5c7988d27" +dependencies = [ + "rust-embed-impl", + "rust-embed-utils", + "walkdir", +] + +[[package]] +name = "rust-embed-impl" +version = "8.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0902e4c7c8e997159ab384e6d0fc91c221375f6894346ae107f47dd0f3ccaa" +dependencies = [ + "proc-macro2", + "quote", + "rust-embed-utils", + "syn 2.0.117", + "walkdir", +] + +[[package]] +name = "rust-embed-utils" +version = "8.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bcdef0be6fe7f6fa333b1073c949729274b05f123a0ad7efcb8efd878e5c3b1" +dependencies = [ + "sha2 0.10.9", + "walkdir", +] + +[[package]] +name = "rust_decimal" +version = "1.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c5108e3d4d903e21aac27f12ba5377b6b34f9f44b325e4894c7924169d06995" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand 0.8.6", + "rkyv", + "serde", + "serde_json", + "wasm-bindgen", +] + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + +[[package]] +name = "rustls" +version = "0.23.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" +dependencies = [ + "aws-lc-rs", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki 0.103.13", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + 
+[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = 
[ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest 0.10.7", + "rand_core 0.6.4", +] + +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" 
+version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64", + "bytes", + "chrono", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.5", + "hashlink", + "indexmap", + "log", + "memchr", + "once_cell", + "percent-encoding", + "rustls 0.23.40", + "serde", + "serde_json", + "sha2 0.10.9", + "smallvec", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tracing", + "url", + "uuid", + "webpki-roots 0.26.11", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 2.0.117", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2 0.10.9", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn 2.0.117", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "bytes", + "chrono", + "crc", + "digest 0.10.7", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac 0.12.1", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.6", + "rsa", + 
"serde", + "sha1", + "sha2 0.10.9", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "uuid", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64", + "bitflags", + "byteorder", + "chrono", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac 0.12.1", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.6", + "serde", + "serde_json", + "sha2 0.10.9", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "uuid", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" +dependencies = [ + "atoi", + "chrono", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "thiserror 2.0.18", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + 
+[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tinystr" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" 
+version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.52.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2 0.6.4", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls 0.23.40", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9" +dependencies = [ + "futures-util", + "log", + "rustls 0.23.40", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tungstenite", + "webpki-roots 0.26.11", +] + +[[package]] +name = 
"tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.25.12+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2153edc6955a6c354fad8f5efd38b6a8769bdccf9fe50f8e1329f81b0baa5d7" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.1.2+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" +dependencies = [ + "winnow", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-cookies" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fd0118512cf0b3768f7fcccf0bef1ae41d68f2b45edc1e77432b36c97c56c6d" +dependencies = [ + "async-trait", + "axum-core", + "cookie", + "futures-util", + "http 1.4.1", + "parking_lot", + "pin-project-lite", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" +dependencies = [ + "async-compression", + "bitflags", + "bytes", + "futures-core", + "futures-util", + "http 1.4.1", + "http-body 1.0.1", + "http-body-util", + "http-range-header", + "httpdate", + "mime", + "mime_guess", + "percent-encoding", + "pin-project-lite", + "tokio", + "tokio-util", + "tower", + "tower-layer", + "tower-service", + "tracing", + "url", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "tungstenite" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a" +dependencies = [ + "byteorder", + "bytes", + "data-encoding", + "http 1.4.1", + "httparse", + "log", + "rand 0.8.6", + "rustls 0.23.40", + "rustls-pki-types", + "sha1", + "thiserror 1.0.69", + "utf-8", +] + +[[package]] +name = "typenum" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" + +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = 
"unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "utoipa" +version = "5.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bde15df68e80b16c7d16b9616e80770ad158988daa56a27dccd1e55558b0160" +dependencies = [ + "indexmap", + "serde", + "serde_json", + "utoipa-gen", +] + +[[package]] +name = "utoipa-gen" +version = "5.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ba0b99ee52df3028635d93840c797102da61f8a7bb3cf751032455895b52ef8" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn 2.0.117", + "uuid", +] + +[[package]] +name = "utoipa-swagger-ui" +version = "8.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db4b5ac679cc6dfc5ea3f2823b0291c777750ffd5e13b21137e0f7ac0e8f9617" +dependencies = [ + "axum", + "base64", + "mime_guess", + "regex", + "rust-embed", + "serde", + "serde_json", + "url", + "utoipa", + "zip", +] + +[[package]] +name = "uuid" +version = "1.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d258b83ceec21034727ecee8c382cfa6c3e133699b0742c64571814fb420c9f7" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + "serde_core", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "serde", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = 
"wasm-bindgen-macro-support" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.7", +] + +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 
0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" +dependencies = [ + "memchr", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + 
+[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zerofrom" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + 
"synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zip" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" +dependencies = [ + "arbitrary", + "crc32fast", + "crossbeam-utils", + "displaydoc", + "flate2", + "indexmap", + "memchr", + "thiserror 2.0.18", + "zopfli", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + +[[package]] +name = "zopfli" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 00000000..46bc3eb1 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,104 @@ +# DecodingUs — Rust 
workspace (rewrite of the Play/Scala 3 app) +# See /Users/jkane/.claude/plans/robust-knitting-lampson.md +[workspace] +resolver = "2" +members = [ + "crates/du-db", + "crates/du-external", + "crates/du-web", + "crates/du-jobs", + "crates/du-migrate", +] +# du-domain, du-atproto, du-bio live in the sibling `decodingus-shared` repo +# (shared with Navigator) and are pulled in via the git deps below. + +[workspace.package] +version = "0.1.0" +edition = "2021" +rust-version = "1.80" +license = "BSD-3-Clause" +repository = "https://github.com/decodingus/decodingus" + +[workspace.dependencies] +# Shared crates — git deps pinned to a rev on the decodingus-shared repo (this +# also unblocks the Docker build, whose context can't reach a sibling path dep). +# To update: push decodingus-shared, then bump `rev` (or switch to a pushed tag, +# e.g. `tag = "v0.1.0"`). For local co-dev against working-tree changes, add a +# [patch] section pointing these back at ../../decodingus-shared/crates/*. +du-domain = { git = "https://github.com/JamesKane/decodingus-shared.git", rev = "f975a0835fe67b1e86d919fcb085fca4db1cb1f4" } +du-atproto = { git = "https://github.com/JamesKane/decodingus-shared.git", rev = "f975a0835fe67b1e86d919fcb085fca4db1cb1f4" } +du-bio = { git = "https://github.com/JamesKane/decodingus-shared.git", rev = "f975a0835fe67b1e86d919fcb085fca4db1cb1f4" } +# Internal (decodingus-only) crates +du-db = { path = "crates/du-db" } +du-external = { path = "crates/du-external" } + +# Async runtime + web +tokio = { version = "1", features = ["full"] } +axum = { version = "0.7", features = ["macros"] } +tower = "0.5" +tower-http = { version = "0.6", features = ["fs", "trace", "compression-gzip", "catch-panic"] } +tower-cookies = { version = "0.10", features = ["signed"] } + +# Database (runtime-checked queries for now; switch to compile-time macros + .sqlx +# offline cache once a dev DB is reachable — see plan §3 / §9) +sqlx = { version = "0.8", default-features = false, features 
= [ + "runtime-tokio-rustls", "postgres", "uuid", "chrono", "json", "macros", "migrate", +] } + +# Templating (typed, compile-time — Twirl analog) +askama = "0.12" + +# OpenAPI surface (Tapir analog): typed API descriptions + Swagger UI +utoipa = { version = "5", features = ["axum_extras", "chrono", "uuid"] } +utoipa-swagger-ui = { version = "8", features = ["axum"] } + +# Serialization / JSONB payloads +serde = { version = "1", features = ["derive"] } +serde_json = "1" + +# Common types +uuid = { version = "1", features = ["v4", "serde"] } +chrono = { version = "0.4", features = ["serde"] } +rust_decimal = "1" + +# HTTP client (external APIs) +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } + +# WebSocket client (Jetstream coverage-mirror consumer in du-jobs) +tokio-tungstenite = { version = "0.24", default-features = false, features = ["connect", "rustls-tls-webpki-roots"] } +futures-util = { version = "0.3", default-features = false } + +# URL/query encoding (language-switcher `next` param) +percent-encoding = "2" + +# AT Protocol identity/crypto +multibase = "0.9" +base64 = "0.22" +sha2 = "0.10" +p256 = { version = "0.13", features = ["ecdsa"] } +rand_core = { version = "0.6", features = ["getrandom"] } + +# CLI +clap = { version = "4", features = ["derive"] } + +# Errors / logging / config +thiserror = "2" +anyhow = "1" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +# Crypto +argon2 = { version = "0.5", features = ["std"] } +bcrypt = "0.16" +ed25519-dalek = "2" +aes-gcm = "0.10" + +# NB: raw-read processing (BAM/CRAM) and variant calling are OUT OF SCOPE for the +# AppView — Navigator (edge) does local calling and the AppView aggregates the +# resulting summaries/proposals. du-bio here is text-format + coordinate math +# (VCF variant ingest, BED callable loci, liftover), so no htslib/noodles dep. 
+ +[profile.release] +lto = "thin" +codegen-units = 1 +strip = true diff --git a/rust/Dockerfile b/rust/Dockerfile new file mode 100644 index 00000000..f97759e4 --- /dev/null +++ b/rust/Dockerfile @@ -0,0 +1,51 @@ +# DecodingUs (Rust) — multi-stage build to a single static-ish binary. +# No JRE, no htslib system lib (du-bio is pure-Rust text parsing; no noodles). +# +# docker build -t decodingus -f rust/Dockerfile rust +# (or via compose.yaml) + +# ── builder ────────────────────────────────────────────────────────────────── +FROM rust:1-bookworm AS builder +WORKDIR /build + +# Cache dependencies: copy manifests first, then sources. +# NB: du-domain/du-atproto/du-bio live in the separate `decodingus-shared` repo +# and are git deps pinned to a rev (see rust/Cargo.toml), not sibling path deps +# (../../decodingus-shared/crates/*), so this build context is self-contained: +# cargo fetches them from GitHub during the build. If you add a local [patch] +# back to the sibling paths, build from a parent context containing both repos. +COPY Cargo.toml Cargo.lock ./ +COPY crates ./crates +COPY migrations ./migrations +# SQLx is built in offline mode in CI/Docker (no DB at build time). Once a dev DB +# exists, commit the `.sqlx/` cache and this picks it up automatically. +ENV SQLX_OFFLINE=true +RUN cargo build --release --bin decodingus --bin decodingus-jobs --bin decodingus-migrate + +# ── runtime ────────────────────────────────────────────────────────────────── +FROM debian:bookworm-slim AS runtime +# curl for the healthcheck; ca-certificates for outbound TLS (OpenAlex/ENA/AWS). 
+RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates curl \ + && rm -rf /var/lib/apt/lists/* + +RUN groupadd -r decodingus && useradd -r -g decodingus decodingus +WORKDIR /app + +COPY --from=builder /build/target/release/decodingus /usr/local/bin/decodingus +COPY --from=builder /build/target/release/decodingus-jobs /usr/local/bin/decodingus-jobs +COPY --from=builder /build/target/release/decodingus-migrate /usr/local/bin/decodingus-migrate +# Vendored static assets + migrations shipped alongside the binary. (Askama +# templates and locale catalogs are embedded into the binary at compile time.) +COPY --chown=decodingus:decodingus crates/du-web/assets ./assets +COPY --chown=decodingus:decodingus migrations ./migrations + +USER decodingus +EXPOSE 9000 +ENV PORT=9000 +ENV DU_ASSETS_DIR=/app/assets + +HEALTHCHECK --interval=30s --timeout=10s --start-period=20s --retries=3 \ + CMD curl -fsS http://localhost:9000/health || exit 1 + +CMD ["decodingus"] diff --git a/rust/Makefile b/rust/Makefile new file mode 100644 index 00000000..e8aaf23a --- /dev/null +++ b/rust/Makefile @@ -0,0 +1,37 @@ +# DecodingUs (Rust) dev tasks. +.PHONY: help test db-up db-down oauth-up oauth-web oauth-dev oauth-env oauth-down + +help: + @echo "Targets:" + @echo " test cargo test --workspace (set DATABASE_URL for live-DB tests)" + @echo " db-up start the dev Postgres (prints DATABASE_URL)" + @echo " db-down stop the dev Postgres" + @echo " oauth-dev bring up the local OAuth stack (PDS+Caddy+PG) AND run du-web" + @echo " oauth-up bring up the OAuth stack only (PDS+Caddy+PG, CA, test account)" + @echo " oauth-web run du-web on the host with the dev OAuth env" + @echo " oauth-env print the generated dev env" + @echo " oauth-down stop+remove PDS+Caddy" + +test: + cargo test --workspace + +db-up: + @./scripts/test-db.sh up + +db-down: + @./scripts/test-db.sh down + +# One command: stack up, then du-web (Ctrl-C stops du-web; `make oauth-down` clears containers). 
+oauth-dev: oauth-up oauth-web + +oauth-up: + @./scripts/oauth-dev.sh up + +oauth-web: + @./scripts/oauth-dev.sh web + +oauth-env: + @./scripts/oauth-dev.sh env + +oauth-down: + @./scripts/oauth-dev.sh down diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 00000000..c40d5de8 --- /dev/null +++ b/rust/README.md @@ -0,0 +1,367 @@ +# DecodingUs — Rust port + +A rewrite of the DecodingUs platform (originally Play Framework / Scala 3) in +Rust. It coexists with the Scala app during the transition and replaces it at a +single cutover. The Rust app is the **AppView**: a curated Y/mtDNA phylogenetic +catalog, a public read surface + JSON API, the curator tooling, and +privacy-preserving federated *reporting* (it aggregates, it does not analyze). + +**Status:** the spine is done — redesigned schema, data layer, public HTML/HTMX + +JSON API, auth + the full curator suite, tree versioning + merge + SNP-graft, the +multi-source tree build (ISOGG + decoding-us + FTDNA, Y + mt), the production ETL, +the YBrowse ingestion pipeline, the variant naming authority, and the federated +reporting mirror. Workspace builds clean; live-DB integration tests (gated on +`DATABASE_URL`) + unit tests pass. The data cutover is **verified end-to-end** on a +real prod dump (ETL `--skip-tree` + tree-init); what's left is executing it and the +live AT-Protocol OAuth handshake (verified to consent locally; confidential +round-trip is an Edge joint test), plus the re-scoped federation subsystems (IBD +coordination, social, sequencer-lab inference). See [Roadmap](#roadmap). A living, +detailed status lives in [`STATUS.md`](STATUS.md); the feature-by-feature +comparison with the Scala app is in [`docs/scala-vs-rust-diff.md`](docs/scala-vs-rust-diff.md). + +--- + +## Why the rewrite + +- Drop the JVM's memory/startup overhead for a single static binary. 
+- Replace a sprawling, accreted schema (~84 tables across 6 schemas + a second + "metadata" DB) with a de-sprawled design that leans on Postgres **JSONB** for + document-shaped data. +- Run fully **Docker-less for local dev/test** on Apple Silicon via Apple's + `container` CLI, while remaining Docker-deployable for production. + +## Stack + +| Concern | Choice | +|---|---| +| Web | **Axum** 0.7 (+ tower / tower-http / tower-cookies) | +| Templates | **Askama** (compile-time typed, Twirl analog) | +| Frontend | **HTMX** 2 + Bootstrap 5 (vendored), HATEOAS-first | +| Database | **SQLx** 0.8 (Postgres, runtime-checked queries) | +| Genomics | `du-bio` — coordinate math + text parsing (VCF / BED callable-loci / UCSC-chain liftover); the YBrowse GFF3 parser lives in `du-jobs`. Raw reads (BAM/CRAM) + calling are out of scope (done in Navigator) | +| Async | **tokio** | +| Auth | AT Protocol OAuth (PKCE/DPoP/`private_key_jwt`); legacy Argon2 sessions for dev/curator seeding | +| External | OpenAlex, ENA, NCBI/PubMed; AWS SES + Secrets Manager behind the `aws` feature; reCAPTCHA | +| i18n | embedded `key=value` catalogs (en/es/fr) | +| Local Postgres | Apple `container` running `imresamu/postgis` (arm64) | + +## Workspace layout + +Shared crates live in the sibling **`decodingus-shared`** repo (also consumed by +Navigator), pulled in as **git deps pinned to a rev** in `Cargo.toml` (so the +Docker build needs no sibling path). To co-develop locally, add a `[patch]` +pointing the three deps back at the sibling working tree. 
+ +``` +github.com/JamesKane/decodingus-shared (separate repo) + du-domain/ pure types/enums/IDs + JSONB payload structs + the merge algorithm, no IO + du-atproto/ AT Protocol identity/crypto + OAuth client (PKCE/DPoP/metadata) + du-bio/ genomics: callable-loci (BED), liftover (UCSC chain), VCF reader + +rust/ (this repo — AppView/server-specific) + crates/ + du-db/ SQLx pool + per-aggregate query modules + versioning/merge/graft/naming/ybrowse engines + du-external/ OpenAlex / ENA / NCBI / AWS SES / Secrets clients + du-web/ Axum app: routes, Askama templates, i18n, HTMX, auth, OAuth, JSON API + du-jobs/ tokio scheduler + scheduled jobs + the Jetstream reporting-mirror consumer + du-migrate/ legacy → new-schema ETL + the `decodingus-tree-init` tree builder (ISOGG/decoding-us/FTDNA graft, Y + mt) + migrations/ redesigned schema (0001–0023) + locales/ en / es / fr message catalogs + docs/ STATUS pointers, Scala↔Rust diff, AT-Proto OAuth findings + scripts/ test-db.sh (Apple container), mock-legacy.sql + Dockerfile, compose.yaml, .env.example +``` + +## Schema redesign (`migrations/0001`–`0023`) + +Postgres schemas: `core`, `tree`, `genomics`, `pubs`, `ident`, `fed`, `ibd`, +`social`, `support`, `billing`, `source`. Key de-sprawl moves: + +- **3 biosample tables → 1** `core.biosample` (a `source` enum discriminator + + `source_attrs` JSONB). +- **Deprecated child tables folded into JSONB** on their parents (variant aliases + & coordinates, sequence-file checksums/locations, alignment coverage, original + haplogroups, per-revision tree metadata). +- The legacy second **"metadata" database collapses into the `fed` schema**. +- Scattered `at_uri`/`at_cid` columns → one consistent **`atproto` JSONB** column. +- **Tree** is temporal: no `parent_id`; hierarchy lives in + `tree.haplogroup_relationship` with bitemporal `valid_from`/`valid_until`. 
+- **Universal variant model**: one `core.variant` per physical SNP **site**; + `canonical_name` (nullable — unnamed variants are identified by coordinates), + `aliases`/`coordinates`/`evidence` JSONB, `naming_status`, `mutation_type`. + Coordinates carry `ancestral`/`derived` (the reference genome ≠ phylogenetic + root); recurrence (homoplasy) is modeled per-link on `tree.haplogroup_variant` + (`ancestral_allele`/`derived_allele`), so forward / back-mutation / recurrent + occurrences are representable (mig 0021). +- PostGIS (`geometry(Point,4326)`), `citext`, native enums, GIN/GiST/expression + indexes on queried JSONB paths. + +## What's implemented + +### Public surface (server-rendered, HTMX, i18n en/es/fr) + +| Area | Routes | +|---|---| +| Home / about / contact / reputation / terms / privacy / cookies / FAQ | `/` `/about` `/contact` `/reputation` `/terms` `/privacy` `/cookies` `/faq` | +| Per-sample report (public, `is_public`-gated) | `/sample/:slug` (identity, Y/mt haplogroup pathways, origin map, sequencing/coverage, ancestry; curator `is_public` toggle) | +| Variant browser | `/variants` (+ fragments; JSONB alias/rs-id search) | +| Y/MT tree — two SVG cladograms (horizontal + vertical) | `/ytree` `/mtree` (breadcrumb re-root, orientation toggle, name/variant search, SNP sidebar, backbone/recent coloring) | +| References + per-publication biosamples; suggest-a-paper | `/references` (+ report), `/references/submit` (public DOI → candidate queue) | +| Biosample map (PostGIS → Leaflet GeoJSON) | `/biosamples/map` `/biosamples/geo-data` | +| Coverage benchmarks + per-lab drill-down | `/coverage-benchmarks` `/coverage/labs` | +| Profile (view + display-name update); contact (reCAPTCHA) | `/profile` `/contact` | +| sitemap / robots / health; cookie-consent banner | `/sitemap.xml` `/robots.txt` `/health` `/cookie-consent` | +| Public JSON API + OpenAPI 3 / Swagger UI | `/api`, `/api/v1/*` (see below) | + +### Public JSON API (`/api/v1/*`, OpenAPI at `/api`) + 
+Y/MT tree, coverage benchmarks, references + biosamples, biosample studies, +variant search/detail/by-haplogroup, variant **CSV** + **GFF3** export, genome +regions, STR signature + prediction, branch age, the **per-sample report** +(`/samples/:slug`), and federated population **reports** +(`/reports/{coverage,ancestry,haplogroups}`). + +### Auth & curator + +AT Protocol **OAuth** (`/login/atproto`, dev public-client path); legacy +signed-cookie sessions for dev. `Curator` RBAC guard. The curator dashboard +(`/curator`) links a full suite: + +| Tool | Route | What | +|---|---|---| +| Haplogroups | `/curator/haplogroups` | CRUD + structural ops: **reparent / merge-into-parent / split** (direct temporal edits, cycle/name guards) | +| Variants | `/curator/variants` | CRUD; alias/coordinate JSONB editing | +| Genome regions | `/curator/regions` | CRUD (coordinates/properties JSONB) | +| Curation proposals | `/curator/proposals` | review/promote Navigator-submitted branch proposals → catalog | +| Publication candidates | `/curator/publications` | review OpenAlex discoveries → promote to references | +| Change-sets | `/curator/change-sets` | tree-versioning lifecycle + diff + per-change review/apply | +| Merge review | `/curator/reviews` | resolve SNP-graft flags / merge ambiguities via the `wip_*` staging tables (accept-anchor / reparent / merge / defer) | +| Variant naming | `/curator/naming` | the **DU naming authority**: queue + mint `DUxxxxx` + lifecycle | +| Reconcile flags | `/curator/reconcile-flags` | merge YBrowse synonym clusters split across catalog variants | + +A separate **management API** for machine/curator callers lives under +`/manage/*` (change-set lifecycle, `/manage/haplogroups/merge[/preview]`, +`/manage/curation/proposals` X-API-Key intake) — deliberately outside the public +`/api/v1`. 
+ +### Tree versioning, merge & SNP-graft + +- **Change-set** lifecycle (DRAFT → READY_FOR_REVIEW → UNDER_REVIEW → + APPLIED/DISCARDED), per-change review, diff, and a temporal apply engine + (CREATE/UPDATE/DELETE/REPARENT/VARIANT_EDIT) — including a **WIP pass** that + enacts curator merge-review resolutions. +- **Tree merge** (Identify-Match-Graft) — a pure `du-domain::merge` + re-implementation against curated fixtures (the legacy was buggy): + subtree-scoped matching, ambiguity-flagged-not-guessed, materialized into a + reviewable change-set. +- **SNP-anchored graft** (`du-db::snp_graft`) — reconciles external source trees + (decoding-us, **FTDNA**) into the **ISOGG foundation** by SNP plurality: enrich + matches, graft truly-novel branches, flag the rest for curator review. A + `--reattach` mode anchors FTDNA's complete-topology "bushes" via vetted MATCH + dispositions when their backbone ancestor is flagged. A **recurrent-link scrub** + (`scrub_recurrent_links`) prunes homoplasic / ASR-scatter defining-variant links + to each variant's primary lineage. The **mtDNA tree** is FTDNA-only (single RSRS + root), loaded as its own foundation (`--ftdna-foundation`). Result: a single-root + Y tree (ISOGG-named backbone + decoding-us + full FTDNA depth) + an RSRS-rooted + mt tree. + +### Variant Naming Authority + +DecodingUs owns the `DU` Y-variant prefix. `core.du_variant_name_seq` + +`core.next_du_name()`, a curator naming queue (`UNNAMED`→`PENDING_REVIEW`→`NAMED`, +mint-on-assign with same-coordinate dedup), and a GFF3 propagation export +(`/api/v1/variants/export.gff`). 
+ +### YBrowse ingestion (the central authority document) + +`snps_hg38.gff3` (~3M SNP lines, full snapshot, no deltas) is streamed into the +verbatim **`source.ybrowse_snp` mirror**; `du-db::ybrowse::reconcile` then +*derives* `core.variant` so curator decisions survive every re-ingest: +synonyms fold deterministically (strand-canonical key; INDELs VCF-trim-normalized; +MNPs left intact), existing catalog variants match by name **or coordinate** and +are enriched in place (canonical/`naming_status` locked), and clusters split +across multiple existing rows are flagged for `/curator/reconcile-flags`. + +### Federation (outbound, summaries only) + +A long-lived **Jetstream consumer** mirrors Navigator's published anonymized +computed-summary records into dedicated `fed.*` reporting tables (PII-bearing +records keep typed anonymized columns only). Reports aggregate via query-time SQL. +The inbound credential-holding firehose / PDS-fleet model is **dropped**; +curators submit branch proposals through the machine-auth intake endpoint. + +### Genomics, STR & age + +`du-bio` (BED callable-loci, UCSC chain liftover, VCF). Y-STR per-branch modal +signatures + STR→branch prediction + STR-variance age; a combined branch-age +estimate (McDonald 2021: SNP-Poisson + STR + genealogical/aDNA anchor terms, +inverse-variance combined) gap-filling `tmrca_ybp`. + +### Scheduled jobs (`du-jobs`) + +`db-heartbeat`, `ybrowse-variant-ingest` (GFF3 → mirror → reconcile), +`publication-update` (OpenAlex), `publication-discovery`, +`publication-pubmed-update` (NCBI), `ena-study-enrichment`, +`branch-age-recompute` (STR + combined age) — plus the Jetstream reporting-mirror +consumer. Error-isolated; each registers only when its env config is present. + +### ETL (`du-migrate`) + +Legacy → new schema, preserving PKs and `sample_guid` so FKs carry over 1:1; +idempotent; runs target migrations then the transformers + a reconciliation pass. 
+Covers the full production surface — catalog (donors, biosamples, variants, tree, +studies, publications), ident/auth (users, RBAC, AT-Protocol OAuth/PDS, consent, +audit), and genomics (labs, instruments, test types, libraries/files, alignment + +pangenome coverage, genotype data, pangenome graph). Validated against a real +production dump — **all aggregates reconcile**. `--skip-tree` omits the legacy +haplogroup tree (it's built ISOGG-founded by `decodingus-tree-init` instead; +biosamples carry their haplogroup names as JSON and resolve at read time). +`decodingus-tree-init` builds the Y tree (ISOGG foundation + graft decoding-us + +graft FTDNA + scrub) and the mt tree (FTDNA foundation). + +## Getting started + +### Prerequisites + +- Rust (stable) — `cargo`. +- A container runtime for the local database. `test-db.sh` prefers **Apple + `container`** (Docker-less, the default on Apple Silicon) and falls back to + **Docker** only if Apple `container` isn't installed. With Apple `container`, + set it up once (and again whenever the service is stopped): + ```sh + container system start # starts the service; installs the Linux kernel on first run + ``` + (Already running Postgres elsewhere? Set `DATABASE_URL` and `test-db.sh` uses + it as-is, starting no container at all.) + +### Run the app + +```sh +# Start Postgres (PostGIS) and print the DATABASE_URL to export: +eval "$(./scripts/test-db.sh up)" # Apple container gives it its own IP + +# Run the web server (connects + applies migrations on startup): +DATABASE_URL=... APP_SECRET=<32+ chars> cargo run -p du-web # serves on :9000 (PORT to change) +``` + +Apple `container` assigns each container its own IP (no `localhost` port +forwarding), so `test-db.sh` discovers it and emits the right `DATABASE_URL` +(e.g. `postgres://postgres:dev@192.168.64.2:5432/decodingus`). Stop it with +`./scripts/test-db.sh down`. 
+ +> **Gotcha:** if a *committed* migration changes, recreate the dev DB — SQLx +> errors on a checksum mismatch. + +### Seed a curator (to use the curator tools) + +```sh +HASH=$(cargo run -q -p du-web --bin decodingus -- hash-password 'yourpassword') +# then insert ident.users + ident.user_login_info(provider_id='credentials', +# provider_key='', password_hash=$HASH) + ident.user_roles('TreeCurator'). +``` + +## Testing + +```sh +eval "$(./scripts/test-db.sh up)" +cargo test --workspace +``` + +Integration tests are gated on `DATABASE_URL`: with it set they run against the +live PostGIS (migrations, JSONB round-trips, query modules, the apply/merge/graft/ +reconcile engines); without it they skip and the suite stays green. The i18n test +enforces that es/fr cover every English key. + +## Running the ETL + +The production source is a self-managed Postgres on EC2. + +```sh +decodingus-migrate \ + --legacy "postgres://user:pass@ec2-host:5432/decodingus?sslmode=require" \ + --target "$DATABASE_URL" # runs transformers + reconciliation + +decodingus-migrate --legacy ... --target ... --verify # counts only +decodingus-migrate --legacy ... --target ... --skip-tree # skip the tree (build via tree-init) +``` + +For the cutover, run **`--skip-tree` first, then `decodingus-tree-init`** (below): +the tree is built ISOGG-founded rather than migrated, and `tree-init`'s foundation +load needs the tree namespace empty. + +> ⚠️ The transformer `SELECT`s encode the production column layout — validate +> against the live EC2 schema (or a current-schema dump) before the production run. 
+ +## Seeding / ingesting the tree & variants + +```sh +# Y tree: ISOGG foundation → graft decoding-us → graft FTDNA (reattach) → scrub: +decodingus-tree-init --isogg /path/isogg_full_tree.json --apply +decodingus-tree-init --merge-prod https://decoding-us.com/api/v1/y-tree --snp-graft --graft --apply +decodingus-tree-init --ftdna /path/ftdna_ytree.json --graft --reattach --apply +decodingus-tree-init --scrub-recurrent --apply +# mt tree: FTDNA is the sole source (single RSRS root) — load as the foundation: +decodingus-tree-init --ftdna /path/ftdna_mttree.json --ftdna-foundation --dna MT --apply +# (any step without --apply is a dry-run; --stage-review routes flags to /curator/reviews) + +# YBrowse variant ingest (mirror + reconcile); deploy-time, large file: +YBROWSE_GFF=/path/snps_hg38.gff3 [YBROWSE_CHAIN_GRCH37=… YBROWSE_CHAIN_HS1=…] cargo run -p du-jobs +``` + +## Deploy + +Multi-stage `Dockerfile` builds a single binary on a slim runtime (no JRE, no C +deps); `compose.yaml` runs it with `postgis/postgis`. `SQLX_OFFLINE=true` for +DB-less builds. The shared crates are git deps (no sibling path needed in the +build context). 
+ +## Roadmap + +**Done** (✅): redesigned schema + temporal tree; `du-db` aggregates; public read +surface + JSON API + OpenAPI; auth + the full curator suite; tree versioning + +merge + SNP-graft + curator merge-review; the **multi-source tree build** +(ISOGG-founded Y + decoding-us + FTDNA graft/reattach + recurrent-link scrub; mt +tree from FTDNA) + ancestral-state / recurrence modeling; the variant naming +authority; YBrowse GFF3 ingestion (mirror + reconcile, synonym/strand/INDEL +handling); federated reporting mirror + reports; STR signature/prediction + +combined branch age; `du-bio` core; the scheduled-job suite; the full production +ETL (verified against a real prod dump, `--skip-tree` cutover option); shared +crates extracted to `decodingus-shared` (git deps); the public **per-sample report** +(`/sample/:slug`, `is_public` gate, mig 0022) over a unified core+fed read model; +static/footer pages reconciled with the legacy content (App Passwords removed). + +**Remaining, in scope** (⬜): + +- [ ] **AT-Protocol OAuth — live handshake.** Client wiring is built and verified + to the consent page against a local PDS; the confidential + `private_key_jwt` round-trip is the **Edge joint test** (see + `docs/atproto-oauth-findings.md`). +- [ ] **ETL cutover — execution.** ETL + `--skip-tree` verified end-to-end against + a real prod dump; what's left is running it for real (freeze prod read-only → + dump → prepare locally → ship to AWS → flip) and **alias-aware + name-resolution** for biosample→haplogroup (mt has no ISOGG-style alias + source; ~15% of mt names need PhyloTree-version mapping). +- [ ] **IBD matching** — the AppView is the only component that can spot IBD + *introduction candidates* across the federation (mine `fed.*` → dual-consent + → coordinate the Edge hand-off → persist match state). Placeholder tables + (`ibd`); logic forward. 
+- [ ] **Social layer** — messaging/consent threads, notifications, blocks, public + feed, reputation, group projects (`social`). Underpins IBD consent/notify and + stands alone; logic forward. +- [ ] **Sequencer-lab inference** — instrument-ID → lab lookup API (lets Edge nodes + skip a data-entry step) + consensus discovery + curator review. Lab tables + exist; logic forward. +- [ ] **Discovery automation** — the curator review/promote half is built; the + automated half (private-variant capture, consensus, auto-reassignment) is + forward work. +- [ ] **Multi-test-type completion** — taxonomy + chip ingest exist; marker + coverage / confidence scoring tables are forward work. +- [ ] **Region management API + bootstrap-from-CHM13** (the S3/CHM13 pipeline; the + region CRUD UI already exists). + +**Out of scope / not in production** (➖): inbound PDS firehose + fleet, +patronage/billing, manual sample ingestion (Navigator does it), AppView→PDS +backfeed (superseded by the outbound mirror / notify-fetch direction), server-side +BAM/CRAM. Several have placeholder tables but no logic. (IBD, social, and +sequencer-lab inference were **re-scoped IN** — the AppView is their federation +coordinator.) diff --git a/rust/STATUS.md b/rust/STATUS.md new file mode 100644 index 00000000..c310ad8c --- /dev/null +++ b/rust/STATUS.md @@ -0,0 +1,735 @@ +# DecodingUs Rust rewrite — status & handoff + +Living snapshot of the Play/Scala 3 → Rust port. Pairs with `README.md` (roadmap). 
+Last updated **2026-06-11** (this session: **the McDonald-2021 branch-age model built +out end-to-end** + the **T2T/Hallast Y reference-region pipeline** — all post-launch +catalog refinement, no change to the launch-critical path): +- **Y reference-region ingest** (`du-jobs/yregions`, `run-once yregions`) — loads + T2T-CHM13v2.0 Y structural BEDs (AZF/DYZ, **amplicons v2**, **inverted-repeats v2**, + chrXY sequence-class) into `core.genome_region` via `genome_region::upsert_by_key` + + `prune_source_orphans` (full-snapshot sync). `du_db::variant::refresh_region_overlaps` + flags low-confidence-for-placement variants (`annotations.region_overlaps`), consumed + by `snp_graft` (`UnreliableAnchor` → curator review). (memory `yregions-ingest`.) +- **PDF branch-age engine** (`du_db::pdf`) — discretized age PDFs (poisson / gaussian / + **mixture**, `multiply`=Eq 1, `convolve`=Eq 7, `gaussian_on`/`poisson_on` grid-param), + replacing the inverse-variance shortcut. +- **SNP age = bottom-up tree propagation** (`age::propagate`, Eq 5–8, `HET_MASK`). + **STR age = multi-step `P(g|m)`** (McDonald **Table 1** embedded + ω convolution + fallback, `ystr`) → per-marker Poisson mixture → **tree-propagated** + (`ystr::propagate_str` + §2.5.2 ancestral-motif reconstruction), retiring the + star-phylogeny pooling. +- **COMBINED = direct PDF product** of the SNP / STR / genealogical terms (Eq 1) on a + shared TREE grid — non-Gaussian shape preserved; disjoint terms fall back to the + inverse-variance combine. +- **Hallast 2026 incorporation** — v2 BEDs + callable-mask validation; BEAST **0.76e-9 + cross-check clock** (`age::HALLAST_RATE`, not swapped for Helgason); genealogical + calibration anchors (`scripts/seed-hallast-anchors.sql`, D1 TMRCA 19,450 ybp, model- + dated). P9 palindrome **BLOCKED** on supplementary coords. (`documents/planning/ + y-preprint-hallast-2026-incorporation.md`.) 
+- **Real STR mutation rates** — `scripts/seed-str-mutation-rates.sql` (137 markers: + Willems 2016 1000G MUTEA + 95% CI primary, YHRD gap-fill for core markers) replaces + the `DEFAULT_STR_RATE` fallback; ω columns stay at the Ballantyne-derived global model. + +Prior (2026-06-07): public per-sample report (`/sample/:slug`, mig 0022); static/footer +pages reconciled with legacy Scala; collaboration-platform design docs (d1–d5); +design-doc triage (superseded docs removed, rest reconciled). +Prior (2026-06-05): FTDNA Y-tree SNP-graft + `--reattach`; recurrent-link scrub; +mtDNA tree wired as an FTDNA RSRS foundation; ETL `--skip-tree` cutover option. + +## TL;DR + +The **spine is done and then some**: redesigned schema, data layer, public +HTML/HTMX surface, auth + curator tools, the full production ETL, the public JSON +API, tree versioning + merge, the SNP-anchored graft + its curator review UIs, the +YBrowse mirror→reconcile catalog pipeline (≈3M variants), federated **reporting** +(mirror **and** web endpoints), branch ages, and Y-STR signatures/prediction/age. + +The launch-critical path is now just two things: **(1) the data cutover** — the +ETL has been **verified end-to-end against a real production dump** (2026-06-04, +363 MB / PG 15): all 34 aggregates reconcile, and the **`--skip-tree` + tree-init** +cutover flow is verified (prod→`decodingus_cutover`: tree empty, non-tree +aggregates reconcile, the multi-source tree builds into the empty namespace). +What's left is *executing* the cutover against live/final data (+ alias-aware mt +name resolution) — and **(2) the live AT Proto OAuth handshake** (the cross-host +"Edge joint test"). 
+The remaining *feature* mass is post-launch: **haplogroup-discovery automation**, +**multi-test-type completion**, **IBD matching + the social layer**, and +**sequencer-lab inference** (the AppView coordinates IBD introductions, hosts the +social surfaces, and resolves instrument→lab for the Edge — only patronage/billing +is now fully out of scope). See "What's left". + +## Layout + +- **`/Users/jkane/Development/decodingus/rust`** — this workspace (AppView-only crates). + - `du-db`, `du-external`, `du-web`, `du-jobs`, `du-migrate` +- **`/Users/jkane/Development/decodingus-shared/crates`** — shared crates, separate git repo. + - `du-domain` (pure types + algorithms, incl. `merge`), `du-atproto`, `du-bio` + - Pushed to `github.com/JamesKane/decodingus-shared`; consumed via **git deps + pinned to a rev** in `rust/Cargo.toml` (Docker build unblocked — no sibling + path dep needed). To update: push the shared repo, then bump `rev` (or switch + to a pushed tag, e.g. `v0.1.0` — created locally, not yet pushed). For local + co-dev against working-tree changes, add a `[patch]` back to the sibling paths. +- Legacy Scala app: `/Users/jkane/Development/decodingus` (parent dir). Navigator: + `/Users/jkane/Development/scala/DUNavigator`. + +## Local dev / how to run + +Postgres runs under Apple `container` (name `du-pg`), reachable at its own IP +(no localhost forwarding): + +``` +DATABASE_URL="postgres://postgres:dev@192.168.64.2:5432/decodingus?sslmode=disable" +APP_SECRET="<32+ chars>" # signs session cookies +``` + +- Run web: `DATABASE_URL=... APP_SECRET=... PORT=9000 cargo run -p du-web` (binary `decodingus`). +- Run jobs scheduler: `DATABASE_URL=... cargo run -p du-jobs` (binary `decodingus-jobs`). 
+ - **One-shot ops:** `decodingus-jobs run-once <job>` — `ybrowse` (full GFF3 stream + + reconcile; needs `YBROWSE_GFF` [+ optional `YBROWSE_CHAIN_GRCH37/HS1`]), + `reconcile` (re-derive `core.variant` from the loaded mirror without re-streaming), + `yregions` (load the T2T-CHM13 Y reference-region BEDs + refresh region flags), and + `branch-age` (recompute STR signatures + the combined branch ages). +- Tests: `DATABASE_URL=... cargo test -p du-db` (live-DB tests skip/pass if unset). + - **Safe against any DB:** every du-db integration test now provisions a private, + throwaway database via `du_db::testing::ephemeral_db` (migrated, dropped on Drop), + so `cargo test` never touches the catalog `DATABASE_URL` points at. + - `du-domain` tests need no DB (`cargo test -p du-domain`). +- Migrations auto-apply on web/ETL startup; the `du-db` migrations test also applies them. +- **Gotcha:** if a *committed* migration changes, recreate the dev DB + (`decodingus`) — sqlx errors on a checksum mismatch (`VersionMismatch`). + +### Databases in use +- `decodingus` — dev DB (migrations + live tests' base server for ephemeral DBs). +- `decodingus_legacy` — loaded from `scripts/mock-legacy.sql` (current-schema mock). +- `decodingus_etl` — ETL target (the migrate binary recreates/migrates it). + +## What's done (✅) + +- **Schema** — `migrations/0001–0023`. JSONB "document columns" (variant + coordinates/aliases/**evidence**, biosample source_attrs/atproto, haplogroup + provenance, coverage, …). 
Highlights since the merge work: `ident.audit_log` + (0010), fed reporting (0011–0012), Y-STR (0013–0014), backbone (0015), **variant + naming authority** (0016, nullable `canonical_name` + partial unique index + + `core.next_du_name()`), **variant evidence** (0017), **YBrowse mirror + + reconcile machinery** (0018), **strand-canonical fold** (0019), **INDEL/MNP + canon** (0020), **ancestral-state / per-branch ASR** (0021), **`is_public` + biosample gate** (0022, the public per-sample report), **variant + `defining_haplogroup_id` recurrence model** (0023). +- **`du-db`** — query modules for every aggregate (variant, haplogroup, biosample, + publication, genome_region, coverage, proposal, study, change_set, merge, auth, + naming, ybrowse, wip, ystr, age, fed, consent, support) + `testing` (ephemeral DB). +- **Public HTML/HTMX** (`du-web/routes`) — variants browser, **Y/MT tree as two + server-rendered SVG cladograms** (`tree_layout.rs`; breadcrumb re-root, + orientation cookie toggle, name/variant search, SNP-detail sidebar with + **branch provenance + per-variant locus/anc/der**, backbone/recent coloring + + legend, **full-viewport width**, **client-persisted depth selector** + [localStorage, `?depth=`]), references + per-pub biosamples, biosample map + (PostGIS), coverage benchmarks; i18n (en/es/fr), `HX-Request` fragment + negotiation, vendored assets, **site chrome aligned with the Scala app**. +- **Auth + curator** — signed-cookie sessions, `Curator` RBAC extractor, curator + CRUD for haplogroups/variants/genome-regions, curation proposal + intake→review→promote, and the review surfaces below. +- **Variant Naming Authority** (mig 0016, `du_db::naming`, `/curator/naming`) — + nullable `canonical_name`, DU-name minting (`core.next_du_name()`), lifecycle + (UNNAMED/PENDING_REVIEW/NAMED), same-coordinate dedup; GFF3 propagation at + `GET /api/v1/variants/export.gff`. 
**Gotcha:** the partial unique index means + every `ON CONFLICT (canonical_name)` carries `WHERE canonical_name IS NOT NULL`. +- **YBrowse ingest = mirror + reconcile** (migs 0017–0020, `du-jobs/ybrowse`, + `du-db/ybrowse`) — streams `snps_hg38.gff3` (≈3.17M lines) into a verbatim + `source.ybrowse_snp` **mirror**, then `reconcile` *derives* `core.variant` + idempotently: synonym-fold by strand-canonical key, coordinate-fallback match, + INDEL trim-normalize / MNP-typing, rank-based canonical, provisional→DU mint; + single matches **enrich existing variants** (coords + mutation_type + evidence, + curator choices locked); multi-match clusters → `source.ybrowse_reconcile_flag` + → **`/curator/reconcile-flags`** → `variant::merge_into`. First real full run: + 2.99M clusters → 2.88M created, **100,968 existing enriched**, 11,406 flagged; + catalog now ~3.0M variants, ~3.0M with coordinates. (See memory + `ybrowse-ingest-mirror`.) +- **Variant coordinate enrichment** — reconcile fills coords/types on any + name-matching existing variant; a `decodingus-tree-init --backfill-prod-coords` + pass fills the b37/hs1 builds the decoding-us API carries that the graft dropped + (complement to YBrowse's GRCh38). Sidebar shows `chrY:pos anc>der [build]`. +- **SNP-anchored graft** (`du-db/snp_graft`, `decodingus-tree-init`) — classifies a + source tree (decoding-us prod) against the catalog by defining-SNP anchor + (enrich-match / graft-novel / review), Phase-4 curator-review export, and stages + flags + name-collisions + graft-blocked items into a DRAFT change-set + (`--stage-review`) triaged at **`/curator/reviews`** (SNP-scatter + tree-preview + + accept-anchor/reparent/merge/defer; `tree.wip_*` enacted by the apply engine's + WIP pass). (See memory `prod-tree-snp-graft`.) 
+- **Y reference-region pipeline** (`du-jobs/yregions`, `du-db/genome_region`, + `du-db/variant`) — `run-once yregions` loads the T2T-CHM13v2.0 Y structural BEDs + (AZF/DYZ heterochromatin, amplicons v2, inverted-repeats/palindromes v2, chrXY + sequence-class) into `core.genome_region` (`upsert_by_key` + `prune_source_orphans` + = full-snapshot sync). `refresh_region_overlaps` stamps `core.variant.annotations. + region_overlaps` for variants in unreliable-for-placement regions; `snp_graft` + routes anchors whose every supporting SNP is unreliable to curator review + (`UnreliableAnchor`). Empirically validated by Hallast 2026 (Fig 5h-i callable + mask). hs1 coords only (1-based inclusive). (Memory `yregions-ingest`.) +- **Tree sample leaves (YFull-style)** (`du-db/tree_sample`, mig 0037 + `tree.haplogroup_sample`) — places **non-D2C** biosamples (`source <> 'CITIZEN'`) as + leaves under the tree node their **published call** (`core.biosample.original_haplogroups`) + resolves to, reusing `haplogroup::resolve_name_or_variant` (name→alias→defining-SNP→ + normalize). `recompute_placements(dna)` is an advisory-locked declarative engine + (assign+prune, bumps `tree_revision`); unresolvable calls kept `UNPLACED` for curator + triage. Serving: `/api/v1/y-tree` nodes carry a **cumulative `sample_count`**; + `/api/v1/{y,mt}-tree/node/{name}/samples` lists the leaves (accession/alias/source + + paper citation). `du-jobs run-once tree-samples-recompute` + daily. Y now, mt-ready + (dna_type-parameterized; no mt recompute until the mt tree lands). **HTML cladogram + done (2026-06-14):** placed samples render as **YFull-style leaf tips** hanging off their + node (`tree_layout::LaidTip` via `tree_sample::direct_labels`) — each tip reserves a full + node slot (spaced like any leaf), the node centers over children+tips, and tip connectors + share the node's child bus; capped 8/node + a "+N" overflow tip → sidebar. (The JSON API + keeps a `sample_count` per node.) 
The SNP sidebar lists the placed leaves (label + source + + citation, capped 50 + "+N more"). + **Curator triage (2026-06-13):** `status='CURATED'` (manual placement the recompute + preserves) + Curator-gated `GET /manage/tree-sample/unplaced` (the unresolved-call queue) + + `POST /manage/tree-sample/place` (pin a sample under a chosen node). (Memory + `tree-sample-leaves`.) +- **ETL** (`du-migrate`) — **full production surface**: catalog (donors, biosamples, + variants, tree, studies, publications), ident/auth, genomics. Validated vs the + schema-only `db.schema` and the current-schema mock with data; all aggregates + reconcile. +- **Public JSON API** (`du-web/api.rs`) — read endpoints under `/api/v1/*` + + OpenAPI 3 + Swagger UI at `/api` (utoipa). Includes the federated population + reports `/api/v1/reports/{coverage,ancestry,haplogroups}` aggregated from the + `fed.*` mirror with query-time SQL, plus `haplogroups/:name/{str-signature,age}` + and `POST /api/v1/str/predict`. **Tree cache revalidation (2026-06-12):** the + `{y,mt}-tree[/full]` endpoints emit a strong `ETag` + `Last-Modified` from a + persisted `tree.tree_revision` marker (mig 0024) and honor `If-None-Match` → 304 + *before* the ~28 MB query; the marker is bumped by every tree-mutating op + (change-set apply, coordinate/alias enrichment, reconcile, tree-init). Added + `/api/v1/{y,mt}-tree/version`. Memory `tree-cache-revalidation`. +- **Tree versioning** (`du-db/change_set.rs`, `du-web/routes/versioning.rs` + + `change_sets.rs`) — change-set lifecycle + per-change review + diff + temporal + apply engine; curator-gated machine API at `/manage/change-sets/*` **plus a + two-panel HTMX review UI** at `/curator/change-sets`. Integration-tested. +- **Tree merge** (`du-domain/merge.rs` + `du-db/merge.rs`) — pure Identify-Match- + Graft; `materialize` → change-set via placeholder-chained `tree_change`; endpoints + `/manage/haplogroups/merge[/preview]`. Fixtures + e2e tests pass. 
+- **Federated reporting** (`du-db/src/fed/`, `du-jobs/jetstream.rs`, migs 0011–0012) + — the AppView **aggregates and reports; it does not analyze.** A long-lived + Jetstream consumer mirrors Navigator's published anonymized computed-summary + records (the `✅ AppView Complete` set) into `fed.*` tables, cursor-resumed, + idempotent+ordered. **Privacy:** typed anonymized columns only, no raw JSONB for + PII-bearing records. Flow (a) proposal intake + (b) reporting ingest + (c) + reporting web endpoints are **all DONE**. (Memory `atproto-federation-direction`.) +- **Y-STR per-branch signatures + prediction + age** — `fed.str_profile` mirror + (Jetstream) + `du-db::ystr` modal-haplotype aggregation → `tree.haplogroup_ + ancestral_str` (mig 0013) via `str-signature-recompute`; STR→branch `predict` + at `POST /api/v1/str/predict`. STR age is the **McDonald multi-step PDF model**: + `P(g|m)` from Table 1 (embedded) + ω convolution fallback → per-marker Poisson + mixture (`du_db::pdf::Pdf::mixture`) → **tree-propagated** TMRCA PDFs + (`ystr::propagate_str`, ancestral-motif reconstruction). Per-marker rates from + `genomics.str_mutation_rate` (seeded, 137 markers; Willems 2016 + YHRD). +- **Combined branch age (McDonald 2021)** (`du-db/age.rs`, migs 0013/0014) — each + evidence term is a **PDF**: SNP TMRCA (bottom-up tree propagation, Eq 5–8, on the + `du_db::pdf` grid), STR TMRCA (`ystr::str_tmrca_pdfs`), and genealogical/aDNA-anchor + Gaussians; `COMBINED` is their **direct product** (Eq 1, shape-preserving; disjoint + → inverse-variance fallback), gap-filling `tree.haplogroup.{formed,tmrca}_ybp` + (curated values never overwritten). `HET_MASK` excises heterochromatic SNPs; + Helgason rate default with Hallast `HALLAST_RATE` as a recorded cross-check. Runs in + `branch-age-recompute` (= `run-once branch-age`). SNP/STR/anchor terms data-gated + (sparse pre-cutover; the dev tree is tree-only, so a live run is a near no-op). 
+- **`du-jobs`** — tokio scheduler + **`run-once`** one-shot mode; jobs: + `db-heartbeat`, `ybrowse-variant-ingest`, `publication-update`, + `publication-discovery`, `publication-pubmed-update`, `ena-study-enrichment`, + `str-signature-recompute`, `branch-age-recompute`; plus the Jetstream + reporting-mirror consumer (set `JETSTREAM_URL`). +- **`du-external`** — OpenAlex, ENA, NCBI/PubMed; AWS SES + Secrets Manager behind + the `aws` feature. +- **`du-atproto`** — DID/handle resolution, Ed25519 verify, PKCE/DPoP/private-key- + JWT OAuth client + metadata builders (library; HTTP surface = the Edge test below). +- **Public per-sample report** (`/sample/:slug`, `du-web/routes/samples.rs` + + `templates/samples/`) — ExploreYourDNA-style page gated by `core.biosample.is_public` + (mig 0022). `du_db::biosample::report` is the **unified read model**: anchors on the + canonical `core.biosample` (+ donor sex/origin, publications) and attaches the + federated analytics (`fed.biosample`/`fed.sequencerun`/`fed.coverage_summary`/ + `fed.population_breakdown`) via `atproto.uri ↔ *.biosample_ref` — the seam the + eventual core/fed **biosample consolidation** collapses into (memory + `biosample-consolidation`). Sections: identity, Y+mt **haplogroup pathways** + (`du_db::haplogroup::pathway` — root→tip clades + ages + defining SNPs; graceful + "not placed" gap), origin Leaflet map, sequencing/coverage, ancestry stacked bar. + Curator `is_public` toggle (`/curator/samples/:slug/public`); JSON API + `GET /api/v1/samples/:slug`. Tested (`du-db/tests/sample_report.rs`). **Follow-up:** + the report shows one `populationBreakdown`; Navigator now publishes two methods — + pick PCA-GMM (memory `ancestry-method-pick-followup`). 
+- **Secondary web surfaces** — static pages (about/contact/**reputation**/terms/ + privacy/**cookies**/FAQ; content reconciled with the legacy Scala prose — + **App Passwords removed**), footer nav matching the legacy set, `sitemap.xml`/ + `robots.txt`, GDPR cookie-consent banner, read-only **profile** page, + reCAPTCHA-verified **contact** form. Root README rewritten for the Rust AppView. +- **Testing** — du-domain unit tests (no DB); du-db integration tests isolated to + ephemeral databases (`du_db::testing::ephemeral_db`); du-web i18n parity test + enforces es/fr cover every English key. + +## What's left, in scope (⬜) + +Launch-critical first, then the post-launch feature mass. + +> **Design landscape (2026-06-07).** The post-launch collaboration/IBD layer now has +> drafted build specs: `documents/planning/d1`–`d5` + `design-roadmap-rust-rewrite.md` +> — **D1** encrypted Edge-to-Edge exchange + AppView broker (the shared substrate), +> **D2** PII-free ResearchSubject registry, **D3** IBD impl on D1, **D4** assertion +> store (split PII rails), **D5** group-project ACL. Central invariant: **AppView +> holds no PII — it brokers** (memory `collab-platform-d1-d5`). Two tracks join at +> D1: Platform D1→D2→D4→D5, Match D1→D3; the Catalog track (D6 discovery, D7 +> multi-test, D8 sequencer-lab) is independent. The original planning docs were +> triaged and reconciled/removed — see `documents/{planning,proposals}/*triage*.md`. + +1. **Cutover** (see "Cutover strategy") — ETL verified end-to-end. Chosen strategy: + freeze prod read-only → fresh dump → prepare locally (ETL data + ISOGG-founded + tree build) → `pg_dump` → restore on AWS → flip. **`--skip-tree` DONE** (commit + 0f83dbc): `decodingus-migrate --skip-tree` skips the 3 tree transforms + + reconcile checks (the tree is built by `tree-init` into the empty namespace); + biosamples carry haplogroup names as JSON and resolve at read time; `core.variant` + still migrates (tree-init reuses by `canonical_name`). 
Cutover order: migrate + `--skip-tree` → tree-init. + **FTDNA descoped (2026-06-12):** beta tree = **ISOGG foundation + decoding-us + graft, no `--reattach`**; **no mt tree at beta**. The FTDNA-heavy subsections + below (mt foundation, 81k hybrid, reattach) are superseded — keep for later. + So name resolution is **Y-only**. + **Name resolution — DONE (2026-06-12).** Diagnosed against the real prod dump: + `public.biosample_haplogroup` (the reconciled FK) is **empty in prod**, so + `original_haplogroups` carries the raw heterogeneous **publication** call text + (FTDNA shorthand `R-M269`, path strings `R-DF27 > Z195 > Z198`, bare SNPs, old + YCC longhand `R1b1a2a1a2c1g`, `n/a`). Only ~20% match a node name directly. + `du_db::haplogroup::resolve_name_or_variant` now has a **normalization fallback** + (`normalize_haplogroup_call`: strip FTDNA prefix, terminal path token, split SNP + synonyms) that resolves the SNP-bearing calls via the existing defining-variant + phase → ~70% of rows (improves the per-sample report AND tree search). Residual: + ~59 YCC-longhand names need an authoritative old-YCC→modern crosswalk (ISOGG file + has only 13 name-aliases — don't hand-guess). Memory `biosample-y-name-resolution`. + **Per-variant upsert perf — DONE (2026-06-12).** The "1s slow-statement" was the + no-op `DO UPDATE SET canonical_name = EXCLUDED.…` rewriting every *pre-existing* + variant row (the catalog is pre-loaded by YBrowse, so the graft/merge/apply calls + nearly all conflict) → MVCC bloat + index churn (~1.9s in bulk, +893 heap pages / + 30k rows). The index is a correct arbiter — not the issue. Fixed: + `du_db::variant::ensure_base_variant_id` (`DO NOTHING` + read-back, zero writes on + conflict); all 3 `get_or_create_variant` route to it. Memory + `variant-upsert-noop-write`. 
+ **YCC→SNP node rename — DONE (2026-06-12).** `tree-init --rename-snp-shorthand` + (`du_db::haplogroup::rename_to_snp_shorthand`) drops YCC-longhand node names + (`R1b1a2`) to `<letter>-<SNP>` (`R-M269`), single major letter + (renormalizes decoding-us `E1b-<SNP>`→`E-<SNP>`), keeping the **old YCC name in + `provenance.aliases`** — which also **closes the YCC resolution residual** (the + resolver's alias phase now resolves old biosample YCC calls). Naming SNP: existing + shorthand → ISOGG-designated first variant (`--isogg`) → DB-linked variant + (SNP-shaped only). Macro/backbone nodes, coordinate-name variants, and name + collisions are skipped + flagged (no guessing). Dev-tree dry-run: 10,254/10,516 + renamed; ~185 keep YCC (twin collisions + no-SNP). Run it as a post-graft step in + the cutover tree build. Memory `ycc-to-snp-rename`. +2. **Live AT Protocol OAuth handshake — the cross-host "Edge joint test."** Library + + a dev public-client path are verified locally up to the **consent click** + (gated `decodingus-shared/.../tests/live_pds.rs`: discovery + PAR + DPoP + + `use_dpop_nonce` → `request_uri`, then with a Caddy TLS proxy up to the authorize + page). The confidential web-client `private_key_jwt`-PAR round-trip can't run + under Apple `container` (no `--add-host` for the PDS to resolve our `client_id` + host) → it's the Edge joint test. Token path wired; remainder is the browser + consent + cross-host verify. Runbook: `docs/atproto-oauth-findings.md`, + `docs/atproto-edge-reply.md`. +3. **Haplogroup-discovery AUTOMATION — DONE (2026-06-12).** Both halves now built. + Citizens publish a **`privateVariant` lexicon** record (their variants beyond the + terminal); the Jetstream consumer mirrors it into `fed.private_variant` (mig 0028). 
+ The **discovery consensus engine** (`du_db::discovery`, mig 0029) materializes them + into `tree.biosample_private_variant`, then pools per-sample variant sets into + `tree.proposed_branch` by **variant-set Jaccard** — deterministic, declarative + recompute (idempotent, stable-id UPSERT via a `cluster_key` partial index), config + thresholds from `tree.discovery_config`, real confidence (count + distinct + submitters + variant-set consistency), `READY_FOR_REVIEW`/`SPLIT_CANDIDATE` + transitions, opt-in auto-promote (off by default). On **promotion** the + contributing samples' private variants are marked `PROMOTED` + reassigned to the + new terminal (`discovery::reassign_after_promote`, in `proposal::promote`'s tx) — + which also freezes them out of the recompute loop. Read API + `GET /api/v1/discovery/proposals[/:id]`; the `/curator/proposals` UI now shows + defining variants + confidence + a split banner. Job `du-jobs run-once + discovery-consensus` (+ hourly). Mirrors the sequencer engine's structure. Memory + `discovery-consensus-engine`. **Remaining (future):** split *execution* (flagging + only), a deepest-defined-branch read-path, geographic/temporal confidence signals. +4. **Multi-test-type — DONE (2026-06-12).** The AppView's whole multi-test-type + concern is **call reliability** for the shared genealogy components, with two + inputs (per-test tracking/parsing is Navigator's, not the AppView's). **(a) Coverage + conformance** (below). **(b) Cross-technology consensus** — `fed.haplogroup_reconciliation` + (the donor's call reconciled across all its technologies: consensus_haplogroup + + confidence + snp_concordance + run_count) is now the **authoritative call**. Bridge + = `reconciliation.did = core.biosample.atproto->>'repo_did'` + dna (citizen + self-publish; no schema change). 
The per-sample **report** resolves + Reconciled→FedConsensus→Original and shows the consensus + N runs + confidence + + concordance (`biosample.rs`, `_pathway.html`, `HaplogroupCallOrigin::Reconciled`). + **Tree evolution gates+weights on it** (mig 0031): the discovery engine **excludes** + contributors below `min_consensus_confidence` (0.5) or `INCOMPATIBLE` (un-reconciled + kept), and **down-weights** proposal confidence by the cluster's mean consensus + reliability (`w_reliability` term) — so the consensus drives the tree, never + individual runs. Memory `discovery-consensus-engine`. + + **(a) Coverage norms & conformance.** Reframed (per the user) from the Scala doc's + haplogroup-marker/accuracy-tier/IBD machinery to grounded coverage QA: **callable + loci + depths per test type vs the norm**. + `genomics.test_type_coverage_norm` (mig 0030) holds the **empirically-derived** + cohort norm per test type (median/p25/p75 depth, median pct tiers, typical Y/mt + marker counts), recomputed from `fed.coverage_summary ⋈ fed.sequencerun` (+ + `fed.genotype` markers) by `du_db::coverage::recompute_norms` (advisory-locked, + declarative; `du-jobs run-once coverage-norms` + hourly). The **per-sample report** + now shows actual depth vs the cohort norm (+ advertised spec) with a BELOW/AT/ABOVE + badge — `conformance()` baselines on the **empirical cohort norm**, not the + advertised aligned bar (a "30× WGS" is a ~90 Gb raw-yield spec; D2C labs don't + target 30× aligned, so an advertised number would mislabel them). Vendor tracking: + `coverage::benchmarks` DTO gains `meets_spec`/`depth_delta` (lab × test type). + Read API `GET /api/v1/test-types[/:code]` (taxonomy + norm). Memory + `test-type-coverage-norms`. **Deferred:** age-contribution wiring (typical SNP + counts captured, not yet fed into `age.rs` — Eq-4 callable-interval); raw-yield + (Gbases) norm; cataloged-coverage union. **Out:** haplogroup marker-coverage / + accuracy-tier, cross-test-type IBD (D1/D3). 
No `test_type_definition` seed (read + opportunistically; key off the federated test-type string). + (`documents/planning/multi-test-type-roadmap.md`.) +5. **IBD matching — AppView as coordinator (NOT dropped).** The AppView is the + only component with the cross-federation view to identify **introduction + candidates**, so it must: mine `fed.*` for candidate pairs (shared haplogroup, + population overlap, shared-match signals), run the **dual-consent** handshake, + coordinate the Edge hand-off, and **persist match state** (attestations / + overlap scores / suggestions) for ongoing match lists + dedup. It stores **no + raw autosomal data** and does **no** segment comparison — that's Edge-to-Edge. + Schema `ibd` (mig 0007). The **candidate-generation engine is DONE (2026-06-12)** — + the D1-independent first slice: `du_db::ibd::recompute_suggestions` mines + introduction candidates from `fed.*` (population overlap **within ancestry blocks** = + dominant super-pop × z-scored PCA cell; shared terminal Y/mt consensus haplogroup; + 2-hop shared-match over `ibd_discovery_index`), combines + ranks + **caps top-K per + sample** (the no-N:N guarantee), declaratively writing `ibd.match_suggestion` + (preserves DISMISSED/CONVERTED). `du-jobs run-once ibd-discovery-recompute` + daily; + `suggestions_for` reader. Engine-only — **no public API** (candidate pairs gate on + the D1 consent flow). **Federated read API DONE (2026-06-12)** — the entry point of + the whole flow, and it needed **no new auth foundation**: the existing Ed25519 + signed-poll pattern (`verify_signed` + `messages::poll` + 300s window) + the + `core.biosample.atproto->>'repo_did'` bridge the engine already uses dissolved the + apparent DPoP blocker. `du_db::ibd`: `suggestions_for_did` (owner-DID-scoped via the + bridge), `is_suggested_to_did` (introduce authz), `owner_did_of_sample` (server-side + counterpart resolution), `messages::{poll,introduce}`. 
Endpoints (`routes/ibd.rs`, + signed, **personal scope** — not project-scoped): `GET /api/v1/ibd/suggestions` (own + **pseudonymous** candidates — only `suggested_sample_guid` + non-PII `{signals}` + scores), `POST /api/v1/ibd/introduce` (broker-mediated: resolves the counterpart DID + server-side, calls `exchange::create_request`, **never returns the DID** — caller learns + it only post-mutual-consent via `exchange::pending_for`). **Lifecycle round-off + (2026-06-13):** purpose is now **routed per signal** (HAPLOGROUP→IBD_Y/IBD_MT via the + engine's recorded `hgDnaType`, else IBD_AUTOSOMAL — `introduction_purpose`); introduce + marks the suggestion **CONVERTED**; new `POST /api/v1/ibd/dismiss` → DISMISSED (engine + preserves it). Memory `ibd-candidate-generation`. **Remaining (needs D1/Navigator):** the + daily recompute scheduler exists (confirm cadence); attestation-ingest + + `depth_score` from the tree; PCA-LSH tuning; Navigator consume-UI + + introduce→consent→relay round-trip. Authoritative design: + `documents/planning/d3-ibd-matching-impl.md` + §3 (on `d1-encrypted-edge-exchange.md`). + **D1 exchange BROKER DONE (2026-06-12)** — the shared substrate gating the Match + + Platform tracks. `exchange.*` schema (mig 0032; the unused `ibd.match_*` folded + + dropped) + `du_db::exchange` (publish/fetch X25519 key, request, **dual-consent + gate** → session, **`incoming`** [PENDING requests awaiting a recipient — closes the + introduce→consent loop, **symmetric-blind**: no initiator DID], pending, blind relay + post/pull/ack, TTL `expire`) + `du-web` `/api/v1/exchange/*` endpoints, all + **Ed25519-signature-authenticated** (`crate::sig::verify_signed` — **no OAuth/cookie** + per call, so D1 doesn't wait on the OAuth joint test) + `du-jobs exchange-expire`. + PII-free broker — never sees plaintext/keys, relays opaque ciphertext. Memory + `exchange-broker`. 
+ **DEVICE-KEY AUTH FOUNDATION DONE (2026-06-13)** — fixes the gap that the DID-doc + `#atproto` signing key is PDS-custodied (a desktop client can't sign with it, can't add + its own verificationMethod), so only `did:key` could authenticate. Now a client + publishes its Ed25519 device PUBLIC key as a `com.decodingus.atmosphere.deviceKey` record + in its own repo (repo-write = proof of control over repo_did); the AppView ingests it + (`fed.device_key`, mig 0036) like any `fed.*` record. **`verify_signed(pool, did, msg, + sig)`** now: `did:key` self-certifies; **`did:plc/web` ⇒ match any registered device key** + (`du_db::fed::device_key::keys_for`; none ⇒ 403, the bootstrap), DID-doc resolution + dropped (no per-call network). N keys per DID; **revoke = delete the record** (routes + through `fed::delete`). All 18 signed call sites thread `&st.pool`. PII-free (DID + public + key only). Memory `device-key-auth`. Navigator: generate+keychain a device key → one-time + OAuth `createRecord` → sign all Edge calls with it. **Remaining (not AppView):** the + `du-exchange` crypto crate (X25519/AEAD/X3DH-lite, `decodingus-shared`) + the Navigator Edge + relay client/session driver (DUNavigator) for the end-to-end round-trip. +6. **Collaboration + social layer.** The genealogy-collaboration platform (group + projects, ResearchSubject registry, assertions) is specced in **D2/D4/D5** on the + D1 channel. **D2 ResearchSubject registry DONE (2026-06-12)** — `research.*` schema + (mig 0033: PII-free pseudonymous person nodes + `social.group_project` memberships + + tombstone merge audit + sparse biosample link) + `du_db::research` + `du-web` + `/api/v1/research/*` endpoints, **signature-authenticated** (`crate::sig`, shared + with D1) **and authorized** from existing data (register → project owner; merge → + steward of both; custody → subject steward; read → project participant). PII-free + invariant holds. Memory `research-subject-registry`. 
**D5 group-project ACL DONE + (2026-06-12)** — `research.project_member` (mig 0034, reusing `social.group_project` + as the project, `owner_did`=founding ADMIN) + `du_db::research` Role/Capability ACL + (`role_of`/`is_team_member`/`can`/`add_member`/`revoke_member`/`members_of`), **wired + in**: D2 register is `ManageSubjects`-gated + subjects read team-gated; D1 + project-scoped request/consent require live team membership; team endpoints + `/api/v1/research/project/{member,member/revoke,members}` (signed, ADMIN-gated). + Memory `group-project-acl`. **D4 assertion store (R2) DONE (2026-06-12)** — + `research.assertion` + `research.subject_current_view` (mig 0035), the attributed, + append-only, scoped claim primitive. `du_db::research`: `Predicate` enum + PII + classifier (`MDKA_IS`/`IDENTITY` have **no** AppView table — R3 P2P only; `NOTE` is + PII-by-default, storable only when `pii_cleared`; a `scan_pii` value scrubber rejects + emails/overlong text regardless) + `record_assertion`/`retract_assertion`/`refold` + (per-(subject,predicate,scope) fold → SETTLED|DISPUTED, **per-project isolated** so a + subject in two projects never bleeds claims) + `accept_same_person` (drives the D2 + `merge_subjects(method=ASSERTION)`, **never** auto-collapsed). Endpoints + `/api/v1/research/{assertion,assertion/retract,assertion/resolve,current-view}` — + signed + role-gated (`WriteAssertions`=ADMIN/CO_ADMIN; `ResolveDispute`=ADMIN/CURATOR). + Memory `assertion-store`. **Deferred (Navigator/R1/later):** R3 PII over D1 + + `assertion_local`; R1 `com.decodingus.research.assertion` lexicon + du-jobs Jetstream + ingest (no publisher yet — `record_uri`/PUBLIC scope already in place); catalog + promotion via `tree.change_set`; shared `du-domain` assertion types. The broader + social surfaces (messaging/feed/reputation/blocks) are the + reconciled forward proposals (`documents/proposals/{group-project-system, + Messaging_and_Feed_System,Reputation_System_Implementation}.md`). 
Schema `social` + (mig 0009) exists; logic + endpoints to build. **No-PII caveat:** DMs must ride D1 + (or AT-Proto), not a central plaintext `social.message`. +7. **Sequencer-lab inference — AppView lookup + consensus (NOT dropped).** The + **lookup API is DONE (2026-06-12)**: `GET /api/v1/sequencer/lab?instrument_id=…` + (→ `SequencerLabDto`, 404 if unknown) + `GET /api/v1/sequencer/lab-instruments` + (bulk cache seed), resolving via the **preseeded** `genomics.sequencer_instrument. + lab_id` (mig 0025 re-adds it; ETL backfills from the legacy tie; + `du_db::sequencer`). **Seeded (2026-06-13, mig 0038):** the old YDNA-Warehouse d2c + instrument→lab map — **5 labs + 36 instruments** (rows with `n_crams > 2`, max-frequency + lab; canonical full names FTDNA→Family Tree DNA / Dante Labs / Nebula Genomics / Full + Genomes Corporation / YSEQ, all `is_d2c`; `model_name`=export platform, `manufacturer` + derived). Idempotent (`ON CONFLICT (name) DO NOTHING` / `(instrument_id) DO UPDATE`); + the dev DB had 0 labs (legacy `public.sequencing_lab` is empty — hence the need). Source + `instrument_centers.tsv` (repo root, reference only). `lab_instruments.tsv` is just a + different view of the same data — already captured, no separate work. The + proposal/consensus path is **not live anywhere**, so the + lookup uses the direct tie (memory `sequencer-lab-lookup`). The **consensus + engine is DONE (2026-06-12)**: `du_db::sequencer::recompute_consensus` derives + observations from `fed.sequencerun ⋈ fed.biosample.center_name`, aggregates per + instrument into `instrument_association_proposal` (dominant lab, distinct-citizen + counts, confidence, threshold status, conflict→PENDING), run by `du-jobs run-once + sequencer-consensus` (+ hourly). Curator API `/manage/instrument-proposals[/:id[/ + accept|/reject]]` — **accept sets `sequencer_instrument.lab_id`** (closing the loop + to the lookup), audited in-transaction via `du_db::audit::log`. 
Hardened for + production (mig 0026): is_d2c no longer clobbered, audit joins the mutation tx, + stable proposal ids (UPSERT not DELETE+re-INSERT), `pg_try_advisory_lock` guard, + aggregation/queue indexes. The **curator HTMX review UI is DONE (2026-06-12)** — + two-panel queue at `/curator/instrument-proposals` (status-filter chips, proposal + detail with supporting observations, accept form [lab/manufacturer/model/d2c] + + reject-with-reason), Curator-gated, i18n en/es/fr, on the dashboard. The + **`instrumentObservation` lexicon is DONE (2026-06-12)**: citizens publish + `com.decodingus.atmosphere.instrumentObservation` (real confidence KNOWN/INFERRED/ + GUESSED + `observedAt`); the Jetstream consumer mirrors them into + `fed.instrument_observation` (mig 0027, `du_db::fed::instrument_observation`), and + `recompute_consensus` folds them in next to the implicit `center_name` claims with + **real confidence-level + recency scoring** (the score's recency/level terms were + constants, now computed in SQL). **D8 is complete** — remaining ideas are the + "Future Considerations" list (flowcell-level tracking, geographic inference, + publication cross-ref, reputation weighting). (`documents/planning/sequencer-lab-inference-system.md`.) +8. **Smaller in-scope finishers:** + - **Graft carries coordinates forward** at creation (fold into + `get_or_create_variant`) so the decoding-us backfill isn't needed after each + re-graft. + - **YBrowse reconcile tail:** off-by-one / near-coordinate proximity detection; + an external synonym authority (YFull/ISOGG cross-refs) to assert "X = Y" across + genuinely different coordinates; per-name evidence consolidation. + - **WIP/merge review:** `EDIT_VARIANTS` resolution + cascading a graft-blocked + *subtree* from a single decision. + - **Branch age:** the McDonald model is built end-to-end (PDF engine, SNP + STR + tree propagation, multi-step `P(g|m)`, genealogical anchors, PDF-product combine, + seeded STR rates). 
Remaining refinements are **data-shaped, not architectural**: + the true b̄ coverage *intersection* (Eq 4 — needs per-sample callable intervals), + the Eq 9/10 causality back-correction, the PDF-at-scale perf check once a + densely-sampled subtree exists, and the lone missing single-copy STR rate (DYS447). + - **API:** surface unnamed variants (cross-repo change — `du-domain::Variant. + canonical_name` `String` → `Option`, shared with Navigator). + - More `fed.*` report shapes (genotype-provider mix, platform/test-type + distribution) as the UI needs them. +9. **Tech debt** — JSONB consolidation is **done** (realized in the de-sprawl, + mig 0002/0004 — that analysis doc was removed); terms/privacy prose now mirrors + the legacy Scala content but is still "subject to legal review"; optional + internal/curator OpenAPI document; harden `du_db::variant::get_by_id` for a NULL + `canonical_name` (unnamed-variant edge). + +## Out of scope / deliberately absent (➖) — do NOT build + +- **Manual sample-ingestion APIs** (biosample create + sequences + publication-link) + — curators use Navigator now; the AppView keeps catalog **review + naming** only. +- **BAM/CRAM extraction + variant calling** — done at the edge (Navigator); the + AppView aggregates summaries/proposals (so `du-bio` is text + coordinate math, no + htslib/noodles). +- **The legacy PDS fleet / raw-data network mirror** — `fed.pds_node` / + `pds_heartbeat` / fleet-admin tables (mig 0008) map to the **dropped** mirror + design; don't build registration/heartbeat/fleet endpoints. +- **AppView→PDS backfeed** — the AppView writes nothing back to PDSes (inbound-only + / notify-fetch direction). +- **Patronage / billing** — not in production (`billing` placeholder; no logic). + **Deferred, not dead:** revive to fund infrastructure past ~a few hundred active + users (`documents/proposals/Patronage_Donation_System.md`; FAQ already names it). 
+- (IBD matching, the social layer, and sequencer-lab inference are **back in + scope** — see "What's left" items 5–7. Their schemas remain placeholders pending + that build.) + +## Cutover blocker — VERIFIED (2026-06-04) + +The ETL has been run end-to-end against a **real production dump** +(`/Users/jkane/backup_file.sql`, 363 MB, PG 15.18) and **all 34 aggregates +reconcile**. Schema risk was already retired (`~/db.schema` is current prod); +this run retired the data risk too. + +How it was run (repeatable): +1. `CREATE ROLE decoding_us_user;` (the dump owns objects as this role), then a + fresh `decodingus_prod` DB. +2. Load the dump, stripping the two `\restrict`/`\unrestrict` lines — the + container psql is **16.4**, which predates those meta-commands (added in the + Sept-2025 security releases): `grep -vE '^\\(un)?restrict' dump.sql | + container exec -i du-pg psql -U postgres -d decodingus_prod -q`. +3. `decodingus-migrate --legacy --target ` + (recreate the target first; the run migrates + transforms + reconciles). + +**The variant fold (commits fbc298a → cd37657):** legacy `public.variant` is one +row per (SNP, build, mutation DIRECTION); `core.variant` is one row per physical +SNP **site**. The transform folds by site (`dense_rank` over position) and carries +per-branch ancestral/derived onto `tree.haplogroup_variant` (ASR model — see +[[etl-cutover-verified]] / migration 0021). Real-data: variant 3,023,051 → +2,899,782; haplogroup_variant → 86,744; all aggregates reconcile. + +## Cutover strategy (chosen 2026-06) — read-only freeze, prepare local, ship to AWS + +1. **Freeze prod** read-only (no write drift during migration). +2. **Take a fresh dump**; load locally → `decodingus_prod` (role `decoding_us_user` + + strip `\restrict` lines for psql 16; see above). +3. **Prepare locally** (the new-schema DB): + - ETL the **non-tree** data (donors, biosamples, pubs, variants, genomics) — + all reconcile today. 
+ - Build the **tree separately, ISOGG-founded** (the chosen direction — see + "Tree build direction" below): `tree-init --isogg --apply` then + `--merge-prod --snp-graft --graft --apply`. + - (Optional) run the YBrowse ingest for full coordinate coverage. +4. **Ship to AWS:** `pg_dump -Fc` the prepared DB, restore on AWS, point the new + codebase at it, flip. + +**The one ETL change this needs:** the ETL currently *migrates the prod +decoding-us tree*; for the ISOGG-founded build it must **skip the tree transforms** +(`haplogroup` / `haplogroup_relationship` / `haplogroup_variant`) and leave the +tree to `tree-init`. Add a `--skip-tree` flag (or split tree transforms out). NOT +yet built. + +**Two integration points to settle:** +- **Name resolution must be alias-aware.** `biosample→haplogroup` is by **name**, + not FK (`core.biosample.original_haplogroups` JSONB, `fed.biosample.y/mt_haplogroup` + text) — so an ISOGG-founded tree works *because* decoding-us names live as + aliases on the ISOGG nodes. Verify tree-search + biosample views resolve via + aliases once a flip DB has data. +- **Postgres version.** Prod dump is PG 15; the local container is 16. `pg_dump` + 16 → restore into 15 can break — run the new code's AWS instance on **PG 16** + (match local) or pin local to 15. + +### Tree build direction — ISOGG foundation + SNP-graft everything (decided) + +Sources differ in naming (ISOGG path-strings vs decoding-us/FTDNA SNP-names) AND +root depth, so the exact-set name merge (`du_db::haplogroup::merge_into` / +`du_domain::merge`) is useless cross-source — its subtree-scoping cascades a +root-topology mismatch to NEW (matched=1, would duplicate 10,230 nodes). Use the +**SNP-anchored graft** (`du_db::snp_graft`, `tree-init … --snp-graft`). Full +investigation + recipe in memory [[tree-source-merge]]. Decisions: + +- **ISOGG is the foundation** (single `Y` root + curated backbone authority), then + graft decoding-us and FTDNA onto it. 
The reverse (FTDNA- or decoding-us-founded) + drops the deep-root region, becomes a rooted forest, and/or inverts naming + authority. Build: + `tree-init --isogg --apply` → + `--merge-prod --snp-graft --graft --apply` → + `--ftdna --graft --reattach --apply`. +- **`--reattach` is required for FTDNA** (105k-node complete-topology source, + `/Volumes/nas/FTDNA/`, refreshed weekly). FTDNA merges SNP blocks ISOGG splits, + so a bush's backbone ancestor is often weak-flagged and the graft conservatively + *blocks* it (would drop 56,855 of 70,921). Reattach walks up to the nearest + ancestor the classifier cleanly **MATCHED** and attaches the bush there. (First + cut used a raw SNP→node index and dumped clades onto A00 — the catalog's junk + recurrent links, see "junk links" below, point single SNPs at basal nodes; + MATCH dispositions are vetted by SNP-set + subtree scope, so they don't.) +- **Source tags parameterized** (commit 0e09060) — any source tags its own nodes; + the anchor/collision guard excludes only that source's prior graft. +- **Result `decodingus_hybrid2`: 81,297 nodes, single ISOGG root**, ISOGG-named + backbone + decoding-us + full FTDNA depth (70,748 bushes; 16,117 reattached; + 173 unanchored; ~19 land on `CT`), source names folded in as aliases, ~42k + variants coord-enriched from FTDNA anc/der+position. Spot-verified: + `I-BY136871 → I1a3a1b`, `G-FTH55879 → G2a2b2a1a1b1a1`; basal nodes near-empty. +- **JUNK LINKS — SCRUBBED (commit 7a0487d).** ~1.2k catalog variants were linked + to haplogroups across unrelated macro-clades (decoding-us ASR scatter onto + A00/H/O; also FTDNA shared-SNP blocks), which nearly broke the FTDNA reattach. + `du_db::haplogroup::scrub_recurrent_links` (`tree-init --scrub-recurrent + [--apply]`) keeps each variant's primary (most-concentrated) lineage — by tree + ancestry, not names — and soft-deletes the off-lineage occurrences (self-name + tiebreak for fully-scattered cases, e.g. `CTS9108`). 
Operates only on + `haplogroup_variant`, never on topology. Applied to `decodingus_hybrid2`: + cross-macro-clade variants 1,200 → 1, 10,908 links pruned, 81,297 nodes + unchanged. The 45 residue on `decodingus_etl` are legitimate basal chains + (Y→I1, NO→O) the ancestry criterion correctly keeps. + +### mtDNA tree — FTDNA-only foundation (wired, commit b7c9748) + +Legacy prod has only Y (2,695 nodes), **zero MT** — no decoding-us mt source, mt +API, or biosample mt assignment. So the mt-tree is FTDNA-only: load FTDNA's single +RSRS-rooted mt haplotree as the **foundation** (merge_into into an empty MT +namespace) — no graft/merge/reattach/scrub. +- `tree-init --ftdna /Volumes/nas/FTDNA/ftdna_mttree.json --ftdna-foundation + --dna MT --apply`. `ftdna_foundation_roots` builds the nested merge tree. +- **Privacy differs from the Y graft:** `kitsCount==0` on RSRS and internal splits + means "no kit terminates here", NOT "private individual" — backbone/internal + nodes are kept; only private LEAVES (kits==0, no kept descendants) drop. +- **NOT scrubbed:** mtDNA homoplasy (16189, 152, …) is real and FTDNA-curated; + 1,759 multi-branch variants are legit recurrence, not ASR junk. +- Variants are RSRS-frame (`G263A`: anc G, der A @263 — rCRS has A there); + coordinates `{chrM, position, ancestral, derived}`. +- Verified on `decodingus_hybrid2`: 4,740 nodes, single RSRS root, 56 backbone + clades, correct PhyloTree topology (L0`. +- **utoipa** kept out of `du-domain` (shared with Navigator/edge); API DTOs + + `From` impls live in `du-web/api.rs`. Recursive `HaplogroupNodeDto.children` + needs `#[schema(no_recursion)]`. +- **Management API namespace**: curator/machine endpoints live under **`/manage/*`** + (not `/api/v1`) and are excluded from the public OpenAPI doc. Auth is + session/`Curator`; unauth → 303 to /login even for JSON endpoints. +- **`DbError::Conflict` → HTTP 422** (mapped in `du-web/error.rs`). 
+- **ETL preserves PKs** via `OVERRIDING SYSTEM VALUE` + `sample_guid`; sequences + fixed up post-load; idempotent upserts. +- **i18n**: adding a UI string requires es/fr entries — `cargo test -p du-web` + enforces parity. + +## Resume checklist + +1. `eval "$(./scripts/test-db.sh up)"` (or set `DATABASE_URL`); confirm `du-pg` + container is running. +2. `cargo test --workspace` (du-domain needs no DB; du-db live tests provision + ephemeral DBs from `DATABASE_URL`). +3. Pick the next arc — launch-critical is **cutover** + the **OAuth Edge test**. + Post-launch: the **collaboration/IBD platform** starts at **D1** (the shared + encrypted-exchange substrate, `documents/planning/d1-encrypted-edge-exchange.md`), + then D2→D4→D5 (platform) / D3 (IBD); the **Catalog** track (haplogroup-discovery + automation = D6, multi-test = D7, sequencer-lab = D8) is independent. Remaining + doc cleanup: `documents/atmosphere/` still references removed docs (flagged in the + triage reports). +4. Reload the mock if needed: recreate `decodingus_legacy`, load + `scripts/mock-legacy.sql`; recreate `decodingus_etl`; run `decodingus-migrate`. + +## Reference paths + +- **Post-launch design specs:** `documents/planning/d1`–`d5` + + `design-roadmap-rust-rewrite.md` (collaboration/IBD platform, no-PII broker). +- **Design-doc triage reports:** `documents/planning/design-doc-triage-report.md`, + `documents/proposals/triage-report.md` (what was removed/reconciled + remaining + `atmosphere/` ref cleanup). 
+- Prod schema (authoritative for ETL, confirmed current 2026-06): `~/db.schema` +- Old data dump (may lag — get a fresh one for cutover): `/Volumes/nas/stuff/dump.sql` +- AT Proto notes: `docs/atproto-oauth-findings.md`, `docs/atproto-edge-reply.md` +- **Scala↔Rust functional diff catalog: `docs/scala-vs-rust-diff.md`** +- Navigator atmosphere docs: `/Users/jkane/Development/DUNavigator/documents/atmosphere` diff --git a/rust/compose.yaml b/rust/compose.yaml new file mode 100644 index 00000000..d66ba574 --- /dev/null +++ b/rust/compose.yaml @@ -0,0 +1,43 @@ +# DecodingUs (Rust) — production-ish compose. Works with Docker or Apple +# `container compose`. Mirrors the deployment intent of the legacy compose. +# +# docker compose up --build (or: container compose up --build) + +services: + db: + image: postgis/postgis:16-3.4 + environment: + POSTGRES_PASSWORD: ${DU_PG_PASSWORD:-dev} + POSTGRES_DB: ${DU_PG_DB:-decodingus} + ports: + - "5432:5432" + volumes: + - du-pgdata:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d ${DU_PG_DB:-decodingus}"] + interval: 10s + timeout: 5s + retries: 10 + + app: + build: + context: . 
+ dockerfile: Dockerfile + environment: + DATABASE_URL: postgres://postgres:${DU_PG_PASSWORD:-dev}@db:5432/${DU_PG_DB:-decodingus}?sslmode=disable + APP_SECRET: ${APP_SECRET:-changeme} + RUST_LOG: ${RUST_LOG:-info,du_web=debug} + ports: + - "9000:9000" + depends_on: + db: + condition: service_healthy + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:9000/health"] + interval: 30s + timeout: 10s + retries: 3 + +volumes: + du-pgdata: diff --git a/rust/crates/du-db/Cargo.toml b/rust/crates/du-db/Cargo.toml new file mode 100644 index 00000000..35e7fa33 --- /dev/null +++ b/rust/crates/du-db/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "du-db" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true + +# Data-access layer: SQLx pool + per-aggregate query modules. Runtime-checked +# queries for now (no live DB in this environment); migrate to compile-time +# `query_as!` + committed `.sqlx` offline cache once a dev DB is reachable. +[dependencies] +du-domain = { workspace = true } +sqlx = { workspace = true } +tokio = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +uuid = { workspace = true } +chrono = { workspace = true } +thiserror = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +chrono = { workspace = true } +du-domain = { workspace = true } diff --git a/rust/crates/du-db/build.rs b/rust/crates/du-db/build.rs new file mode 100644 index 00000000..9271537b --- /dev/null +++ b/rust/crates/du-db/build.rs @@ -0,0 +1,7 @@ +// `sqlx::migrate!` embeds the migrations directory at COMPILE time. Without this +// hint, adding/editing a .sql file does not rebuild du-db, so the embedded set +// goes stale and migrations silently fail to apply. Watch the directory so any +// change forces a recompile. 
+fn main() { + println!("cargo:rerun-if-changed=../../migrations"); +} diff --git a/rust/crates/du-db/src/age.rs b/rust/crates/du-db/src/age.rs new file mode 100644 index 00000000..8cb3c851 --- /dev/null +++ b/rust/crates/du-db/src/age.rs @@ -0,0 +1,653 @@ +//! Combined branch-age estimation (McDonald 2021 — see +//! `documents/proposals/branch-age-estimation.md`). Independent evidence terms +//! (STR variance, SNP counting, genealogical/aDNA anchors) are each stored as a +//! method-labeled row in `tree.haplogroup_age_estimate`; this module computes the +//! SNP and genealogical terms and **combines all available terms** by the direct +//! product of their PDFs (McDonald Eq 1, `P(t|e)=k·∏P(t|eᵢ)`) — preserving each +//! term's non-Gaussian shape (Poisson skew, STR convergent-mutation tails) rather +//! than inverse-variance-averaging medians. It writes a `COMBINED` estimate and +//! gap-fills `tree.haplogroup.tmrca_ybp` (a curated value is never overwritten). +//! Disjoint terms (no overlapping support) fall back to the inverse-variance +//! Gaussian combine, which can't annihilate. +//! +//! The STR term is produced by [`crate::ystr`]. SNP/genealogical terms are +//! data-gated: they only emit where private-variant/callable-loci or anchor data +//! exists (sparse until ETL cutover / curation), but the framework is correct and +//! extends to the full combined age as that data lands. + +use crate::pdf::Pdf; +use crate::DbError; +use sqlx::PgPool; +use std::collections::{BTreeMap, BTreeSet, HashMap}; + +/// MSY combined SNP mutation rate (SNPs/bp/year, Helgason 2015). This is the rate +/// the model applies — see `documents/proposals/branch-age-estimation.md`. +pub const SNP_RATE: f64 = 8.33e-10; + +/// Independent cross-check clock from Hallast et al. 2026 (142 population-scale Y +/// assemblies, BEAST v1.10.4 strict molecular clock on the X-degenerate mask): +/// **0.76 × 10⁻⁹ sub/site/yr (95% CI 0.67–0.86 × 10⁻⁹)** — ~9% slower than +/// Helgason. 
Recorded for provenance/comparison **only**; `recompute_combined_ages` +/// does *not* swap to it (a slower clock makes every TMRCA ~9% older). Use it to +/// sanity-check our SNP ages or to bound the rate-uncertainty band, not as the +/// default. CI bounds: [`HALLAST_RATE_LO`], [`HALLAST_RATE_HI`]. +pub const HALLAST_RATE: f64 = 0.76e-9; +pub const HALLAST_RATE_LO: f64 = 0.67e-9; +pub const HALLAST_RATE_HI: f64 = 0.86e-9; + +/// "Before present" reference year (radiocarbon convention) for calendar anchors. +pub const PRESENT_YEAR: i32 = 1950; + +// ── PDF-based tree propagation (McDonald 2021 §2.2, Eq 5–8) ─────────────────── +// +// The SNP age of a clade is built bottom-up: a node's TMRCA is the product over +// its children of (the child's own TMRCA convolved with the parent→child branch +// time), per Eq 8. Each factor is a Poisson age PDF (Eq 3) over the branch's SNP +// count and callable bp. A node's "formed" age is its TMRCA convolved with its +// own branch time — i.e. when its lineage split from its parent. This is the pure +// algorithm; `recompute_combined_ages` (below) supplies the DB-derived inputs. +// +// Not yet modelled here (documented follow-ups): the exact b̄ coverage +// *intersection* across sub-clades (Eq 4 — needs per-sample callable intervals, +// not just totals), and the Eq 9/10 causality back-correction (the bottom-up +// convolution already keeps a parent older than its children in the common case). + +/// One clade (haplogroup node) of the propagation input. +#[derive(Debug, Clone, Default)] +pub struct Clade { + /// SNPs on the edge from this node's parent down to it (`m_{parent→node}`): + /// the branch time when this node feeds its parent, and its own "formed" age. + /// 0 for a root. + pub branch_snps: i64, + /// Effective callable bp (`b̄`) over which this clade's SNPs are counted. + pub callable_bp: f64, + /// Child clade indices. 
+ pub children: Vec, + /// Private-SNP counts of testers sitting directly on this node (terminal tips); + /// each contributes a Poisson age factor (tester birth ≈ present is omitted as + /// a negligible offset). + pub tester_snps: Vec, +} + +/// A clade's computed age PDFs. +#[derive(Debug, Clone)] +pub struct CladeAge { + /// TMRCA of the node's sampled descendants. + pub tmrca: Pdf, + /// When the node's lineage split from its parent (`TMRCA ⊛ branch time`). + pub formed: Pdf, +} + +/// Grid for the whole-tree propagation. Coarser/wider than the PDF default: Y +/// TMRCAs run from recent surname clades to ~300 ky (A00), so 50-yr bins over +/// 350 ky keep convolution affordable while spanning the deepest nodes. +pub const TREE_RESOLUTION_YEARS: f64 = 50.0; +pub const TREE_MAX_AGE_YEARS: f64 = 350_000.0; + +/// Branch-time PDF for clade `x`: `P(t | m_branch)` over its callable bp. +fn branch_time(clades: &[Clade], x: usize, mu: f64, res: f64, max_age: f64) -> Pdf { + Pdf::poisson_on(clades[x].branch_snps, clades[x].callable_bp, mu, res, max_age) +} + +fn compute_tmrca( + i: usize, + clades: &[Clade], + mu: f64, + res: f64, + max_age: f64, + memo: &mut [Option>], +) { + if memo[i].is_some() { + return; + } + memo[i] = Some(None); // guard against accidental cycles + let mut factors: Vec = Vec::new(); + for &ch in &clades[i].children { + compute_tmrca(ch, clades, mu, res, max_age, memo); + if let Some(Some(ct)) = &memo[ch] { + factors.push(ct.convolve(&branch_time(clades, ch, mu, res, max_age))); + } + } + for &s in &clades[i].tester_snps { + factors.push(Pdf::poisson_on(s, clades[i].callable_bp, mu, res, max_age)); + } + let result = factors.split_first().map(|(first, rest)| { + rest.iter().fold(first.clone(), |acc, f| acc.multiply(f)) + }); + memo[i] = Some(result); +} + +/// Compute every clade's TMRCA + formed-age PDFs bottom-up (Eq 8) on a +/// `res`-year grid spanning `[0, max_age]`. 
A clade with no evidence (no children +/// with ages, no testers) yields `None`. +pub fn propagate(clades: &[Clade], mu: f64, res: f64, max_age: f64) -> Vec> { + let mut memo: Vec>> = vec![None; clades.len()]; + for i in 0..clades.len() { + compute_tmrca(i, clades, mu, res, max_age, &mut memo); + } + (0..clades.len()) + .map(|i| { + let Some(Some(tmrca)) = memo[i].take() else { return None }; + let formed = tmrca.convolve(&branch_time(clades, i, mu, res, max_age)); + Some(CladeAge { tmrca, formed }) + }) + .collect() +} + +/// SNPs in heterochromatic sequence are masked from age counting — they sit +/// outside the callable denominator (`y_xdegen+y_ampliconic+y_palindromic`) and +/// the paper excises recurrent regions self-consistently (Appendix A.2/A.3). +/// Ampliconic and palindromic SNPs are kept (same rate as X-degenerate). This is +/// a SQL fragment testing `core.variant v` for any `heterochromatin:` overlap. +const HET_MASK: &str = "NOT EXISTS (SELECT 1 FROM \ + jsonb_array_elements_text(COALESCE(v.annotations->'region_overlaps','[]'::jsonb)) e \ + WHERE e LIKE 'heterochromatin:%')"; + +/// Build the propagation input from the current Y tree: nodes, parent→child +/// edges, het-masked branch (defining) SNP counts, and per-node tester data +/// (active private-SNP counts + callable bp). Returns `(clades, haplogroup_ids)` +/// where `haplogroup_ids[i]` is the DB id of clade `i`. +async fn build_clades(pool: &PgPool) -> Result<(Vec, Vec), DbError> { + // Stable index over current Y nodes. + let ids: Vec = sqlx::query_scalar( + "SELECT id FROM tree.haplogroup \ + WHERE haplogroup_type='Y_DNA'::core.dna_type AND valid_until IS NULL ORDER BY id", + ) + .fetch_all(pool) + .await?; + let idx: HashMap = ids.iter().enumerate().map(|(i, &id)| (id, i)).collect(); + let mut clades = vec![Clade::default(); ids.len()]; + + // Edges → children (a child carries its own branch SNPs). 
+ let edges: Vec<(i64, i64)> = sqlx::query_as( + "SELECT c.id, p.id FROM tree.haplogroup_relationship r \ + JOIN tree.haplogroup c ON c.id=r.child_haplogroup_id AND c.valid_until IS NULL \ + AND c.haplogroup_type='Y_DNA'::core.dna_type \ + JOIN tree.haplogroup p ON p.id=r.parent_haplogroup_id AND p.valid_until IS NULL \ + WHERE r.valid_until IS NULL", + ) + .fetch_all(pool) + .await?; + for (c, p) in edges { + if let (Some(&ci), Some(&pi)) = (idx.get(&c), idx.get(&p)) { + clades[pi].children.push(ci); + } + } + + // Branch defining-SNP counts (het-masked). + let branch: Vec<(i64, i64)> = sqlx::query_as(&format!( + "SELECT hv.haplogroup_id, count(*)::bigint FROM tree.haplogroup_variant hv \ + JOIN core.variant v ON v.id=hv.variant_id \ + WHERE hv.valid_until IS NULL AND {HET_MASK} GROUP BY hv.haplogroup_id" + )) + .fetch_all(pool) + .await?; + for (hg, n) in branch { + if let Some(&i) = idx.get(&hg) { + clades[i].branch_snps = n; + } + } + + // Testers: per (node, sample) active private-SNP count (het-masked) + that + // sample's Y callable bp (xdegen+ampliconic+palindromic, else total). 
+ let cbp = "COALESCE(NULLIF(COALESCE(cl.y_xdegen_callable_bp,0)+COALESCE(cl.y_ampliconic_callable_bp,0)\ + +COALESCE(cl.y_palindromic_callable_bp,0),0), cl.total_callable_bp, 0)"; + let testers: Vec<(i64, i64, f64)> = sqlx::query_as(&format!( + "SELECT pv.terminal_haplogroup_id, count(*)::bigint, max({cbp})::float8 \ + FROM tree.biosample_private_variant pv \ + JOIN core.variant v ON v.id=pv.variant_id \ + LEFT JOIN genomics.biosample_callable_loci cl \ + ON cl.sample_guid=pv.sample_guid AND cl.chromosome IN ('chrY','Y') \ + WHERE pv.status='ACTIVE' AND pv.haplogroup_type='Y_DNA'::core.dna_type \ + AND pv.terminal_haplogroup_id IS NOT NULL AND {HET_MASK} \ + GROUP BY pv.terminal_haplogroup_id, pv.sample_guid" + )) + .fetch_all(pool) + .await?; + let (mut bp_sum, mut bp_cnt) = (vec![0.0f64; ids.len()], vec![0u32; ids.len()]); + for (hg, snps, b) in testers { + if let (Some(&i), true) = (idx.get(&hg), b > 0.0) { + clades[i].tester_snps.push(snps); + bp_sum[i] += b; + bp_cnt[i] += 1; + } + } + + // Representative b̄ per node: mean of its testers' callable bp, else the + // catalog-wide mean (so SNP-less internal branches still get a branch time). + let default_b: f64 = sqlx::query_scalar::<_, Option>(&format!( + "SELECT avg({cbp})::float8 FROM genomics.biosample_callable_loci cl WHERE cl.chromosome IN ('chrY','Y')" + )) + .fetch_one(pool) + .await? + .filter(|b| *b > 0.0) + .unwrap_or(15_000_000.0); + for i in 0..ids.len() { + clades[i].callable_bp = if bp_cnt[i] > 0 { bp_sum[i] / bp_cnt[i] as f64 } else { default_b }; + } + + Ok((clades, ids)) +} + +/// Combine independent Gaussian age estimates `(mean_ybp, sigma_ybp)` by +/// inverse-variance weighting: `µ = Σ(wᵢµᵢ)/Σwᵢ`, `σ² = 1/Σwᵢ`, `wᵢ = 1/σᵢ²`. +/// A non-positive sigma falls back to 25% of the mean (min 1) so a point estimate +/// without a usable CI still contributes (weakly). Returns `(mean, sigma)`. 
+pub fn combine(estimates: &[(f64, f64)]) -> Option<(f64, f64)> { + let mut wsum = 0.0; + let mut wxsum = 0.0; + for &(mean, sigma) in estimates { + let s = if sigma > 0.0 { sigma } else { (mean * 0.25).max(1.0) }; + let w = 1.0 / (s * s); + wsum += w; + wxsum += w * mean; + } + if wsum <= 0.0 { + return None; + } + Some((wxsum / wsum, (1.0 / wsum).sqrt())) +} + +#[derive(sqlx::FromRow)] +struct AnchorRow { + haplogroup_id: i64, + date_ce: Option, + carbon_date_bp: Option, + uncertainty_years: Option, +} + +#[derive(Debug, Default)] +pub struct CombineStats { + pub snp: usize, + pub genealogical: usize, + pub combined: usize, +} + +/// Recompute the SNP and genealogical age terms, then the COMBINED estimate for +/// every branch with ≥1 term, gap-filling `tmrca_ybp`. COMBINED is the direct PDF +/// product (Eq 1) of the SNP TMRCA PDF (propagation), the STR TMRCA PDF +/// ([`crate::ystr::str_tmrca_pdfs`]), and the genealogical anchor PDF — all on the +/// shared TREE grid. Full refresh of the computed methods (`SNP_POISSON`, +/// `GENEALOGICAL`, `COMBINED`); `STR_VARIANCE` (from `ystr`) and curated values are +/// left intact. +pub async fn recompute_combined_ages(pool: &PgPool) -> Result { + let mut tx = pool.begin().await?; + let mut stats = CombineStats::default(); + + sqlx::query("DELETE FROM tree.haplogroup_age_estimate WHERE method IN ('SNP_POISSON','GENEALOGICAL','COMBINED')") + .execute(&mut *tx) + .await?; + + // ── SNP-Poisson term: tree propagation (McDonald Eq 5–8) ────────────────── + // Build the clade tree, propagate TMRCA/formed PDFs bottom-up, then store a + // SNP_POISSON term per scored node (median + 95% CI of its TMRCA) and gap-fill + // `formed_ybp`. The COMBINED step below fills `tmrca_ybp`. Heterochromatic SNPs + // are masked from both `m` and (already) the callable denominator (`HET_MASK`). 
+ let (clades, ids) = build_clades(pool).await?; + let ages = propagate(&clades, SNP_RATE, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS); + // Keep each term's actual PDF (on the shared TREE grid) for the Eq-1 product below. + let mut snp_pdf: HashMap = HashMap::new(); + for (i, age) in ages.iter().enumerate() { + let Some(age) = age else { continue }; + let (med, lo, hi) = age.tmrca.ci95(); + snp_pdf.insert(ids[i], age.tmrca.clone()); + upsert_estimate_ci( + &mut tx, + ids[i], + "SNP_POISSON", + med.round() as i32, + lo.round() as i32, + hi.round() as i32, + clades[i].tester_snps.len() as i32, + ) + .await?; + // Node formation age — gap-fill only (never overwrite a curated value). + sqlx::query("UPDATE tree.haplogroup SET formed_ybp=$2 WHERE id=$1 AND formed_ybp IS NULL") + .bind(ids[i]) + .bind(age.formed.median().round() as i32) + .execute(&mut *tx) + .await?; + stats.snp += 1; + } + + // ── Genealogical / aDNA anchors ─────────────────────────────────────────── + // Per branch, combine its anchors into one GENEALOGICAL term. + let anchors: Vec = sqlx::query_as( + "SELECT haplogroup_id, date_ce, carbon_date_bp, \ + details->>'uncertainty_years' AS uncertainty_years \ + FROM tree.genealogical_anchor", + ) + .fetch_all(&mut *tx) + .await?; + let mut by_hg: BTreeMap> = BTreeMap::new(); + for a in anchors { + let ybp = match (a.carbon_date_bp, a.date_ce) { + (Some(c), _) => c as f64, + (None, Some(d)) => (PRESENT_YEAR - d) as f64, + _ => continue, + }; + if ybp < 0.0 { + continue; + } + // Sigma: explicit uncertainty_years, else 10% of the age (min 25 yr). 
+ let sigma = a + .uncertainty_years + .and_then(|u| u.parse::().ok()) + .filter(|s| *s > 0.0) + .unwrap_or((ybp * 0.10).max(25.0)); + by_hg.entry(a.haplogroup_id).or_default().push((ybp, sigma)); + } + let mut gen_pdf: HashMap = HashMap::new(); + for (hg, ests) in &by_hg { + if let Some((mean, sigma)) = combine(ests) { + let rel = if mean > 0.0 { sigma / mean } else { 0.0 }; + gen_pdf.insert(*hg, Pdf::gaussian_on(mean, sigma, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS)); + upsert_estimate(&mut tx, *hg, "GENEALOGICAL", mean, rel, None, None).await?; + stats.genealogical += 1; + } + } + + // STR term: tree-propagated TMRCA PDFs on the same grid (the STR_VARIANCE rows + // are written separately by `crate::ystr::recompute_signatures`, from the same + // computation). Any stored STR_VARIANCE row with no fresh PDF — a curated value, + // or one predating profile data — still contributes, reconstructed as a Gaussian. + let mut str_pdf = crate::ystr::str_tmrca_pdfs(pool, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS).await?; + let str_rows: Vec<(i64, i32, Option, Option)> = sqlx::query_as( + "SELECT haplogroup_id, estimate_ybp, ci_low_ybp, ci_high_ybp \ + FROM tree.haplogroup_age_estimate WHERE method='STR_VARIANCE' AND estimate_ybp IS NOT NULL", + ) + .fetch_all(&mut *tx) + .await?; + for (hg, est, lo, hi) in str_rows { + if str_pdf.contains_key(&hg) { + continue; + } + let mean = est as f64; + let sigma = match (lo, hi) { + (Some(l), Some(h)) if h > l => (h - l) as f64 / (2.0 * 1.96), + _ => (mean * 0.25).max(1.0), + }; + str_pdf.insert(hg, Pdf::gaussian_on(mean, sigma, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS)); + } + + // ── Combine all method terms per branch (McDonald Eq 1: P(t|all)=k·∏P(t|eᵢ)) ── + // Multiply the actual term PDFs rather than inverse-variance-averaging their + // medians, so non-Gaussian shape (Poisson skew, STR convergent-mutation tails) + // is preserved. 
If the terms are disjoint (product underflows to zero mass) the + // node falls back to the inverse-variance Gaussian combine, which can't annihilate. + let mut nodes: BTreeSet = BTreeSet::new(); + nodes.extend(snp_pdf.keys().chain(gen_pdf.keys()).chain(str_pdf.keys()).copied()); + for hg in nodes { + let factors: Vec<&Pdf> = + [snp_pdf.get(&hg), gen_pdf.get(&hg), str_pdf.get(&hg)].into_iter().flatten().collect(); + let Some((first, rest)) = factors.split_first() else { continue }; + let product = rest.iter().fold((*first).clone(), |acc, f| acc.multiply(f)); + let combined = if product.total() > 0.0 { + product + } else { + let params: Vec<(f64, f64)> = factors.iter().map(|p| pdf_gaussian_params(p)).collect(); + match combine(&params) { + Some((mean, sigma)) => Pdf::gaussian_on(mean, sigma, TREE_RESOLUTION_YEARS, TREE_MAX_AGE_YEARS), + None => (*first).clone(), + } + }; + let (med, lo, hi) = combined.ci95(); + upsert_estimate_ci( + &mut tx, + hg, + "COMBINED", + med.round() as i32, + lo.round() as i32, + hi.round() as i32, + factors.len() as i32, + ) + .await?; + // Gap-fill the authoritative tmrca_ybp (never overwrite a curated value). + sqlx::query("UPDATE tree.haplogroup SET tmrca_ybp = $2 WHERE id = $1 AND tmrca_ybp IS NULL") + .bind(hg) + .bind(med.round() as i32) + .execute(&mut *tx) + .await?; + stats.combined += 1; + } + + tx.commit().await?; + Ok(stats) +} + +/// `(median, sigma)` Gaussian approximation of a PDF (sigma from its 95% CI) — used +/// only for the disjoint-terms fallback in the combine. +fn pdf_gaussian_params(p: &Pdf) -> (f64, f64) { + let (med, lo, hi) = p.ci95(); + (med, ((hi - lo) / (2.0 * 1.96)).max(1.0)) +} + +/// Upsert a point estimate with a relative-error CI. 
+async fn upsert_estimate( + tx: &mut sqlx::Transaction<'_, sqlx::Postgres>, + hg: i64, + method: &str, + years: f64, + rel: f64, + marker_or_snp_count: Option, + sample_count: Option, +) -> Result<(), DbError> { + let lo = (years * (1.0 - 1.96 * rel)).max(0.0).round() as i32; + let hi = (years * (1.0 + 1.96 * rel)).round() as i32; + sqlx::query( + "INSERT INTO tree.haplogroup_age_estimate \ + (haplogroup_id, method, estimate_ybp, ci_low_ybp, ci_high_ybp, sample_count, marker_count, computed_at) \ + VALUES ($1,$2,$3,$4,$5,$6,$7, now()) \ + ON CONFLICT (haplogroup_id, method) DO UPDATE SET \ + estimate_ybp=EXCLUDED.estimate_ybp, ci_low_ybp=EXCLUDED.ci_low_ybp, ci_high_ybp=EXCLUDED.ci_high_ybp, \ + sample_count=EXCLUDED.sample_count, marker_count=EXCLUDED.marker_count, computed_at=now()", + ) + .bind(hg) + .bind(method) + .bind(years.round() as i32) + .bind(lo) + .bind(hi) + .bind(sample_count) + .bind(marker_or_snp_count) + .execute(&mut **tx) + .await?; + Ok(()) +} + +/// Upsert with explicit CI bounds (the COMBINED term). +async fn upsert_estimate_ci( + tx: &mut sqlx::Transaction<'_, sqlx::Postgres>, + hg: i64, + method: &str, + est: i32, + lo: i32, + hi: i32, + term_count: i32, +) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO tree.haplogroup_age_estimate \ + (haplogroup_id, method, estimate_ybp, ci_low_ybp, ci_high_ybp, sample_count, computed_at) \ + VALUES ($1,$2,$3,$4,$5,$6, now()) \ + ON CONFLICT (haplogroup_id, method) DO UPDATE SET \ + estimate_ybp=EXCLUDED.estimate_ybp, ci_low_ybp=EXCLUDED.ci_low_ybp, ci_high_ybp=EXCLUDED.ci_high_ybp, \ + sample_count=EXCLUDED.sample_count, computed_at=now()", + ) + .bind(hg) + .bind(method) + .bind(est) + .bind(lo) + .bind(hi) + .bind(term_count) + .execute(&mut **tx) + .await?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn inverse_variance_combine() { + // Two equally-precise estimates → mean between them, sigma tighter than either. 
+ let (mean, sigma) = combine(&[(3000.0, 300.0), (3300.0, 300.0)]).unwrap(); + assert!((mean - 3150.0).abs() < 1.0); + assert!(sigma < 300.0 && sigma > 200.0); + // A tighter estimate pulls the mean toward it. + let (mean2, _) = combine(&[(3000.0, 50.0), (5000.0, 1000.0)]).unwrap(); + assert!(mean2 < 3100.0, "tight 3000±50 dominates, got {mean2}"); + assert!(combine(&[]).is_none()); + } + + // Propagation tests use b·µ = 0.01 (b = 1.25e7, µ = 8e-10) so a Poisson age has + // a clean mode of m/(b·µ) = 100·m years. + const B: f64 = 1.25e7; + const MU: f64 = 8e-10; + // Small ages here → use the fine default PDF grid. + const RES: f64 = crate::pdf::RESOLUTION_YEARS; + const MAXA: f64 = crate::pdf::MAX_AGE_YEARS; + + #[test] + fn tmrca_of_single_tester_is_poisson_mode() { + let clades = vec![Clade { branch_snps: 0, callable_bp: B, children: vec![], tester_snps: vec![3] }]; + let ages = propagate(&clades, MU, RES, MAXA); + let tmrca = &ages[0].as_ref().unwrap().tmrca; + assert!((tmrca.mode() - 300.0).abs() <= 10.0, "mode {}", tmrca.mode()); + } + + #[test] + fn parent_is_older_than_child_and_formed_exceeds_tmrca() { + // parent(0) → child(1); child has 2 private SNPs and is 1 SNP below parent. + let clades = vec![ + Clade { branch_snps: 0, callable_bp: B, children: vec![1], tester_snps: vec![] }, + Clade { branch_snps: 1, callable_bp: B, children: vec![], tester_snps: vec![2] }, + ]; + let ages = propagate(&clades, MU, RES, MAXA); + let parent = ages[0].as_ref().unwrap(); + let child = ages[1].as_ref().unwrap(); + // Parent TMRCA = child TMRCA convolved with the branch → strictly older. + assert!(parent.tmrca.median() > child.tmrca.median(), "causality"); + // A node's formed age (split from parent) is older than its own TMRCA. 
+ assert!(child.formed.median() > child.tmrca.median(), "formed > tmrca"); + } + + #[test] + fn more_children_tighten_the_parent_ci() { + let leaf = |b| Clade { branch_snps: 1, callable_bp: b, children: vec![], tester_snps: vec![2] }; + let one = vec![ + Clade { branch_snps: 0, callable_bp: B, children: vec![1], tester_snps: vec![] }, + leaf(B), + ]; + let two = vec![ + Clade { branch_snps: 0, callable_bp: B, children: vec![1, 2], tester_snps: vec![] }, + leaf(B), + leaf(B), + ]; + let width = |ages: &[Option]| { + let (_, lo, hi) = ages[0].as_ref().unwrap().tmrca.ci95(); + hi - lo + }; + assert!( + width(&propagate(&two, MU, RES, MAXA)) < width(&propagate(&one, MU, RES, MAXA)), + "two independent sub-clades give a tighter parent TMRCA than one" + ); + } + + // ── DB-gated: full path over a seeded root→mid→leaf tree ────────────────── + async fn ins_hg(pool: &PgPool, name: &str) -> i64 { + sqlx::query_scalar( + "INSERT INTO tree.haplogroup (name, haplogroup_type) \ + VALUES ($1, 'Y_DNA'::core.dna_type) RETURNING id", + ) + .bind(name) + .fetch_one(pool) + .await + .unwrap() + } + async fn ins_var(pool: &PgPool, name: &str, het: bool) -> i64 { + let ann = if het { + serde_json::json!({ "region_overlaps": ["heterochromatin:DYZ1"] }) + } else { + serde_json::json!({}) + }; + sqlx::query_scalar( + "INSERT INTO core.variant (canonical_name, mutation_type, naming_status, annotations) \ + VALUES ($1, 'SNP'::core.mutation_type, 'NAMED'::core.naming_status, $2) RETURNING id", + ) + .bind(name) + .bind(ann) + .fetch_one(pool) + .await + .unwrap() + } + + /// Seed a 3-node chain with one tester, run the whole pipeline, and check the + /// het-mask, causality (parent older), and formed > tmrca — against real PG. 
+ #[tokio::test] + async fn recompute_over_seeded_tree() { + let Ok(url) = std::env::var("DATABASE_URL") else { + eprintln!("DATABASE_URL unset — skipping seeded age test"); + return; + }; + if url.is_empty() { + return; + } + let db = crate::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + const GUID: &str = "00000000-0000-0000-0000-0000000000aa"; + + let (root, mid, leaf) = + (ins_hg(&pool, "Y-ROOT").await, ins_hg(&pool, "Y-MID").await, ins_hg(&pool, "Y-LEAF").await); + for (p, c) in [(root, mid), (mid, leaf)] { + sqlx::query("INSERT INTO tree.haplogroup_relationship (parent_haplogroup_id, child_haplogroup_id) VALUES ($1,$2)") + .bind(p).bind(c).execute(&pool).await.unwrap(); + } + // Defining (branch) SNPs: mid 4, leaf 3 — plus one heterochromatic defining + // SNP on leaf that must be masked out. + for i in 0..4 { + let v = ins_var(&pool, &format!("MIDDEF{i}"), false).await; + sqlx::query("INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1,$2)").bind(mid).bind(v).execute(&pool).await.unwrap(); + } + for i in 0..3 { + let v = ins_var(&pool, &format!("LEAFDEF{i}"), false).await; + sqlx::query("INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1,$2)").bind(leaf).bind(v).execute(&pool).await.unwrap(); + } + let hetdef = ins_var(&pool, "LEAFDEFHET", true).await; + sqlx::query("INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1,$2)").bind(leaf).bind(hetdef).execute(&pool).await.unwrap(); + + // One tester under leaf: 12.5 Mbp callable, 5 private SNPs + 1 het (masked). 
+ sqlx::query("INSERT INTO core.biosample (sample_guid, source) VALUES ($1::uuid, 'CITIZEN')").bind(GUID).execute(&pool).await.unwrap(); + sqlx::query("INSERT INTO genomics.biosample_callable_loci (sample_guid, chromosome, y_xdegen_callable_bp) VALUES ($1::uuid, 'chrY', 12500000)").bind(GUID).execute(&pool).await.unwrap(); + for i in 0..5 { + let v = ins_var(&pool, &format!("PRIV{i}"), false).await; + sqlx::query("INSERT INTO tree.biosample_private_variant (sample_guid, variant_id, haplogroup_type, terminal_haplogroup_id) VALUES ($1::uuid,$2,'Y_DNA'::core.dna_type,$3)").bind(GUID).bind(v).bind(leaf).execute(&pool).await.unwrap(); + } + let hv = ins_var(&pool, "PRIVHET", true).await; + sqlx::query("INSERT INTO tree.biosample_private_variant (sample_guid, variant_id, haplogroup_type, terminal_haplogroup_id) VALUES ($1::uuid,$2,'Y_DNA'::core.dna_type,$3)").bind(GUID).bind(hv).bind(leaf).execute(&pool).await.unwrap(); + + // (a) build_clades: het-masking + structure. + let (clades, ids) = build_clades(&pool).await.unwrap(); + let at = |id: i64| ids.iter().position(|&x| x == id).unwrap(); + assert_eq!(clades[at(leaf)].tester_snps, vec![5], "het private SNP masked → 5 counted"); + assert_eq!(clades[at(leaf)].branch_snps, 3, "het defining SNP masked → 3"); + assert!(clades[at(mid)].children.contains(&at(leaf))); + assert!(clades[at(root)].children.contains(&at(mid))); + assert!((clades[at(leaf)].callable_bp - 12_500_000.0).abs() < 1.0); + + // (b) full recompute: ages written, causality, formed > tmrca. 
+ let stats = recompute_combined_ages(&pool).await.unwrap(); + assert!(stats.snp >= 3, "root/mid/leaf all scored, got {}", stats.snp); + let rows: Vec<(i64, Option, Option)> = sqlx::query_as( + "SELECT id, tmrca_ybp, formed_ybp FROM tree.haplogroup WHERE id = ANY($1)", + ) + .bind(vec![root, mid, leaf]) + .fetch_all(&pool) + .await + .unwrap(); + let tmrca = |id: i64| rows.iter().find(|r| r.0 == id).unwrap().1.unwrap(); + let formed = |id: i64| rows.iter().find(|r| r.0 == id).unwrap().2.unwrap(); + assert!(tmrca(leaf) > 0, "leaf has a positive TMRCA"); + assert!(tmrca(root) > tmrca(mid) && tmrca(mid) > tmrca(leaf), "causality: root>mid>leaf"); + assert!(formed(leaf) >= tmrca(leaf), "leaf formed age ≥ its TMRCA"); + } +} diff --git a/rust/crates/du-db/src/audit.rs b/rust/crates/du-db/src/audit.rs new file mode 100644 index 00000000..65a8c047 --- /dev/null +++ b/rust/crates/du-db/src/audit.rs @@ -0,0 +1,39 @@ +//! Curator action audit trail (`ident.audit_log`). The first runtime writer; the +//! column set mirrors the legacy backfill in `du-migrate`. Used to record curator +//! decisions (accept/reject) on consensus proposals. + +use crate::DbError; +use sqlx::PgExecutor; +use uuid::Uuid; + +/// Append a curator action to the audit log. `entity_id` is the catalog row id; +/// `action` is a short verb (`ACCEPT`/`REJECT`/`CREATE`/`UPDATE`/`DELETE`). `id` +/// and `created_at` use DB defaults. `executor` is any pool or connection — pass +/// the surrounding `&mut *tx` to keep the audit row atomic with the mutation it +/// records. 
+#[allow(clippy::too_many_arguments)] +pub async fn log<'e, E: PgExecutor<'e>>( + executor: E, + user_id: Uuid, + entity_type: &str, + entity_id: i64, + action: &str, + old_value: Option<&serde_json::Value>, + new_value: Option<&serde_json::Value>, + comment: Option<&str>, +) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO ident.audit_log (user_id, entity_type, entity_id, action, old_value, new_value, comment) \ + VALUES ($1, $2, $3, $4, $5, $6, $7)", + ) + .bind(user_id) + .bind(entity_type) + .bind(entity_id) + .bind(action) + .bind(old_value) + .bind(new_value) + .bind(comment) + .execute(executor) + .await?; + Ok(()) +} diff --git a/rust/crates/du-db/src/auth.rs b/rust/crates/du-db/src/auth.rs new file mode 100644 index 00000000..b6606c3c --- /dev/null +++ b/rust/crates/du-db/src/auth.rs @@ -0,0 +1,132 @@ +//! Authentication/authorization queries against the `ident` schema. + +use crate::DbError; +use du_domain::ids::UserId; +use sqlx::PgPool; +use uuid::Uuid; + +/// A login credential: the owning user and the stored password hash (None for +/// OAuth-only logins). +pub struct Credential { + pub user_id: UserId, + pub password_hash: Option, +} + +/// Look up a credential by provider key (handle/email) for the `credentials` +/// provider. Returns None if no such active user/login exists. 
+pub async fn find_credential(pool: &PgPool, provider_key: &str) -> Result, DbError> { + #[derive(sqlx::FromRow)] + struct Row { + user_id: Uuid, + password_hash: Option, + } + let row: Option = sqlx::query_as( + "SELECT li.user_id, li.password_hash \ + FROM ident.user_login_info li \ + JOIN ident.users u ON u.id = li.user_id \ + WHERE li.provider_id = 'credentials' AND li.provider_key = $1 AND u.is_active = true", + ) + .bind(provider_key) + .fetch_optional(pool) + .await?; + Ok(row.map(|r| Credential { + user_id: UserId(r.user_id), + password_hash: r.password_hash, + })) +} + +/// Find-or-create a user by AT Protocol DID (the OAuth login path), ensuring an +/// `atproto` login_info row. Returns the user id. +pub async fn upsert_user_by_did( + pool: &PgPool, + did: &str, + handle: Option<&str>, + display_name: Option<&str>, +) -> Result { + let id: Uuid = sqlx::query_scalar( + "INSERT INTO ident.users (did, handle, display_name) VALUES ($1,$2,$3) \ + ON CONFLICT (did) DO UPDATE SET \ + handle = COALESCE(EXCLUDED.handle, ident.users.handle), \ + display_name = COALESCE(EXCLUDED.display_name, ident.users.display_name), \ + updated_at = now() \ + RETURNING id", + ) + .bind(did) + .bind(handle) + .bind(display_name) + .fetch_one(pool) + .await?; + sqlx::query( + "INSERT INTO ident.user_login_info (user_id, provider_id, provider_key) \ + VALUES ($1, 'atproto', $2) ON CONFLICT (provider_id, provider_key) DO NOTHING", + ) + .bind(id) + .bind(did) + .execute(pool) + .await?; + Ok(UserId(id)) +} + +/// A user's profile for their own account view. +pub struct Profile { + pub display_name: Option, + pub email: Option, + pub did: Option, + pub handle: Option, + pub created_at: chrono::DateTime, +} + +/// Update a user's display name. Returns whether a row changed. 
+pub async fn update_display_name(pool: &PgPool, user_id: UserId, name: &str) -> Result { + let n = sqlx::query("UPDATE ident.users SET display_name = $2, updated_at = now() WHERE id = $1") + .bind(user_id.0) + .bind(name) + .execute(pool) + .await? + .rows_affected(); + Ok(n > 0) +} + +/// Fetch a user's profile fields (None if the user no longer exists). +pub async fn profile(pool: &PgPool, user_id: UserId) -> Result, DbError> { + #[derive(sqlx::FromRow)] + struct Row { + display_name: Option, + email: Option, + did: Option, + handle: Option, + created_at: chrono::DateTime, + } + let row: Option = sqlx::query_as( + "SELECT display_name, email::text AS email, did, handle, created_at \ + FROM ident.users WHERE id = $1", + ) + .bind(user_id.0) + .fetch_optional(pool) + .await?; + Ok(row.map(|r| Profile { + display_name: r.display_name, + email: r.email, + did: r.did, + handle: r.handle, + created_at: r.created_at, + })) +} + +/// The display name + role names for a user (for the session). +pub async fn session_info(pool: &PgPool, user_id: UserId) -> Result<(Option, Vec), DbError> { + let display_name: Option = + sqlx::query_scalar("SELECT display_name FROM ident.users WHERE id = $1") + .bind(user_id.0) + .fetch_optional(pool) + .await? + .flatten(); + let roles: Vec = sqlx::query_scalar( + "SELECT r.name FROM ident.user_roles ur \ + JOIN ident.roles r ON r.id = ur.role_id WHERE ur.user_id = $1 ORDER BY r.name", + ) + .bind(user_id.0) + .fetch_all(pool) + .await?; + Ok((display_name, roles)) +} diff --git a/rust/crates/du-db/src/biosample.rs b/rust/crates/du-db/src/biosample.rs new file mode 100644 index 00000000..e9c3fb88 --- /dev/null +++ b/rust/crates/du-db/src/biosample.rs @@ -0,0 +1,693 @@ +//! Queries for the unified `core.biosample`. 
+ +use crate::{parse_pg_enum, DbError, Page}; +use du_domain::biosample::{Biosample, GeoPoint}; +use du_domain::enums::{BiosampleSource, DnaType}; +use du_domain::ids::{PublicationId, SampleGuid}; +use sqlx::PgPool; +use uuid::Uuid; + +#[derive(sqlx::FromRow)] +struct BiosampleRow { + sample_guid: Uuid, + source: String, + accession: Option, + alias: Option, + description: Option, + center_name: Option, + locked: bool, + source_attrs: serde_json::Value, + atproto: Option, +} + +impl BiosampleRow { + fn into_domain(self) -> Result { + Ok(Biosample { + sample_guid: SampleGuid(self.sample_guid), + source: parse_pg_enum(&self.source, "source")?, + accession: self.accession, + alias: self.alias, + description: self.description, + center_name: self.center_name, + locked: self.locked, + source_attrs: self.source_attrs, + atproto: self.atproto, + }) + } +} + +const SELECT: &str = "SELECT sample_guid, source::text AS source, accession, alias, description, \ + center_name, locked, source_attrs, atproto FROM core.biosample WHERE deleted = false"; + +pub async fn get_by_guid(pool: &PgPool, guid: SampleGuid) -> Result, DbError> { + let row: Option = sqlx::query_as(&format!("{SELECT} AND sample_guid = $1")) + .bind(guid.0) + .fetch_optional(pool) + .await?; + row.map(BiosampleRow::into_domain).transpose() +} + +/// All mappable biosample locations. PostGIS `ST_X`/`ST_Y` extract lon/lat from +/// the donor's `geocoord` (geometry Point, 4326). Backs the biosample map. 
+pub async fn geo_points(pool: &PgPool) -> Result, DbError> { + #[derive(sqlx::FromRow)] + struct GeoRow { + lat: f64, + lon: f64, + accession: Option, + source: String, + } + let rows: Vec = sqlx::query_as( + "SELECT ST_Y(d.geocoord) AS lat, ST_X(d.geocoord) AS lon, b.accession, \ + b.source::text AS source \ + FROM core.biosample b JOIN core.specimen_donor d ON d.id = b.donor_id \ + WHERE d.geocoord IS NOT NULL AND b.deleted = false", + ) + .fetch_all(pool) + .await?; + rows.into_iter() + .map(|r| { + Ok(GeoPoint { + lat: r.lat, + lon: r.lon, + accession: r.accession, + source: parse_pg_enum(&r.source, "source")?, + }) + }) + .collect() +} + +/// Paginated biosamples linked to a publication (the biosample report). +pub async fn for_publication( + pool: &PgPool, + publication_id: PublicationId, + page: i64, + page_size: i64, +) -> Result, DbError> { + let offset = Page::<()>::offset(page, page_size); + let limit = page_size.clamp(1, 200); + + let total: i64 = sqlx::query_scalar( + "SELECT count(*) FROM pubs.publication_biosample pb \ + JOIN core.biosample b ON b.sample_guid = pb.sample_guid \ + WHERE pb.publication_id = $1 AND b.deleted = false", + ) + .bind(publication_id.0) + .fetch_one(pool) + .await?; + + let rows: Vec = sqlx::query_as( + "SELECT b.sample_guid, b.source::text AS source, b.accession, b.alias, b.description, \ + b.center_name, b.locked, b.source_attrs, b.atproto \ + FROM pubs.publication_biosample pb \ + JOIN core.biosample b ON b.sample_guid = pb.sample_guid \ + WHERE pb.publication_id = $1 AND b.deleted = false \ + ORDER BY b.accession NULLS LAST, b.sample_guid LIMIT $2 OFFSET $3", + ) + .bind(publication_id.0) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + + let items = rows + .into_iter() + .map(BiosampleRow::into_domain) + .collect::, _>>()?; + Ok(Page { items, total, page: page.max(1), page_size: limit }) +} + +// ── Public per-sample report (unified read path) ────────────────────────────── +// The canonical 
`core.biosample` (identity, the `is_public` gate, publications) +// joined to the federated analytics mirror (`fed.*`) via `atproto.uri ↔ *.biosample_ref`. +// Callers never touch `fed.*` directly — this is the seam the eventual full +// core/fed consolidation collapses into (only the query bodies change). + +/// Origin of a sample's haplogroup call — provenance shown to the reader. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum HaplogroupCallOrigin { + /// `fed.haplogroup_reconciliation` — the donor's call reconciled across all its + /// sequencing technologies (the authoritative cross-technology consensus). + Reconciled, + /// `fed.biosample.y/mt_haplogroup` (a single Navigator call, not reconciled). + FedConsensus, + /// `core.biosample.original_haplogroups` (per-publication original call). + Original, +} + +/// A called haplogroup name plus its lineage. The phylogenetic pathway is +/// resolved separately by [`crate::haplogroup::pathway`] so the SQL layer stays +/// free of tree-walking. The reliability fields are populated only for a +/// `Reconciled` call (the cross-technology consensus). +#[derive(Debug, Clone)] +pub struct HaplogroupCall { + pub name: String, + pub dna_type: DnaType, + pub origin: HaplogroupCallOrigin, + /// Consensus confidence ∈ [0,1] (reconciled calls only). + pub confidence: Option, + /// Number of sequencing runs reconciled into the consensus. + pub run_count: Option, + /// SNP concordance across the reconciled runs ∈ [0,1]. + pub snp_concordance: Option, + /// `COMPATIBLE` / `MINOR_DIVERGENCE` / `INCOMPATIBLE` … + pub compatibility_level: Option, +} + +/// A reconciliation consensus call, before it's lifted to a [`HaplogroupCall`]. +struct ReconCall { + name: String, + confidence: Option, + run_count: Option, + snp_concordance: Option, + compatibility_level: Option, +} + +/// Lift a reconciliation consensus to a `Reconciled`-origin [`HaplogroupCall`]. 
+fn reconciled_call(r: Option, dna_type: DnaType) -> Option { + r.map(|r| HaplogroupCall { + name: r.name, + dna_type, + origin: HaplogroupCallOrigin::Reconciled, + confidence: r.confidence, + run_count: r.run_count, + snp_concordance: r.snp_concordance, + compatibility_level: r.compatibility_level, + }) +} + +/// WGS84 origin point (from the donor's `geocoord`). +#[derive(Debug, Clone, Copy)] +pub struct LatLon { + pub lat: f64, + pub lon: f64, +} + +#[derive(Debug, Clone)] +pub struct ReportIdentity { + pub sample_guid: SampleGuid, + pub source: BiosampleSource, + pub accession: Option, + pub alias: Option, + pub description: Option, + pub center_name: Option, + /// The Postgres `biological_sex` label (`MALE`/`FEMALE`/`INTERSEX`), as text. + pub sex: Option, + pub origin: Option, + pub is_public: bool, + /// atproto link present AND a matching `fed.biosample` row was found. + pub is_federated: bool, +} + +#[derive(Debug, Clone)] +pub struct SequencingRun { + pub platform_name: Option, + pub instrument_model: Option, + pub test_type: Option, + pub library_layout: Option, + pub total_reads: Option, + pub read_length: Option, + pub mean_insert_size: Option, + /// at:// uri of the run (join key to its coverage summary). + pub at_uri: String, +} + +#[derive(Debug, Clone)] +pub struct CoverageSummary { + pub reference_build: Option, + pub aligner: Option, + pub mean_coverage: Option, + pub median_coverage: Option, + pub pct_10x: Option, + pub pct_20x: Option, + pub pct_30x: Option, + /// at:// uri of the sequencing run this coverage belongs to (may be NULL). + pub sequence_run_ref: Option, + /// The run's test type, when resolvable (drives the conformance check). + pub test_type: Option, + /// Advertised minimum depth for the test type (`test_type_definition`), if set. + pub expected_min_depth: Option, + /// The empirical cohort median depth for the test type (`test_type_coverage_norm`). + pub norm_median_depth: Option, + /// Conformance vs. 
the advertised spec (or cohort norm when no spec): `BELOW` / + /// `AT` / `ABOVE`, or `None` when there's nothing to compare against. + pub conformance: Option, +} + +/// Classify a sample's aligned mean depth against the **empirical cohort norm** +/// for its test type (preferred), falling back to the advertised spec only when no +/// cohort norm exists yet. The cohort norm is the fair baseline: an advertised +/// "30× WGS" is really a raw-yield spec (~90 Gb of reads), which aligns to less than +/// 30× after QC/dedup, and D2C lab products don't target 30× aligned at all — so +/// comparing aligned depth to a literal advertised number would mislabel them. The +/// cohort norm is measured in the same aligned-depth units and reflects what each +/// test type actually delivers. ±5% of the baseline counts as `AT`. +fn conformance(mean: Option, expected: Option, norm: Option) -> Option { + let mean = mean?; + let baseline = norm.or(expected)?; + if baseline <= 0.0 { + return None; + } + Some(if mean < baseline * 0.95 { + "BELOW" + } else if mean > baseline * 1.05 { + "ABOVE" + } else { + "AT" + } + .to_string()) +} + +#[derive(Debug, Clone)] +pub struct AncestryBreakdown { + pub analysis_method: Option, + pub panel_type: Option, + pub confidence_level: Option, + /// Continental rollup: `[{superPopulation, percentage}]`. + pub super_populations: serde_json::Value, + /// Sub-continental percentages (payload shape not asserted — render defensively). + pub components: serde_json::Value, +} + +#[derive(Debug, Clone)] +pub struct ReportPublication { + pub id: PublicationId, + pub title: String, + pub doi: Option, + pub url: Option, + pub publication_date: Option, +} + +/// Everything the public per-sample report needs, assembled from the canonical +/// biosample plus its federated analytics. `is_public` is carried (not filtered) +/// so the web layer can let curators preview private samples; every public +/// surface MUST check `identity.is_public` itself. 
+#[derive(Debug, Clone)] +pub struct SampleReport { + pub identity: ReportIdentity, + pub y: Option, + pub mt: Option, + pub sequencing: Vec, + pub coverage: Vec, + pub ancestry: Option, + pub publications: Vec, +} + +/// Pick the first non-null call from an `original_haplogroups` JSONB array, +/// tolerating both shapes (standard `{y, mt, y_result, mt_result}` and citizen +/// `{y_result, mt_result}`, all keys null-stripped): prefer `primary`, else `fallback`. +pub(crate) fn pick_original_call(arr: &serde_json::Value, primary: &str, fallback: &str) -> Option { + let entries = arr.as_array()?; + entries.iter().find_map(|e| { + let take = |k: &str| { + e.get(k) + .and_then(serde_json::Value::as_str) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + }; + take(primary).or_else(|| take(fallback)) + }) +} + +/// Resolve a slug/accession/alias/sample_guid string to a single `sample_guid`. +/// Prefers public, then non-deleted rows deterministically (earliest guid). +pub async fn resolve_guid(pool: &PgPool, identifier: &str) -> Result, DbError> { + let id = identifier.trim(); + if let Ok(uuid) = Uuid::parse_str(id) { + let exists: Option = + sqlx::query_scalar("SELECT sample_guid FROM core.biosample WHERE sample_guid = $1 AND deleted = false") + .bind(uuid) + .fetch_optional(pool) + .await?; + return Ok(exists.map(SampleGuid)); + } + let guid: Option = sqlx::query_scalar( + "SELECT sample_guid FROM core.biosample \ + WHERE deleted = false AND (lower(accession) = lower($1) OR lower(alias) = lower($1)) \ + ORDER BY is_public DESC, sample_guid LIMIT 1", + ) + .bind(id) + .fetch_optional(pool) + .await?; + Ok(guid.map(SampleGuid)) +} + +/// Assemble the report for one sample by guid, or `None` if it doesn't exist / +/// is deleted. Does NOT filter on `is_public` — the caller gates visibility. 
+pub async fn report_by_guid(pool: &PgPool, guid: SampleGuid) -> Result, DbError> { + // ── Q1: identity + gate flags (joins the donor for sex/origin) ── + #[derive(sqlx::FromRow)] + struct IdRow { + sample_guid: Uuid, + source: String, + accession: Option, + alias: Option, + description: Option, + center_name: Option, + is_public: bool, + at_uri: Option, + repo_did: Option, + original_haplogroups: serde_json::Value, + sex: Option, + lat: Option, + lon: Option, + } + let id_row: Option = sqlx::query_as( + "SELECT b.sample_guid, b.source::text AS source, b.accession, b.alias, b.description, \ + b.center_name, b.is_public, b.atproto->>'uri' AS at_uri, b.atproto->>'repo_did' AS repo_did, \ + b.original_haplogroups, \ + d.sex::text AS sex, ST_Y(d.geocoord) AS lat, ST_X(d.geocoord) AS lon \ + FROM core.biosample b \ + LEFT JOIN core.specimen_donor d ON d.id = b.donor_id \ + WHERE b.sample_guid = $1 AND b.deleted = false", + ) + .bind(guid.0) + .fetch_optional(pool) + .await?; + let Some(idr) = id_row else { return Ok(None) }; + + // ── Q2: federated consensus haplogroups (only when atproto-linked) ── + let mut fed_y: Option = None; + let mut fed_mt: Option = None; + let mut is_federated = false; + if let Some(at_uri) = idr.at_uri.as_deref() { + let fed: Option<(Option, Option)> = + sqlx::query_as("SELECT y_haplogroup, mt_haplogroup FROM fed.biosample WHERE at_uri = $1") + .bind(at_uri) + .fetch_optional(pool) + .await?; + if let Some((y, mt)) = fed { + is_federated = true; + fed_y = y; + fed_mt = mt; + } + } + + // ── Q2b: the cross-technology consensus (the authoritative call). Keyed by the + // citizen's repo DID = the reconciliation publisher's DID; pick the best per arm. 
+ let mut recon_y: Option = None; + let mut recon_mt: Option = None; + if let Some(repo_did) = idr.repo_did.as_deref() { + #[derive(sqlx::FromRow)] + struct ReconRow { + dna_type: Option, + consensus_haplogroup: Option, + confidence: Option, + run_count: Option, + snp_concordance: Option, + compatibility_level: Option, + } + let rows: Vec = sqlx::query_as( + "SELECT DISTINCT ON (dna_type) dna_type, consensus_haplogroup, confidence, run_count, \ + snp_concordance, compatibility_level \ + FROM fed.haplogroup_reconciliation \ + WHERE did = $1 AND consensus_haplogroup IS NOT NULL \ + ORDER BY dna_type, run_count DESC NULLS LAST, time_us DESC", + ) + .bind(repo_did) + .fetch_all(pool) + .await?; + for r in rows { + let call = r.consensus_haplogroup.map(|name| ReconCall { + name, + confidence: r.confidence, + run_count: r.run_count, + snp_concordance: r.snp_concordance, + compatibility_level: r.compatibility_level, + }); + match r.dna_type.as_deref() { + Some("Y_DNA") => recon_y = call, + Some("MT_DNA") => recon_mt = call, + _ => {} + } + } + if recon_y.is_some() || recon_mt.is_some() { + is_federated = true; + } + } + + // Call precedence: cross-technology consensus, else the single federated call, + // else the newest original publication call. 
+ let y = reconciled_call(recon_y, DnaType::YDna) + .or_else(|| { + fed_y.map(|name| HaplogroupCall { + name, + dna_type: DnaType::YDna, + origin: HaplogroupCallOrigin::FedConsensus, + confidence: None, + run_count: None, + snp_concordance: None, + compatibility_level: None, + }) + }) + .or_else(|| { + pick_original_call(&idr.original_haplogroups, "y", "y_result").map(|name| HaplogroupCall { + name, + dna_type: DnaType::YDna, + origin: HaplogroupCallOrigin::Original, + confidence: None, + run_count: None, + snp_concordance: None, + compatibility_level: None, + }) + }); + let mt = reconciled_call(recon_mt, DnaType::MtDna) + .or_else(|| { + fed_mt.map(|name| HaplogroupCall { + name, + dna_type: DnaType::MtDna, + origin: HaplogroupCallOrigin::FedConsensus, + confidence: None, + run_count: None, + snp_concordance: None, + compatibility_level: None, + }) + }) + .or_else(|| { + pick_original_call(&idr.original_haplogroups, "mt", "mt_result").map(|name| HaplogroupCall { + name, + dna_type: DnaType::MtDna, + origin: HaplogroupCallOrigin::Original, + confidence: None, + run_count: None, + snp_concordance: None, + compatibility_level: None, + }) + }); + + // ── Q3/Q4/Q5: federated sequencing, coverage, ancestry (only when linked) ── + let mut sequencing = Vec::new(); + let mut coverage = Vec::new(); + let mut ancestry = None; + if let Some(at_uri) = idr.at_uri.as_deref() { + #[derive(sqlx::FromRow)] + struct SeqRow { + at_uri: String, + platform_name: Option, + instrument_model: Option, + test_type: Option, + library_layout: Option, + total_reads: Option, + read_length: Option, + mean_insert_size: Option, + } + let seq: Vec = sqlx::query_as( + "SELECT at_uri, platform_name, instrument_model, test_type, library_layout, \ + total_reads, read_length, mean_insert_size \ + FROM fed.sequencerun WHERE biosample_ref = $1 ORDER BY record_created_at DESC NULLS LAST", + ) + .bind(at_uri) + .fetch_all(pool) + .await?; + sequencing = seq + .into_iter() + .map(|r| SequencingRun { + 
platform_name: r.platform_name, + instrument_model: r.instrument_model, + test_type: r.test_type, + library_layout: r.library_layout, + total_reads: r.total_reads, + read_length: r.read_length, + mean_insert_size: r.mean_insert_size, + at_uri: r.at_uri, + }) + .collect(); + + #[derive(sqlx::FromRow)] + struct CovRow { + reference_build: Option, + aligner: Option, + mean_coverage: Option, + median_coverage: Option, + pct_10x: Option, + pct_20x: Option, + pct_30x: Option, + sequence_run_ref: Option, + test_type: Option, + expected_min_depth: Option, + norm_median_depth: Option, + } + // Resolve each coverage row's test type (via its run), the advertised spec + // (test_type_definition, opportunistic), and the empirical cohort norm. + let cov: Vec = sqlx::query_as( + "SELECT cs.reference_build, cs.aligner, cs.mean_coverage, cs.median_coverage, \ + cs.pct_10x, cs.pct_20x, cs.pct_30x, cs.sequence_run_ref, \ + sr.test_type AS test_type, ttd.expected_min_depth AS expected_min_depth, \ + n.median_mean_depth AS norm_median_depth \ + FROM fed.coverage_summary cs \ + LEFT JOIN fed.sequencerun sr ON sr.at_uri = cs.sequence_run_ref \ + LEFT JOIN genomics.test_type_definition ttd ON upper(ttd.code) = upper(sr.test_type) \ + LEFT JOIN genomics.test_type_coverage_norm n ON n.test_type = sr.test_type \ + WHERE cs.biosample_ref = $1 ORDER BY cs.mean_coverage DESC NULLS LAST", + ) + .bind(at_uri) + .fetch_all(pool) + .await?; + coverage = cov + .into_iter() + .map(|r| CoverageSummary { + conformance: conformance(r.mean_coverage, r.expected_min_depth, r.norm_median_depth), + reference_build: r.reference_build, + aligner: r.aligner, + mean_coverage: r.mean_coverage, + median_coverage: r.median_coverage, + pct_10x: r.pct_10x, + pct_20x: r.pct_20x, + pct_30x: r.pct_30x, + sequence_run_ref: r.sequence_run_ref, + test_type: r.test_type, + expected_min_depth: r.expected_min_depth, + norm_median_depth: r.norm_median_depth, + }) + .collect(); + + #[derive(sqlx::FromRow)] + struct AncRow { + 
analysis_method: Option, + panel_type: Option, + confidence_level: Option, + super_population_summary: serde_json::Value, + components: serde_json::Value, + } + let anc: Option = sqlx::query_as( + "SELECT analysis_method, panel_type, confidence_level, super_population_summary, components \ + FROM fed.population_breakdown WHERE biosample_ref = $1 \ + ORDER BY record_created_at DESC NULLS LAST LIMIT 1", + ) + .bind(at_uri) + .fetch_optional(pool) + .await?; + ancestry = anc.map(|r| AncestryBreakdown { + analysis_method: r.analysis_method, + panel_type: r.panel_type, + confidence_level: r.confidence_level, + super_populations: r.super_population_summary, + components: r.components, + }); + } + + // ── Q6: source publications ── + #[derive(sqlx::FromRow)] + struct PubRow { + id: i64, + title: String, + doi: Option, + url: Option, + publication_date: Option, + } + let pubs: Vec = sqlx::query_as( + "SELECT p.id, p.title, p.doi, p.url, p.publication_date \ + FROM pubs.publication_biosample pb JOIN pubs.publication p ON p.id = pb.publication_id \ + WHERE pb.sample_guid = $1 ORDER BY p.publication_date DESC NULLS LAST", + ) + .bind(guid.0) + .fetch_all(pool) + .await?; + let publications = pubs + .into_iter() + .map(|p| ReportPublication { + id: PublicationId(p.id), + title: p.title, + doi: p.doi, + url: p.url, + publication_date: p.publication_date, + }) + .collect(); + + let origin = match (idr.lat, idr.lon) { + (Some(lat), Some(lon)) => Some(LatLon { lat, lon }), + _ => None, + }; + let identity = ReportIdentity { + sample_guid: SampleGuid(idr.sample_guid), + source: parse_pg_enum(&idr.source, "source")?, + accession: idr.accession, + alias: idr.alias, + description: idr.description, + center_name: idr.center_name, + sex: idr.sex, + origin, + is_public: idr.is_public, + is_federated, + }; + Ok(Some(SampleReport { identity, y, mt, sequencing, coverage, ancestry, publications })) +} + +/// Resolve an identifier (slug/accession/alias/guid) and assemble its report. 
+pub async fn report(pool: &PgPool, identifier: &str) -> Result, DbError> {
+    // NOTE(review): generic arguments in this hunk (`Result, DbError>`, bare `Result`,
+    // bare `Vec`) look stripped by whatever flattened the patch — confirm the real
+    // signatures against biosample.rs before relying on them.
+    // An identifier that does not resolve yields `Ok(None)`; otherwise the guid-based
+    // report is assembled.
+    match resolve_guid(pool, identifier).await? {
+        Some(guid) => report_by_guid(pool, guid).await,
+        None => Ok(None),
+    }
+}
+
+/// Set the public-visibility flag on a sample. Returns whether a row changed.
+///
+/// Only rows with `deleted = false` are touched; `updated_at` is refreshed in the
+/// same statement.
+pub async fn set_public(pool: &PgPool, guid: SampleGuid, value: bool) -> Result {
+    let affected = sqlx::query(
+        "UPDATE core.biosample SET is_public = $2, updated_at = now() \
+         WHERE sample_guid = $1 AND deleted = false",
+    )
+    .bind(guid.0)
+    .bind(value)
+    .execute(pool)
+    .await?
+    .rows_affected();
+    Ok(affected > 0)
+}
+
+/// Lookup by accession or alias (the private biosample search).
+///
+/// `query` is trimmed and wrapped in `%…%` for a case-insensitive substring match.
+/// `%` and `_` inside `query` are not escaped, so they act as ILIKE wildcards.
+/// Returns at most 50 rows ordered by accession.
+pub async fn find_by_alias_or_accession(
+    pool: &PgPool,
+    query: &str,
+) -> Result, DbError> {
+    let like = format!("%{}%", query.trim());
+    // `SELECT` is a module-level SQL prefix defined outside this view; the appended
+    // `AND (...)` implies it already ends in a WHERE clause — TODO confirm.
+    let rows: Vec =
+        sqlx::query_as(&format!("{SELECT} AND (accession ILIKE $1 OR alias ILIKE $1) ORDER BY accession LIMIT 50"))
+            .bind(&like)
+            .fetch_all(pool)
+            .await?;
+    rows.into_iter().map(BiosampleRow::into_domain).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::conformance;
+
+    #[test]
+    fn conformance_prefers_cohort_norm_then_spec() {
+        // Argument order, as exercised below: (mean coverage, advertised spec, cohort norm).
+        // The cohort norm wins over the advertised spec: a sample at 28× whose test
+        // type's cohort delivers ~29× is AT — NOT flagged BELOW against an advertised
+        // 30× aligned bar (the D2C case the user called out).
+        assert_eq!(conformance(Some(28.0), Some(30.0), Some(29.0)).as_deref(), Some("AT"));
+        // Genuinely under its cohort.
+        assert_eq!(conformance(Some(20.0), Some(30.0), Some(29.0)).as_deref(), Some("BELOW"));
+        // Above its cohort.
+        assert_eq!(conformance(Some(33.0), None, Some(29.0)).as_deref(), Some("ABOVE"));
+        // No cohort norm yet → fall back to the advertised spec.
+        assert_eq!(conformance(Some(20.0), Some(30.0), None).as_deref(), Some("BELOW"));
+        // Nothing to compare against.
+        assert_eq!(conformance(Some(30.0), None, None), None);
+        assert_eq!(conformance(None, Some(30.0), Some(29.0)), None);
+    }
+}
diff --git a/rust/crates/du-db/src/change_set.rs b/rust/crates/du-db/src/change_set.rs
new file mode 100644
index 00000000..587f4a4f
--- /dev/null
+++ b/rust/crates/du-db/src/change_set.rs
@@ -0,0 +1,597 @@
+//! Tree versioning: change-set lifecycle + apply engine.
+//!
+//! A change set groups proposed `tree_change` rows (CREATE/UPDATE/DELETE/
+//! REPARENT/VARIANT_EDIT). Curators review each change, then *apply* the set:
+//! approved changes are written to the production tree using the temporal edge
+//! model (close the current edge with `valid_until = now()`, open a new one).
+//!
+//! Lifecycle: DRAFT → READY_FOR_REVIEW → UNDER_REVIEW → APPLIED, with DISCARDED
+//! reachable from any non-applied state.
+//!
+//! Scope: this is the direct `tree_change` path — changes reference *existing*
+//! production haplogroup ids (REPARENT/UPDATE/DELETE/VARIANT_EDIT) or create a
+//! node under an existing parent (CREATE). The WIP staging path (placeholder ids
+//! + conflict resolutions) is produced by the merge algorithm and lands with it.
+ +use crate::{DbError, Page}; +use serde_json::Value; +use sqlx::types::chrono::{DateTime, Utc}; +use sqlx::{PgPool, Postgres, Transaction}; + +// ── views ──────────────────────────────────────────────────────────────────── + +#[derive(Debug, Clone, sqlx::FromRow, serde::Serialize)] +pub struct ChangeSetSummary { + pub id: i64, + pub source: String, + pub haplogroup_type: Option, + pub status: String, + pub description: Option, + pub change_count: i64, + pub created_by: Option, + pub created_at: DateTime, + pub promoted_by: Option, + pub promoted_at: Option>, +} + +#[derive(Debug, Clone, sqlx::FromRow, serde::Serialize)] +pub struct TreeChangeView { + pub id: i64, + pub change_type: String, + pub haplogroup_id: Option, + pub haplogroup_name: Option, + pub old_values: Option, + pub new_values: Option, + pub status: String, +} + +#[derive(Debug, Clone, sqlx::FromRow, serde::Serialize)] +pub struct CommentView { + pub id: i64, + pub commented_by: String, + pub comment: String, + pub created_at: DateTime, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct ChangeSetDetail { + pub summary: ChangeSetSummary, + pub changes: Vec, + pub comments: Vec, +} + +#[derive(Debug, Clone, Default, serde::Serialize)] +pub struct DiffSummary { + pub added: i64, + pub removed: i64, + pub modified: i64, + pub reparented: i64, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct DiffEntry { + pub diff_type: String, + pub name: String, + pub detail: Value, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct TreeDiff { + pub entries: Vec, + pub summary: DiffSummary, +} + +#[derive(Debug, Clone, Default, serde::Serialize)] +pub struct ApplyResult { + pub created: i64, + pub updated: i64, + pub deleted: i64, + pub reparented: i64, + pub variant_edits: i64, + pub skipped: i64, +} + +const CS_COLS: &str = "id, source, haplogroup_type::text AS haplogroup_type, status::text AS status, \ + description, change_count::bigint AS change_count, created_by, created_at, 
promoted_by, promoted_at"; + +// ── lifecycle ───────────────────────────────────────────────────────────────── + +pub async fn create( + pool: &PgPool, + source: &str, + haplogroup_type: Option<&str>, + description: Option<&str>, + created_by: &str, +) -> Result { + let id: i64 = sqlx::query_scalar( + "INSERT INTO tree.change_set (source, haplogroup_type, description, created_by) \ + VALUES ($1, $2::core.dna_type, $3, $4) RETURNING id", + ) + .bind(source) + .bind(haplogroup_type) + .bind(description) + .bind(created_by) + .fetch_one(pool) + .await?; + Ok(id) +} + +/// Author a change within a set (also bumps `change_count`). Returns its id. +pub async fn add_change( + pool: &PgPool, + change_set_id: i64, + change_type: &str, + haplogroup_id: Option, + old_values: Option<&Value>, + new_values: Option<&Value>, +) -> Result { + let mut tx = pool.begin().await?; + let id: i64 = sqlx::query_scalar( + "INSERT INTO tree.tree_change (change_set_id, change_type, haplogroup_id, old_values, new_values) \ + VALUES ($1, $2::tree.tree_change_type, $3, $4, $5) RETURNING id", + ) + .bind(change_set_id) + .bind(change_type) + .bind(haplogroup_id) + .bind(old_values) + .bind(new_values) + .fetch_one(&mut *tx) + .await?; + sqlx::query("UPDATE tree.change_set SET change_count = change_count + 1 WHERE id = $1") + .bind(change_set_id) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(id) +} + +pub async fn list( + pool: &PgPool, + haplogroup_type: Option<&str>, + status: Option<&str>, + page: i64, + page_size: i64, +) -> Result, DbError> { + let offset = Page::<()>::offset(page, page_size); + let limit = page_size.clamp(1, 200); + let where_sql = "WHERE ($1::text IS NULL OR haplogroup_type::text = $1) \ + AND ($2::text IS NULL OR status::text = $2)"; + let total: i64 = sqlx::query_scalar(&format!("SELECT count(*) FROM tree.change_set {where_sql}")) + .bind(haplogroup_type) + .bind(status) + .fetch_one(pool) + .await?; + let items: Vec = sqlx::query_as(&format!( + "SELECT 
{CS_COLS} FROM tree.change_set {where_sql} ORDER BY created_at DESC, id DESC LIMIT $3 OFFSET $4" + )) + .bind(haplogroup_type) + .bind(status) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + Ok(Page { items, total, page: page.max(1), page_size: limit }) +} + +pub async fn get(pool: &PgPool, id: i64) -> Result, DbError> { + let summary: Option = + sqlx::query_as(&format!("SELECT {CS_COLS} FROM tree.change_set WHERE id = $1")) + .bind(id) + .fetch_optional(pool) + .await?; + let Some(summary) = summary else { return Ok(None) }; + + let changes: Vec = sqlx::query_as( + "SELECT tc.id, tc.change_type::text AS change_type, tc.haplogroup_id, h.name AS haplogroup_name, \ + tc.old_values, tc.new_values, tc.status \ + FROM tree.tree_change tc LEFT JOIN tree.haplogroup h ON h.id = tc.haplogroup_id \ + WHERE tc.change_set_id = $1 ORDER BY tc.id", + ) + .bind(id) + .fetch_all(pool) + .await?; + + let comments: Vec = sqlx::query_as( + "SELECT id, commented_by, comment, created_at FROM tree.change_set_comment \ + WHERE change_set_id = $1 ORDER BY created_at, id", + ) + .bind(id) + .fetch_all(pool) + .await?; + + Ok(Some(ChangeSetDetail { summary, changes, comments })) +} + +pub async fn add_comment(pool: &PgPool, id: i64, by: &str, comment: &str) -> Result { + Ok(sqlx::query_scalar( + "INSERT INTO tree.change_set_comment (change_set_id, commented_by, comment) VALUES ($1,$2,$3) RETURNING id", + ) + .bind(id) + .bind(by) + .bind(comment) + .fetch_one(pool) + .await?) +} + +/// DRAFT/READY_FOR_REVIEW -> UNDER_REVIEW. +pub async fn start_review(pool: &PgPool, id: i64) -> Result { + let n = sqlx::query( + "UPDATE tree.change_set SET status = 'UNDER_REVIEW' \ + WHERE id = $1 AND status IN ('DRAFT','READY_FOR_REVIEW')", + ) + .bind(id) + .execute(pool) + .await? + .rows_affected(); + Ok(n > 0) +} + +/// Any non-applied state -> DISCARDED. 
+pub async fn discard(pool: &PgPool, id: i64, by: &str) -> Result { + let n = sqlx::query( + "UPDATE tree.change_set SET status = 'DISCARDED', promoted_by = $2 \ + WHERE id = $1 AND status <> 'APPLIED'", + ) + .bind(id) + .bind(by) + .execute(pool) + .await? + .rows_affected(); + Ok(n > 0) +} + +/// Set a single change's review status. `approve` -> APPROVED, else REJECTED. +pub async fn review_change(pool: &PgPool, change_id: i64, approve: bool) -> Result { + let status = if approve { "APPROVED" } else { "REJECTED" }; + let n = sqlx::query( + "UPDATE tree.tree_change SET status = $2 \ + FROM tree.change_set cs \ + WHERE tree_change.id = $1 AND tree_change.change_set_id = cs.id \ + AND cs.status NOT IN ('APPLIED','DISCARDED')", + ) + .bind(change_id) + .bind(status) + .execute(pool) + .await? + .rows_affected(); + Ok(n > 0) +} + +/// Approve all PENDING changes in a (non-applied) set. Returns the count. +pub async fn approve_all(pool: &PgPool, id: i64) -> Result { + let n = sqlx::query( + "UPDATE tree.tree_change SET status = 'APPROVED' \ + FROM tree.change_set cs \ + WHERE tree_change.change_set_id = $1 AND cs.id = $1 \ + AND tree_change.status = 'PENDING' AND cs.status NOT IN ('APPLIED','DISCARDED')", + ) + .bind(id) + .execute(pool) + .await? 
+ .rows_affected(); + Ok(n) +} + +// ── diff ────────────────────────────────────────────────────────────────────── + +pub async fn diff(pool: &PgPool, id: i64) -> Result { + let changes: Vec = sqlx::query_as( + "SELECT tc.id, tc.change_type::text AS change_type, tc.haplogroup_id, h.name AS haplogroup_name, \ + tc.old_values, tc.new_values, tc.status \ + FROM tree.tree_change tc LEFT JOIN tree.haplogroup h ON h.id = tc.haplogroup_id \ + WHERE tc.change_set_id = $1 AND tc.status <> 'REJECTED' ORDER BY tc.id", + ) + .bind(id) + .fetch_all(pool) + .await?; + + let mut summary = DiffSummary::default(); + let mut entries = Vec::with_capacity(changes.len()); + for c in changes { + let name = c + .haplogroup_name + .clone() + .or_else(|| c.new_values.as_ref().and_then(|v| jstr(v, "name"))) + .unwrap_or_else(|| "(unnamed)".to_string()); + let diff_type = match c.change_type.as_str() { + "CREATE" => { + summary.added += 1; + "ADDED" + } + "DELETE" => { + summary.removed += 1; + "REMOVED" + } + "REPARENT" => { + summary.reparented += 1; + "REPARENTED" + } + _ => { + summary.modified += 1; + "MODIFIED" + } + }; + entries.push(DiffEntry { + diff_type: diff_type.to_string(), + name, + detail: serde_json::json!({ "change_type": c.change_type, "old": c.old_values, "new": c.new_values }), + }); + } + Ok(TreeDiff { entries, summary }) +} + +// ── apply ───────────────────────────────────────────────────────────────────── + +/// Apply all APPROVED changes to the production tree (temporal model) and mark +/// the set APPLIED. Idempotent on status: re-applying an APPLIED set is a no-op +/// error. The whole apply runs in one transaction. +pub async fn apply(pool: &PgPool, id: i64, by: &str) -> Result { + let mut tx = pool.begin().await?; + + // Lock the set; gate on a reviewable status. 
+ let (status, cs_dna): (String, Option) = sqlx::query_as( + "SELECT status::text, haplogroup_type::text FROM tree.change_set WHERE id = $1 FOR UPDATE", + ) + .bind(id) + .fetch_optional(&mut *tx) + .await? + .ok_or_else(|| DbError::Conflict(format!("change set {id} not found")))?; + if !matches!(status.as_str(), "UNDER_REVIEW" | "READY_FOR_REVIEW") { + return Err(DbError::Conflict(format!( + "change set must be UNDER_REVIEW or READY_FOR_REVIEW to apply (is {status})" + ))); + } + + let changes: Vec = sqlx::query_as( + "SELECT id, change_type::text AS change_type, haplogroup_id, new_values \ + FROM tree.tree_change WHERE change_set_id = $1 AND status = 'APPROVED' ORDER BY id", + ) + .bind(id) + .fetch_all(&mut *tx) + .await?; + + let mut result = ApplyResult::default(); + // Maps a CREATE's negative placeholder id to the real id it gets, so later + // changes in the set (children, reparents) can reference nodes created + // earlier in this same apply. Changes are ordered by id = insertion order = + // parent-before-child (the merge emits them that way). + let mut placeholders: std::collections::HashMap = std::collections::HashMap::new(); + for c in &changes { + apply_change(&mut tx, c, cs_dna.as_deref(), &mut placeholders, &mut result).await?; + sqlx::query("UPDATE tree.tree_change SET status = 'APPLIED' WHERE id = $1") + .bind(c.id) + .execute(&mut *tx) + .await?; + } + + sqlx::query("UPDATE tree.change_set SET status = 'APPLIED', promoted_by = $2, promoted_at = now() WHERE id = $1") + .bind(id) + .bind(by) + .execute(&mut *tx) + .await?; + + // The applied change-set altered the served tree — bump the revision marker + // in-transaction so caches (the Edge ETag) revalidate. Atomic with the apply. 
+ crate::tree_revision::bump(&mut *tx).await?; + + tx.commit().await?; + Ok(result) +} + +#[derive(sqlx::FromRow)] +struct TreeChangeRow { + id: i64, + change_type: String, + haplogroup_id: Option, + new_values: Option, +} + +async fn apply_change( + tx: &mut Transaction<'_, Postgres>, + c: &TreeChangeRow, + cs_dna: Option<&str>, + placeholders: &mut std::collections::HashMap, + result: &mut ApplyResult, +) -> Result<(), DbError> { + let nv = c.new_values.clone().unwrap_or(Value::Null); + match c.change_type.as_str() { + "CREATE" => { + let name = jstr(&nv, "name") + .ok_or_else(|| DbError::Conflict("CREATE change missing new_values.name".into()))?; + let dna = jstr(&nv, "haplogroup_type") + .or_else(|| cs_dna.map(str::to_string)) + .ok_or_else(|| DbError::Conflict("CREATE change has no haplogroup_type".into()))?; + // `is_backbone` / `provenance` are optional in new_values: the merge + // engine omits them (COALESCE keeps the column defaults), while the + // SNP-graft writer carries the source's curated backbone flag and a + // provenance record (source name, source_updated). + let new_id: i64 = sqlx::query_scalar( + "INSERT INTO tree.haplogroup (name, haplogroup_type, lineage, source, formed_ybp, tmrca_ybp, is_backbone, provenance) \ + VALUES ($1, $2::core.dna_type, $3, $4, $5, $6, COALESCE($7, false), COALESCE($8, '{}'::jsonb)) RETURNING id", + ) + .bind(&name) + .bind(&dna) + .bind(jstr(&nv, "lineage")) + .bind(jstr(&nv, "source")) + .bind(jint(&nv, "formed_ybp").map(|v| v as i32)) + .bind(jint(&nv, "tmrca_ybp").map(|v| v as i32)) + .bind(jbool(&nv, "is_backbone")) + .bind(jval(&nv, "provenance")) + .fetch_one(&mut **tx) + .await?; + // Parent may be an existing id or a placeholder created earlier in + // this set; None makes a root (no parent edge). 
+ let parent = resolve_ref(&nv, placeholders, "parent_haplogroup_id", "parent_placeholder")?; + if parent.is_some() { + open_edge(tx, new_id, parent, jstr(&nv, "source").as_deref()).await?; + } + for vid in jids(&nv, "variant_ids") { + link_variant(tx, new_id, vid).await?; + } + if let Some(ph) = jint(&nv, "placeholder") { + placeholders.insert(ph, new_id); + } + result.created += 1; + } + "UPDATE" => { + let hid = c.haplogroup_id.ok_or_else(|| DbError::Conflict("UPDATE change missing haplogroup_id".into()))?; + // COALESCE keeps existing values when a field is absent from new_values. + sqlx::query( + "UPDATE tree.haplogroup SET name = COALESCE($2, name), lineage = COALESCE($3, lineage), \ + source = COALESCE($4, source), formed_ybp = COALESCE($5, formed_ybp), \ + tmrca_ybp = COALESCE($6, tmrca_ybp) WHERE id = $1", + ) + .bind(hid) + .bind(jstr(&nv, "name")) + .bind(jstr(&nv, "lineage")) + .bind(jstr(&nv, "source")) + .bind(jint(&nv, "formed_ybp").map(|v| v as i32)) + .bind(jint(&nv, "tmrca_ybp").map(|v| v as i32)) + .execute(&mut **tx) + .await?; + result.updated += 1; + } + "DELETE" => { + let hid = c.haplogroup_id.ok_or_else(|| DbError::Conflict("DELETE change missing haplogroup_id".into()))?; + // Temporal delete: expire the node, then detach by closing all + // current edges + variant links. The tree-navigation queries + // (roots/children/subtree) exclude expired nodes. 
+ sqlx::query("UPDATE tree.haplogroup SET valid_until = now() WHERE id = $1 AND valid_until IS NULL") + .bind(hid) + .execute(&mut **tx) + .await?; + close_current_edges_for(tx, hid).await?; + sqlx::query( + "UPDATE tree.haplogroup_variant SET valid_until = now() \ + WHERE haplogroup_id = $1 AND valid_until IS NULL", + ) + .bind(hid) + .execute(&mut **tx) + .await?; + result.deleted += 1; + } + "REPARENT" => { + let hid = c.haplogroup_id.ok_or_else(|| DbError::Conflict("REPARENT change missing haplogroup_id".into()))?; + let new_parent = match resolve_ref(&nv, placeholders, "new_parent_haplogroup_id", "new_parent_placeholder")? { + Some(p) => Some(p), + None => jint(&nv, "parent_haplogroup_id"), + }; + // Close the current parent edge, then open the new one. + sqlx::query( + "UPDATE tree.haplogroup_relationship SET valid_until = now() \ + WHERE child_haplogroup_id = $1 AND valid_until IS NULL", + ) + .bind(hid) + .execute(&mut **tx) + .await?; + open_edge(tx, hid, new_parent, jstr(&nv, "source").as_deref()).await?; + result.reparented += 1; + } + "VARIANT_EDIT" => { + let hid = c.haplogroup_id.ok_or_else(|| DbError::Conflict("VARIANT_EDIT change missing haplogroup_id".into()))?; + for vid in jids(&nv, "add") { + link_variant(tx, hid, vid).await?; + } + let remove = jids(&nv, "remove"); + if !remove.is_empty() { + sqlx::query( + "UPDATE tree.haplogroup_variant SET valid_until = now() \ + WHERE haplogroup_id = $1 AND variant_id = ANY($2) AND valid_until IS NULL", + ) + .bind(hid) + .bind(&remove) + .execute(&mut **tx) + .await?; + } + result.variant_edits += 1; + } + other => { + tracing::warn!(change_type = other, "unknown tree_change type; skipped"); + result.skipped += 1; + } + } + Ok(()) +} + +/// Open a new current edge (child under parent). `parent` None makes a root. 
+async fn open_edge( + tx: &mut Transaction<'_, Postgres>, + child: i64, + parent: Option, + source: Option<&str>, +) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO tree.haplogroup_relationship (child_haplogroup_id, parent_haplogroup_id, source) \ + VALUES ($1, $2, $3)", + ) + .bind(child) + .bind(parent) + .bind(source) + .execute(&mut **tx) + .await?; + Ok(()) +} + +/// Close every current edge touching a node (as child or parent). +async fn close_current_edges_for(tx: &mut Transaction<'_, Postgres>, hid: i64) -> Result<(), DbError> { + sqlx::query( + "UPDATE tree.haplogroup_relationship SET valid_until = now() \ + WHERE (child_haplogroup_id = $1 OR parent_haplogroup_id = $1) AND valid_until IS NULL", + ) + .bind(hid) + .execute(&mut **tx) + .await?; + Ok(()) +} + +async fn link_variant(tx: &mut Transaction<'_, Postgres>, hid: i64, vid: i64) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1, $2) \ + ON CONFLICT DO NOTHING", + ) + .bind(hid) + .bind(vid) + .execute(&mut **tx) + .await?; + Ok(()) +} + +// ── small JSON helpers ──────────────────────────────────────────────────────── + +/// Resolve a node reference that may be an existing id (`id_key`) or a +/// placeholder (`ph_key`) created earlier in this apply. A placeholder with no +/// mapping (its CREATE was rejected/not applied) is an unsatisfied dependency. 
+fn resolve_ref(
+    nv: &Value,
+    placeholders: &std::collections::HashMap<i64, i64>,
+    id_key: &str,
+    ph_key: &str,
+) -> Result<Option<i64>, DbError> {
+    // The placeholder key wins when both keys are present: a placeholder that is
+    // named but unmapped is an error, never a silent fallback to `id_key`.
+    if let Some(ph) = jint(nv, ph_key) {
+        return placeholders
+            .get(&ph)
+            .copied()
+            .map(Some)
+            .ok_or_else(|| DbError::Conflict(format!("unresolved placeholder {ph} (its CREATE was not applied)")));
+    }
+    Ok(jint(nv, id_key))
+}
+
+/// String field `k` of `v`, or `None` when absent or not a JSON string.
+fn jstr(v: &Value, k: &str) -> Option<String> {
+    v.get(k).and_then(Value::as_str).map(str::to_string)
+}
+/// Integer field `k` of `v`, or `None` when absent or not representable as `i64`.
+fn jint(v: &Value, k: &str) -> Option<i64> {
+    v.get(k).and_then(Value::as_i64)
+}
+/// Boolean field `k` of `v`, or `None` when absent or not a JSON boolean.
+fn jbool(v: &Value, k: &str) -> Option<bool> {
+    v.get(k).and_then(Value::as_bool)
+}
+/// Raw JSON field `k` of `v`, cloned; an explicit JSON `null` is treated as absent.
+fn jval(v: &Value, k: &str) -> Option<Value> {
+    v.get(k).filter(|x| !x.is_null()).cloned()
+}
+/// Integer elements of array field `k`. Non-integer elements are dropped, and a
+/// missing or non-array field yields an empty list.
+fn jids(v: &Value, k: &str) -> Vec<i64> {
+    v.get(k)
+        .and_then(Value::as_array)
+        .map(|a| a.iter().filter_map(Value::as_i64).collect())
+        .unwrap_or_default()
+}
diff --git a/rust/crates/du-db/src/consent.rs b/rust/crates/du-db/src/consent.rs
new file mode 100644
index 00000000..b35de11a
--- /dev/null
+++ b/rust/crates/du-db/src/consent.rs
@@ -0,0 +1,29 @@
+//! GDPR cookie-consent records. The banner POSTs an accept/decline; we persist
+//! an audit row in `ident.cookie_consents` (attributed to the signed-in user when
+//! there is one, else anonymous) alongside the client-side consent cookie.
+
+use crate::DbError;
+use sqlx::PgPool;
+use uuid::Uuid;
+
+/// Record a consent decision. `user_id` is the signed-in user, if any.
+pub async fn record( + pool: &PgPool, + user_id: Option, + consent_given: bool, + policy_version: &str, + user_agent: Option<&str>, +) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO ident.cookie_consents \ + (user_id, consent_given, policy_version, user_agent) \ + VALUES ($1, $2, $3, $4)", + ) + .bind(user_id) + .bind(consent_given) + .bind(policy_version) + .bind(user_agent) + .execute(pool) + .await?; + Ok(()) +} diff --git a/rust/crates/du-db/src/coverage.rs b/rust/crates/du-db/src/coverage.rs new file mode 100644 index 00000000..92517c4e --- /dev/null +++ b/rust/crates/du-db/src/coverage.rs @@ -0,0 +1,250 @@ +//! Coverage benchmark aggregation over `genomics.alignment_metadata.coverage` +//! (JSONB), grouped by sequencing lab and test type. The `meanDepth` expression +//! index from migration 0004 accelerates the JSONB extraction. + +use crate::DbError; +use du_domain::coverage::CoverageBenchmark; +use sqlx::PgPool; +use std::collections::HashMap; + +pub async fn benchmarks(pool: &PgPool) -> Result, DbError> { + #[derive(sqlx::FromRow)] + struct Row { + lab: Option, + test_type: Option, + library_count: i64, + avg_mean_depth: Option, + avg_cov_10x: Option, + expected_min_depth: Option, + } + + let rows: Vec = sqlx::query_as( + "SELECT l.name AS lab, \ + ttd.display_name AS test_type, \ + count(DISTINCT sl.id) AS library_count, \ + avg((am.coverage->>'meanDepth')::double precision) AS avg_mean_depth, \ + avg((am.coverage->>'percent_coverage_at_10x')::double precision) AS avg_cov_10x, \ + ttd.expected_min_depth AS expected_min_depth \ + FROM genomics.alignment_metadata am \ + JOIN genomics.sequence_file sf ON sf.id = am.sequence_file_id \ + JOIN genomics.sequence_library sl ON sl.id = sf.library_id \ + LEFT JOIN genomics.sequencing_lab l ON l.id = sl.lab_id \ + LEFT JOIN genomics.test_type_definition ttd ON ttd.id = sl.test_type_id \ + WHERE am.metric_level = 'CONTIG_OVERALL' \ + GROUP BY l.name, ttd.display_name, ttd.expected_min_depth \ + ORDER BY 
l.name NULLS LAST, ttd.display_name NULLS LAST", + ) + .fetch_all(pool) + .await?; + + Ok(rows + .into_iter() + .map(|r| CoverageBenchmark { + lab: r.lab, + test_type: r.test_type, + library_count: r.library_count, + avg_mean_depth: r.avg_mean_depth, + avg_cov_10x: r.avg_cov_10x, + expected_min_depth: r.expected_min_depth, + }) + .collect()) +} + +// ── empirical per-test-type coverage norms (D7) ────────────────────────────────── +// +// The cohort norm for each test type, DERIVED from the federated coverage already +// mirrored in `fed.coverage_summary` (joined to `fed.sequencerun.test_type`) plus +// `fed.genotype` marker counts — not hand-curated advertised numbers. Persisted to +// `genomics.test_type_coverage_norm` by a recompute job; read at report-render time +// to compare a sample's actual coverage against what its test type typically +// achieves. Mirrors the sequencer engine's advisory-lock + declarative-recompute +// discipline. + +/// Advisory-lock key guarding concurrent norm recomputes. +const NORMS_ADVISORY_KEY: i64 = 0x434F_5645_524E; // "COVERN" + +/// One test type's empirical coverage norm. +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct CoverageNorm { + pub test_type: String, + pub sample_count: i32, + pub median_mean_depth: Option, + pub p25_mean_depth: Option, + pub p75_mean_depth: Option, + pub median_pct_10x: Option, + pub median_pct_20x: Option, + pub median_pct_30x: Option, + pub typical_y_markers: Option, + pub typical_mt_markers: Option, +} + +/// Outcome of [`recompute_norms`]. +#[derive(Debug, Default, Clone)] +pub struct NormReport { + pub test_types: u64, + pub pruned: u64, +} + +/// Every persisted test-type coverage norm, ordered by test type. 
+pub async fn norms(pool: &PgPool) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT test_type, sample_count, median_mean_depth, p25_mean_depth, p75_mean_depth, \ + median_pct_10x, median_pct_20x, median_pct_30x, typical_y_markers, typical_mt_markers \ + FROM genomics.test_type_coverage_norm ORDER BY test_type", + ) + .fetch_all(pool) + .await?) +} + +/// The persisted norm for one test type (report-time conformance lookup). +pub async fn norm_for(pool: &PgPool, test_type: &str) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT test_type, sample_count, median_mean_depth, p25_mean_depth, p75_mean_depth, \ + median_pct_10x, median_pct_20x, median_pct_30x, typical_y_markers, typical_mt_markers \ + FROM genomics.test_type_coverage_norm WHERE test_type = $1", + ) + .bind(test_type) + .fetch_optional(pool) + .await?) +} + +/// Recompute the empirical norms from the federated cohort. Single-flighted by an +/// advisory lock (a second caller no-ops); unlocks on every path. +pub async fn recompute_norms(pool: &PgPool) -> Result { + let mut lock = pool.acquire().await?; + let locked: bool = sqlx::query_scalar("SELECT pg_try_advisory_lock($1)") + .bind(NORMS_ADVISORY_KEY) + .fetch_one(&mut *lock) + .await?; + if !locked { + return Ok(NormReport::default()); + } + let result = recompute_norms_locked(pool).await; + let _ = sqlx::query("SELECT pg_advisory_unlock($1)") + .bind(NORMS_ADVISORY_KEY) + .execute(&mut *lock) + .await; + result +} + +async fn recompute_norms_locked(pool: &PgPool) -> Result { + // Depth/coverage norms from the federated alignment cohort, keyed by test type. 
+ #[derive(sqlx::FromRow)] + struct CovAgg { + test_type: String, + sample_count: i32, + median_mean_depth: Option, + p25_mean_depth: Option, + p75_mean_depth: Option, + median_pct_10x: Option, + median_pct_20x: Option, + median_pct_30x: Option, + } + let cov: Vec = sqlx::query_as( + "SELECT sr.test_type AS test_type, count(*)::int AS sample_count, \ + percentile_cont(0.5) WITHIN GROUP (ORDER BY cs.mean_coverage) AS median_mean_depth, \ + percentile_cont(0.25) WITHIN GROUP (ORDER BY cs.mean_coverage) AS p25_mean_depth, \ + percentile_cont(0.75) WITHIN GROUP (ORDER BY cs.mean_coverage) AS p75_mean_depth, \ + percentile_cont(0.5) WITHIN GROUP (ORDER BY cs.pct_10x) AS median_pct_10x, \ + percentile_cont(0.5) WITHIN GROUP (ORDER BY cs.pct_20x) AS median_pct_20x, \ + percentile_cont(0.5) WITHIN GROUP (ORDER BY cs.pct_30x) AS median_pct_30x \ + FROM fed.coverage_summary cs \ + JOIN fed.sequencerun sr ON sr.at_uri = cs.sequence_run_ref \ + WHERE cs.mean_coverage IS NOT NULL AND sr.test_type IS NOT NULL AND btrim(sr.test_type) <> '' \ + GROUP BY sr.test_type", + ) + .fetch_all(pool) + .await?; + + // Typical Y/mt marker counts per test type (for the deferred age weighting). + #[derive(sqlx::FromRow)] + struct MarkerAgg { + test_type: String, + typical_y_markers: Option, + typical_mt_markers: Option, + } + let markers: Vec = sqlx::query_as( + "SELECT test_type_code AS test_type, \ + percentile_cont(0.5) WITHIN GROUP (ORDER BY y_markers_called::double precision)::int AS typical_y_markers, \ + percentile_cont(0.5) WITHIN GROUP (ORDER BY mt_markers_called::double precision)::int AS typical_mt_markers \ + FROM fed.genotype \ + WHERE test_type_code IS NOT NULL AND btrim(test_type_code) <> '' \ + GROUP BY test_type_code", + ) + .fetch_all(pool) + .await?; + + // Merge the two aggregates by test type. 
+ let mut by_type: HashMap = HashMap::new(); + for c in cov { + by_type.insert( + c.test_type.clone(), + CoverageNorm { + test_type: c.test_type, + sample_count: c.sample_count, + median_mean_depth: c.median_mean_depth, + p25_mean_depth: c.p25_mean_depth, + p75_mean_depth: c.p75_mean_depth, + median_pct_10x: c.median_pct_10x, + median_pct_20x: c.median_pct_20x, + median_pct_30x: c.median_pct_30x, + typical_y_markers: None, + typical_mt_markers: None, + }, + ); + } + for m in markers { + let e = by_type.entry(m.test_type.clone()).or_insert_with(|| CoverageNorm { + test_type: m.test_type.clone(), + sample_count: 0, + median_mean_depth: None, + p25_mean_depth: None, + p75_mean_depth: None, + median_pct_10x: None, + median_pct_20x: None, + median_pct_30x: None, + typical_y_markers: None, + typical_mt_markers: None, + }); + e.typical_y_markers = m.typical_y_markers; + e.typical_mt_markers = m.typical_mt_markers; + } + + // Declarative upsert (assign, never accumulate) + prune dropped test types. 
+ let mut tx = pool.begin().await?; + let mut kept: Vec = Vec::new(); + for n in by_type.into_values() { + sqlx::query( + "INSERT INTO genomics.test_type_coverage_norm \ + (test_type, sample_count, median_mean_depth, p25_mean_depth, p75_mean_depth, \ + median_pct_10x, median_pct_20x, median_pct_30x, typical_y_markers, typical_mt_markers, computed_at) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10, now()) \ + ON CONFLICT (test_type) DO UPDATE SET \ + sample_count = EXCLUDED.sample_count, median_mean_depth = EXCLUDED.median_mean_depth, \ + p25_mean_depth = EXCLUDED.p25_mean_depth, p75_mean_depth = EXCLUDED.p75_mean_depth, \ + median_pct_10x = EXCLUDED.median_pct_10x, median_pct_20x = EXCLUDED.median_pct_20x, \ + median_pct_30x = EXCLUDED.median_pct_30x, typical_y_markers = EXCLUDED.typical_y_markers, \ + typical_mt_markers = EXCLUDED.typical_mt_markers, computed_at = now()", + ) + .bind(&n.test_type) + .bind(n.sample_count) + .bind(n.median_mean_depth) + .bind(n.p25_mean_depth) + .bind(n.p75_mean_depth) + .bind(n.median_pct_10x) + .bind(n.median_pct_20x) + .bind(n.median_pct_30x) + .bind(n.typical_y_markers) + .bind(n.typical_mt_markers) + .execute(&mut *tx) + .await?; + kept.push(n.test_type); + } + let pruned = sqlx::query("DELETE FROM genomics.test_type_coverage_norm WHERE test_type <> ALL($1)") + .bind(&kept) + .execute(&mut *tx) + .await? + .rows_affected(); + tx.commit().await?; + Ok(NormReport { test_types: kept.len() as u64, pruned }) +} diff --git a/rust/crates/du-db/src/denovo.rs b/rust/crates/du-db/src/denovo.rs new file mode 100644 index 00000000..f2eeae02 --- /dev/null +++ b/rust/crates/du-db/src/denovo.rs @@ -0,0 +1,476 @@ +//! De-novo tree foundation loader. +//! +//! Ingests the normalized JSON emitted by `~/Genomics/ytree/bin/68_export_ingest.py` +//! (schema: `documents/proposals/denovo-tree-ingestion.md`) as the **sole tree +//! foundation** — the tree we built ourselves from genotypes, not an import to +//! graft onto. 
Greenfield by design: the caller clears the lineage
+//! ([`crate::haplogroup::clear_dna`]) first; this inserts nodes, edges, and
+//! defining-variant links, then recomputes the backbone.
+//!
+//! Builds the full lineage: topology + defining SNPs, sample-leaf placement of the
+//! tips (`tree.haplogroup_sample`), and de-novo-vs-reference conflicts for curator
+//! triage (`tree.denovo_conflict`).
+//!
+//! Variant identity: each defining SNP is matched to `core.variant` by **hs1
+//! coordinate** (contig + position + allele-set), reusing the YBrowse-loaded
+//! catalog so known SNPs keep their `canonical_name`; unmatched SNPs are minted
+//! as de-novo coordinate-named variants.
+
+use std::collections::{HashMap, HashSet};
+
+use du_domain::enums::DnaType;
+use serde::Deserialize;
+use serde_json::json;
+use sqlx::PgPool;
+use uuid::Uuid;
+
+use crate::{pg_enum_label, DbError, Page};
+
+// NOTE(review): in this copy of the patch the generic arguments of several field
+// types are missing (`Vec` / `Option` with no `<…>`). They are reproduced as found;
+// restore them from the upstream file before compiling.
+
+/// Top-level de-novo tree document consumed by [`load`] (deserialized from
+/// camelCase JSON keys).
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DenovoTree {
+    /// Not read by `load`.
+    pub chromosome: String,
+    /// Lineage selector: `load` accepts `"Y_DNA"` or `"MT_DNA"` and rejects
+    /// anything else with a decode error.
+    pub haplogroup_type: String,
+    /// Not read by `load`.
+    pub build: String,
+    /// Written by `load` as the `source` of every inserted haplogroup, edge and
+    /// conflict row, and into each node's provenance.
+    pub source: String,
+    /// Not read by `load` (edges come from each node's `parent`).
+    pub root: String,
+    /// Internal nodes of the tree; each becomes one `tree.haplogroup` row.
+    pub nodes: Vec,
+    /// Sample leaves; optional in the JSON (defaults to empty).
+    #[serde(default)]
+    pub tips: Vec,
+    /// Conflicts for curator triage; optional in the JSON (defaults to empty).
+    #[serde(default)]
+    pub conflicts: Vec,
+}
+
+/// A de-novo-vs-reference placement conflict, surfaced for curator triage.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DenovoConflict {
+    /// Stored by `load` in the `haplogroup` column; `"?"` is stored when absent.
+    pub isogg: Option,
+    pub label: Option,
+    pub n_tips: i32,
+    pub magnitude: i32,
+    pub home_node: Option,
+    pub foreign_in: i32,
+    pub members_away: i32,
+}
+
+/// A tree tip = a cohort sample, placed as a leaf under its `parent_node`.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DenovoTip {
+    /// Used by `load` as the `core.biosample.accession` (get-or-create key) and
+    /// as the fallback call text.
+    pub sample: String,
+    /// Id of the [`DenovoNode`] this tip hangs under; `load` skips the tip when
+    /// no node with this id was loaded.
+    pub parent_node: String,
+    /// A value starting with `PRJEB` makes `load` create the biosample as
+    /// `EXTERNAL` / public; anything else (or none) as `STANDARD` / private.
+    pub cohort: Option,
+    /// Copied into the biosample's `source_attrs` JSON.
+    pub sex: Option,
+    /// Preferred `call_text` for the placement; falls back to `sample`.
+    pub terminal_label: Option,
+}
+
+/// One node of the de-novo tree.
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DenovoNode {
+    /// De-novo node id; the key `load` uses to wire edges, variant links and tips.
+    pub id: String,
+    /// Parent node id; `load` inserts no edge when absent and fails with a decode
+    /// error when it names a node that is not in the document.
+    pub parent: Option,
+    /// Branch support, bucketed by `confidence` into the `confidence_level` text.
+    pub support: Option,
+    /// Recorded in provenance only.
+    pub branch_length: Option,
+    /// Preferred display name for the node; also recorded in provenance.
+    pub label: Option,
+    /// Recorded in provenance and, when it differs from the chosen name, as an alias.
+    pub isogg: Option,
+    /// Recorded in provenance only.
+    pub markers_matched: Option,
+    /// Recorded in provenance only.
+    pub markers_expected: Option,
+    /// Recorded in provenance only.
+    pub n_mut: Option,
+    /// Recorded in provenance only.
+    pub n_reversion: Option,
+    /// SNPs defining the branch leading to this node; each is resolved to a
+    /// `core.variant` id and linked in `tree.haplogroup_variant`.
+    #[serde(default)]
+    pub defining_variants: Vec,
+    /// SNPs from weakly-supported branches collapsed below this node — recorded as
+    /// a tagged block in provenance, NOT as strict defining links (the node's other
+    /// children don't carry them; their exact placement in the subtree is unresolved).
+    #[serde(default)]
+    pub unresolved_variants: Vec,
+}
+
+/// One SNP as emitted by the de-novo pipeline.
+#[derive(Debug, Deserialize)]
+pub struct DenovoVariant {
+    /// Contig; with `pos`, `ancestral` and `derived` it forms the resolution
+    /// cache key and the `hs1` coordinate used for catalog matching.
+    pub chrom: String,
+    pub pos: i64,
+    /// JSON key `ref`; not read by the loader.
+    #[serde(rename = "ref")]
+    pub ref_: String,
+    /// Not read by the loader.
+    pub alt: String,
+    pub ancestral: String,
+    pub derived: String,
+    /// Optional in the JSON (defaults to `false`); not read by the loader.
+    #[serde(default)]
+    pub reversion: bool,
+    /// Not read by the loader.
+    pub polarity: Option,
+}
+
+/// Counters returned by [`load`].
+#[derive(Debug, Default)]
+pub struct LoadReport {
+    /// `tree.haplogroup` rows inserted.
+    pub nodes: usize,
+    /// `tree.haplogroup_relationship` rows inserted.
+    pub edges: usize,
+    /// `tree.haplogroup_variant` rows inserted.
+    pub variant_links: usize,
+    /// Distinct SNPs matched to an existing `core.variant` row by coordinate.
+    pub variants_reused: usize,
+    /// Distinct SNPs for which a new coordinate-named `core.variant` row was inserted.
+    pub variants_created: usize,
+    /// Collapsed-branch SNPs recorded as tagged provenance blocks (not links).
+    pub unresolved_block: usize,
+    /// Tips placed as `tree.haplogroup_sample` leaves under their terminal node.
+    pub tips_placed: usize,
+    /// New `core.biosample` rows minted for tips (vs reused by accession).
+    pub biosamples_created: usize,
+    /// De-novo-vs-reference conflicts recorded for curator triage.
+    pub conflicts_loaded: usize,
+}
+
+/// UFBoot support → the `confidence_level` text bucket.
+fn confidence(support: Option) -> &'static str {
+    // Buckets: >= 95 HIGH, >= 70 MEDIUM, any other value LOW, missing UNKNOWN.
+    match support {
+        Some(s) if s >= 95 => "HIGH",
+        Some(s) if s >= 70 => "MEDIUM",
+        Some(_) => "LOW",
+        None => "UNKNOWN",
+    }
+}
+
+// NOTE(review): in this copy of the patch several generic arguments are missing
+// (`Option`, `Result`, `HashMap`, `HashSet`, `Vec`, `collect::>()`); the code below
+// is reproduced as found — restore them from the upstream file before compiling.
+
+/// Load a de-novo tree document as the tree foundation. Assumes `tree.*` is
+/// already cleared (greenfield). Commits the topology, then recomputes backbone
+/// and bumps the tree revision.
+///
+/// Steps 1–7 (variant resolution, node naming, nodes, edges, variant links, tip
+/// placement, conflicts) run in a single transaction; step 8 runs afterwards
+/// against the pool. Returns the per-step counters.
+///
+/// Errors: a decode error for an unknown `haplogroup_type` or for a node whose
+/// `parent` is not in the document; otherwise whatever the database calls return.
+pub async fn load(pool: &PgPool, doc: &DenovoTree) -> Result {
+    let dna: DnaType = match doc.haplogroup_type.as_str() {
+        "Y_DNA" => DnaType::YDna,
+        "MT_DNA" => DnaType::MtDna,
+        other => return Err(DbError::Decode(format!("unknown haplogroupType {other:?}"))),
+    };
+    let dna_label = pg_enum_label(&dna)?;
+    let mut rep = LoadReport::default();
+
+    let mut tx = pool.begin().await?;
+
+    // 1. Resolve every defining SNP to a core.variant id (catalog reuse or mint),
+    //    caching by (contig, pos, ancestral, derived). Records each node's links
+    //    and remembers any catalog name (for SNP-based node naming below).
+    // Cache key = hs1 (contig, position, ancestral, derived); value = (variant_id, catalog name).
+    type VariantKey = (String, i64, String, String);
+    type ResolvedVariant = (i64, Option);
+    let mut vcache: HashMap = HashMap::new();
+    // node id -> Vec<(variant_id, ancestral, derived)>
+    let mut node_links: HashMap<&str, Vec<(i64, &str, &str)>> = HashMap::new();
+    // node id -> first catalog SNP name (by position order) for naming fallback
+    let mut node_snp_name: HashMap<&str, String> = HashMap::new();
+
+    for node in &doc.nodes {
+        let mut links = Vec::with_capacity(node.defining_variants.len());
+        // position-ordered so the naming fallback is deterministic
+        let mut vars: Vec<&DenovoVariant> = node.defining_variants.iter().collect();
+        vars.sort_by_key(|v| v.pos);
+        for v in vars {
+            let key = (v.chrom.clone(), v.pos, v.ancestral.clone(), v.derived.clone());
+            // Each distinct SNP hits the database once; repeats are served from the cache.
+            let (vid, name) = if let Some(hit) = vcache.get(&key) {
+                hit.clone()
+            } else {
+                let resolved = resolve_variant(&mut tx, v, &mut rep).await?;
+                vcache.insert(key, resolved.clone());
+                resolved
+            };
+            // NOTE(review): resolve_variant also returns Some(name) for a minted
+            // variant (the synthetic "chrom:pos anc>der" string), so this fallback
+            // can pick a coordinate-style name, not only a catalog name — confirm
+            // that is the intended display name.
+            if let Some(n) = &name {
+                node_snp_name.entry(node.id.as_str()).or_insert_with(|| n.clone());
+            }
+            links.push((vid, v.ancestral.as_str(), v.derived.as_str()));
+        }
+        node_links.insert(node.id.as_str(), links);
+    }
+
+    // 2. Assign a display name to each node: ISOGG/PhyloTree label → a catalog SNP
+    //    name on the defining branch → the de-novo NodeN id. Disambiguate
+    //    collisions with the (globally-unique) NodeN. Labeled nodes are processed
+    //    first so they claim their name before fallbacks.
+    let mut order: Vec<&DenovoNode> = doc.nodes.iter().collect();
+    // `false < true`, so labeled nodes sort first; ties break on the node id.
+    order.sort_by_key(|n| (n.label.is_none(), n.id.as_str().to_string()));
+    let mut used: HashSet = HashSet::new();
+    let mut name_of: HashMap<&str, String> = HashMap::new();
+    for node in order {
+        let base = node
+            .label
+            .clone()
+            .or_else(|| node_snp_name.get(node.id.as_str()).cloned())
+            .unwrap_or_else(|| node.id.clone());
+        let name = if used.insert(base.clone()) {
+            base
+        } else {
+            // collision → suffix the unique de-novo id
+            let alt = format!("{base} [{}]", node.id);
+            used.insert(alt.clone());
+            alt
+        };
+        name_of.insert(node.id.as_str(), name);
+    }
+
+    // 3. Insert nodes, building the NodeN -> db id map.
+    let mut idmap: HashMap<&str, i64> = HashMap::new();
+    for node in &doc.nodes {
+        let name = &name_of[node.id.as_str()];
+        // Collapsed-branch SNPs lifted to this node: a tagged provenance block, not
+        // defining links (so they don't pollute the strict defining-SNP model).
+        let unresolved: Vec = node
+            .unresolved_variants
+            .iter()
+            .map(|v| format!("{}:{}{}>{}", v.chrom, v.pos, v.ancestral, v.derived))
+            .collect();
+        rep.unresolved_block += unresolved.len();
+        // "aliases" keeps the isogg/label strings that differ from the chosen name.
+        let prov = json!({
+            "source": doc.source,
+            "denovo_node": node.id,
+            "isogg": node.isogg,
+            "label": node.label,
+            "support": node.support,
+            "branch_length": node.branch_length,
+            "markers_matched": node.markers_matched,
+            "markers_expected": node.markers_expected,
+            "n_mut": node.n_mut,
+            "n_reversion": node.n_reversion,
+            "unresolved_count": unresolved.len(),
+            "unresolved_variants": unresolved,
+            "aliases": node.isogg.iter().chain(node.label.iter()).filter(|a| *a != name).collect::>(),
+        });
+        let id: i64 = sqlx::query_scalar(
+            "INSERT INTO tree.haplogroup \
+             (name, haplogroup_type, source, confidence_level, is_backbone, provenance) \
+             VALUES ($1, $2::core.dna_type, $3, $4, false, $5) RETURNING id",
+        )
+        .bind(name)
+        .bind(&dna_label)
+        .bind(&doc.source)
+        .bind(confidence(node.support))
+        .bind(&prov)
+        .fetch_one(&mut *tx)
+        .await?;
+        idmap.insert(node.id.as_str(), id);
+        rep.nodes += 1;
+    }
+
+    // 4. Insert edges (parent → child).
+    for node in &doc.nodes {
+        // Nodes without a parent get no edge.
+        let Some(parent) = &node.parent else { continue };
+        let child_id = idmap[node.id.as_str()];
+        let parent_id = *idmap
+            .get(parent.as_str())
+            .ok_or_else(|| DbError::Decode(format!("node {} references unknown parent {parent}", node.id)))?;
+        sqlx::query(
+            "INSERT INTO tree.haplogroup_relationship (child_haplogroup_id, parent_haplogroup_id, source) \
+             VALUES ($1, $2, $3)",
+        )
+        .bind(child_id)
+        .bind(parent_id)
+        .bind(&doc.source)
+        .execute(&mut *tx)
+        .await?;
+        rep.edges += 1;
+    }
+
+    // 5. Link defining variants to their node (the branch leading to it).
+    for node in &doc.nodes {
+        let hg_id = idmap[node.id.as_str()];
+        for (vid, anc, der) in &node_links[node.id.as_str()] {
+            sqlx::query(
+                "INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id, ancestral_allele, derived_allele) \
+                 VALUES ($1, $2, $3, $4)",
+            )
+            .bind(hg_id)
+            .bind(vid)
+            .bind(*anc)
+            .bind(*der)
+            .execute(&mut *tx)
+            .await?;
+            rep.variant_links += 1;
+        }
+    }
+
+    // 6. Place tips as biosample leaves under their terminal (surviving) node. A
+    //    sample (1000G/HGDP/…) is one biosample shared across lineages — get-or-create
+    //    by accession; the placement is dna-scoped (cleared by clear_dna). The de-novo
+    //    tree position is authoritative, so this is a direct placement, not a
+    //    call-resolution (unlike tree_sample::recompute_placements).
+    for tip in &doc.tips {
+        // NOTE(review): a tip whose parent_node is not in the document is skipped
+        // silently and not counted anywhere in the report, whereas an unknown node
+        // parent (step 4) is a hard error — confirm the asymmetry is intended.
+        let Some(&hg_id) = idmap.get(tip.parent_node.as_str()) else { continue };
+        // Public reference panels (PRJEB*) → EXTERNAL/public; anything else (e.g. an
+        // own genome) stays STANDARD/private.
+        let (src, is_public) = match tip.cohort.as_deref() {
+            Some(c) if c.starts_with("PRJEB") => ("EXTERNAL", true),
+            _ => ("STANDARD", false),
+        };
+        let attrs = json!({ "cohort": tip.cohort, "sex": tip.sex, "denovo": true });
+        // Insert-or-skip by accession; when the insert is skipped (row already
+        // exists) no row is returned and the guid is fetched by accession instead.
+        let guid: Uuid = match sqlx::query_scalar(
+            "INSERT INTO core.biosample (source, accession, center_name, source_attrs, is_public) \
+             VALUES ($1::core.biosample_source, $2, $3, $4, $5) \
+             ON CONFLICT (accession) WHERE accession IS NOT NULL DO NOTHING RETURNING sample_guid",
+        )
+        .bind(src)
+        .bind(&tip.sample)
+        .bind(tip.cohort.as_deref())
+        .bind(&attrs)
+        .bind(is_public)
+        .fetch_optional(&mut *tx)
+        .await?
+        {
+            Some(g) => {
+                rep.biosamples_created += 1;
+                g
+            }
+            None => {
+                sqlx::query_scalar("SELECT sample_guid FROM core.biosample WHERE accession = $1")
+                    .bind(&tip.sample)
+                    .fetch_one(&mut *tx)
+                    .await?
+            }
+        };
+        let call = tip.terminal_label.clone().unwrap_or_else(|| tip.sample.clone());
+        sqlx::query(
+            "INSERT INTO tree.haplogroup_sample (sample_guid, dna_type, haplogroup_id, call_text, status, refreshed_at) \
+             VALUES ($1, $2::core.dna_type, $3, $4, 'PLACED', now()) \
+             ON CONFLICT (sample_guid, dna_type) DO UPDATE \
+             SET haplogroup_id = EXCLUDED.haplogroup_id, call_text = EXCLUDED.call_text, \
+                 status = 'PLACED', refreshed_at = now()",
+        )
+        .bind(guid)
+        .bind(&dna_label)
+        .bind(hg_id)
+        .bind(&call)
+        .execute(&mut *tx)
+        .await?;
+        rep.tips_placed += 1;
+    }
+
+    // 7. Record de-novo-vs-reference conflicts for curator triage (this dna's prior
+    //    rows were cleared by clear_dna before the load).
+    for c in &doc.conflicts {
+        sqlx::query(
+            "INSERT INTO tree.denovo_conflict \
+             (dna_type, haplogroup, label, n_tips, magnitude, home_node, foreign_in, members_away, source) \
+             VALUES ($1::core.dna_type, $2, $3, $4, $5, $6, $7, $8, $9)",
+        )
+        .bind(&dna_label)
+        .bind(c.isogg.as_deref().unwrap_or("?"))
+        .bind(c.label.as_deref())
+        .bind(c.n_tips)
+        .bind(c.magnitude)
+        .bind(c.home_node.as_deref())
+        .bind(c.foreign_in)
+        .bind(c.members_away)
+        .bind(&doc.source)
+        .execute(&mut *tx)
+        .await?;
+        rep.conflicts_loaded += 1;
+    }
+
+    tx.commit().await?;
+
+    // 8. Post-process (own transactions): backbone + revision bump.
+    // These run after the commit above, so an error here is returned with the
+    // topology already committed.
+    crate::haplogroup::recompute_backbone(pool, dna).await?;
+    crate::tree_revision::bump(pool).await?;
+
+    Ok(rep)
+}
+
+/// Resolve one de-novo SNP to a `core.variant` id: reuse a catalog row at the
+/// same hs1 site (contig + position + allele-set, biallelic, polarity-agnostic),
+/// else mint a de-novo coordinate-named variant. Returns `(id, canonical_name?)`.
+///
+/// `rep.variants_reused` is bumped on a coordinate match and
+/// `rep.variants_created` on a successful insert; neither is bumped when the
+/// insert is skipped because the synthetic name already exists.
+async fn resolve_variant(
+    tx: &mut sqlx::PgConnection,
+    v: &DenovoVariant,
+    rep: &mut LoadReport,
+) -> Result<(i64, Option), DbError> {
+    // Catalog match by hs1 coordinate. The GIN @> handles the contig+position
+    // prefilter; the allele-set check disambiguates multiallelic sites. Prefer a
+    // named row over an unnamed one.
+    if let Some((id, name)) = sqlx::query_as::<_, (i64, Option)>(
+        "SELECT id, canonical_name FROM core.variant \
+         WHERE coordinates @> jsonb_build_object('hs1', jsonb_build_object('contig', $1::text, 'position', $2::bigint)) \
+           AND defining_haplogroup_id IS NULL \
+           AND ( (coordinates->'hs1'->>'ancestral' = $3 AND coordinates->'hs1'->>'derived' = $4) \
+              OR (coordinates->'hs1'->>'ancestral' = $4 AND coordinates->'hs1'->>'derived' = $3) ) \
+         ORDER BY (canonical_name IS NULL), id LIMIT 1",
+    )
+    .bind(&v.chrom)
+    .bind(v.pos)
+    .bind(&v.ancestral)
+    .bind(&v.derived)
+    .fetch_optional(&mut *tx)
+    .await?
+    {
+        rep.variants_reused += 1;
+        return Ok((id, name));
+    }
+
+    // Mint a de-novo, coordinate-named variant (hs1 frame). Deterministic synthetic
+    // name so re-runs dedupe; UNNAMED so the curator can later fold it onto a real
+    // name. Same no-op-write discipline as ensure_variant_by_coords.
+    let synth = format!("{}:{}{}>{}", v.chrom, v.pos, v.ancestral, v.derived);
+    let coords = json!({ "hs1": {
+        "contig": v.chrom, "position": v.pos, "ancestral": v.ancestral, "derived": v.derived
+    }});
+    if let Some(id) = sqlx::query_scalar::<_, i64>(
+        "INSERT INTO core.variant (canonical_name, mutation_type, naming_status, coordinates) \
+         VALUES ($1, 'SNP'::core.mutation_type, 'UNNAMED'::core.naming_status, $2) \
+         ON CONFLICT (canonical_name, COALESCE(defining_haplogroup_id, -1)) WHERE canonical_name IS NOT NULL \
+         DO NOTHING RETURNING id",
+    )
+    .bind(&synth)
+    .bind(&coords)
+    .fetch_optional(&mut *tx)
+    .await?
+    {
+        rep.variants_created += 1;
+        return Ok((id, Some(synth)));
+    }
+    // The insert was skipped: a row with this synthetic name already exists, so
+    // look it up by name.
+    let id = sqlx::query_scalar::<_, i64>(
+        "SELECT id FROM core.variant WHERE canonical_name = $1 AND defining_haplogroup_id IS NULL",
+    )
+    .bind(&synth)
+    .fetch_one(&mut *tx)
+    .await?;
+    Ok((id, Some(synth)))
+}
+
+/// A row of the curator de-novo-conflict queue.
+#[derive(Debug, sqlx::FromRow)]
+pub struct ConflictRow {
+    pub id: i64,
+    /// The `dna_type` enum column, selected as text.
+    pub dna_type: String,
+    pub haplogroup: String,
+    pub label: Option,
+    pub n_tips: i32,
+    pub magnitude: i32,
+    pub home_node: Option,
+    pub foreign_in: i32,
+    pub members_away: i32,
+}
+
+/// Paginated de-novo-vs-reference conflicts, worst (highest magnitude) first,
+/// optionally filtered to one lineage. Read-only triage queue for `/curator/denovo-conflicts`.
+pub async fn list_conflicts( + pool: &PgPool, + dna: Option, + page: i64, + page_size: i64, +) -> Result, DbError> { + let offset = Page::<()>::offset(page, page_size); + let limit = page_size.clamp(1, 200); + let dna_label = match dna { + Some(d) => Some(pg_enum_label(&d)?), + None => None, + }; + let total: i64 = sqlx::query_scalar( + "SELECT count(*) FROM tree.denovo_conflict WHERE ($1::text IS NULL OR dna_type::text = $1)", + ) + .bind(&dna_label) + .fetch_one(pool) + .await?; + let items: Vec = sqlx::query_as( + "SELECT id, dna_type::text AS dna_type, haplogroup, label, n_tips, magnitude, \ + home_node, foreign_in, members_away \ + FROM tree.denovo_conflict WHERE ($1::text IS NULL OR dna_type::text = $1) \ + ORDER BY magnitude DESC, n_tips DESC, id LIMIT $2 OFFSET $3", + ) + .bind(&dna_label) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + Ok(Page { items, total, page: page.max(1), page_size: limit }) +} diff --git a/rust/crates/du-db/src/discovery.rs b/rust/crates/du-db/src/discovery.rs new file mode 100644 index 00000000..f0510910 --- /dev/null +++ b/rust/crates/du-db/src/discovery.rs @@ -0,0 +1,581 @@ +//! Haplogroup-discovery **variant-set consensus engine** (D6). +//! +//! Citizens publish their private variants (mutations beyond their terminal +//! haplogroup) as `com.decodingus.atmosphere.privateVariant` records; the Jetstream +//! consumer mirrors them into `fed.private_variant`. This engine: +//! 1. **materializes** them into `tree.biosample_private_variant` (resolving each +//! variant to a `core.variant` id, the biosample to its sample_guid, and the +//! terminal haplogroup name to an id), and +//! 2. **pools** the per-sample variant sets into `tree.proposed_branch` by +//! variant-set **Jaccard** similarity — clustering samples that share a private +//! branch, scoring confidence, and flagging partial overlaps for curator split. +//! +//! Structure mirrors [`crate::sequencer`]'s consensus engine: an advisory-lock +//! 
wrapper, a **declarative** recompute (counts are *recomputed and assigned* each
+//! run, never incremented — so repeated runs are idempotent), stable proposal ids
+//! via a partial-unique `cluster_key`, and preservation of curator decisions
+//! (`ACCEPTED`/`REJECTED`/`PROMOTED` are never auto-touched). Coexists with the
+//! event-driven [`crate::proposal::submit`] path (whose proposals have a NULL
+//! `cluster_key` and are excluded from the engine's upsert space).
+
+use crate::DbError;
+use serde_json::Value;
+use sqlx::PgPool;
+use std::collections::HashMap;
+use uuid::Uuid;
+
+// NOTE(review): in this copy of the patch several generic arguments are missing
+// (`Result {`, `HashMap = …`); the code is reproduced as found — restore them from
+// the upstream file before compiling.
+
+/// Advisory-lock key guarding concurrent recomputes (hourly job vs. manual run).
+const DISCOVERY_ADVISORY_KEY: i64 = 0x4453_4356_5253; // "DSCVRS"
+
+/// Per-DNA-arm consensus thresholds (seeded in `tree.discovery_config`, mig 0029).
+#[derive(Debug, Clone)]
+pub struct DnaThresholds {
+    /// Minimum cluster size for a proposal to become ready for review.
+    pub consensus_threshold: i64,
+    /// Minimum cluster size for auto-promotion (only when auto-promote is enabled).
+    pub auto_promote_threshold: i64,
+    /// Minimum confidence score for "ready" and for auto-promotion.
+    pub confidence_threshold: f64,
+    /// Jaccard similarity at or above which a sample joins a cluster's seed.
+    pub similarity_match_threshold: f64,
+    /// Jaccard similarity at or above which (but below match) a peer flags the
+    /// cluster for curator split review.
+    pub similarity_split_threshold: f64,
+}
+
+impl Default for DnaThresholds {
+    /// Built-in thresholds used when `tree.discovery_config` has no override.
+    fn default() -> Self {
+        Self {
+            consensus_threshold: 3,
+            auto_promote_threshold: 10,
+            confidence_threshold: 0.95,
+            similarity_match_threshold: 0.80,
+            similarity_split_threshold: 0.50,
+        }
+    }
+}
+
+/// Discovery engine configuration (thresholds + confidence weights + flags).
+#[derive(Debug, Clone)]
+pub struct DiscoveryConfig {
+    /// Thresholds used for every lineage other than `"MT_DNA"`.
+    pub y: DnaThresholds,
+    /// Thresholds used for `"MT_DNA"`.
+    pub mt: DnaThresholds,
+    /// Auto-promote unanimous, named, well-supported clusters (default off).
+    pub auto_promote: bool,
+    /// A contributor whose cross-technology consensus confidence is below this floor
+    /// (or whose reconciliation is `INCOMPATIBLE`) is excluded from pooling — its
+    /// shaky calls can't drive a branch. Un-reconciled samples are kept (un-gated).
+    pub min_consensus_confidence: f64,
+    /// Weight on the cluster-size term of the confidence score.
+    pub w_count: f64,
+    /// Weight on the distinct-submitter term of the confidence score.
+    pub w_submitters: f64,
+    /// Weight on the cluster's consistency (mean member completeness).
+    pub w_consistency: f64,
+    /// Weight on the cluster's mean consensus reliability (un-reconciled = full).
+    pub w_reliability: f64,
+}
+
+impl Default for DiscoveryConfig {
+    /// Built-in configuration; the four default weights sum to 1.0.
+    fn default() -> Self {
+        Self {
+            y: DnaThresholds::default(),
+            mt: DnaThresholds::default(),
+            auto_promote: false,
+            min_consensus_confidence: 0.5,
+            w_count: 0.35,
+            w_submitters: 0.2,
+            w_consistency: 0.25,
+            w_reliability: 0.2,
+        }
+    }
+}
+
+impl DiscoveryConfig {
+    /// Thresholds for a lineage label: `mt` for exactly `"MT_DNA"`, `y` for
+    /// anything else.
+    fn thresholds(&self, dna: &str) -> &DnaThresholds {
+        if dna == "MT_DNA" {
+            &self.mt
+        } else {
+            &self.y
+        }
+    }
+}
+
+/// Read `tree.discovery_config` (key → JSONB), falling back to [`Default`].
+///
+/// Recognized keys: `thresholds_Y_DNA`, `thresholds_MT_DNA`, `confidence_weights`
+/// and `engine`. A missing key, a missing field inside a key, or a field of the
+/// wrong JSON type leaves the corresponding default in place.
+pub async fn load_config(pool: &PgPool) -> Result {
+    let rows: Vec<(String, Value)> =
+        sqlx::query_as("SELECT config_key, config_value FROM tree.discovery_config")
+            .fetch_all(pool)
+            .await?;
+    let map: HashMap = rows.into_iter().collect();
+    let mut cfg = DiscoveryConfig::default();
+    // Field-by-field overlay of one thresholds object onto `base`.
+    let parse = |v: Option<&Value>, base: DnaThresholds| -> DnaThresholds {
+        let Some(v) = v else { return base };
+        DnaThresholds {
+            consensus_threshold: v.get("consensus_threshold").and_then(Value::as_i64).unwrap_or(base.consensus_threshold),
+            auto_promote_threshold: v.get("auto_promote_threshold").and_then(Value::as_i64).unwrap_or(base.auto_promote_threshold),
+            confidence_threshold: v.get("confidence_threshold").and_then(Value::as_f64).unwrap_or(base.confidence_threshold),
+            similarity_match_threshold: v.get("similarity_match_threshold").and_then(Value::as_f64).unwrap_or(base.similarity_match_threshold),
+            similarity_split_threshold: v.get("similarity_split_threshold").and_then(Value::as_f64).unwrap_or(base.similarity_split_threshold),
+        }
+    };
+    cfg.y = parse(map.get("thresholds_Y_DNA"), DnaThresholds::default());
+    cfg.mt = parse(map.get("thresholds_MT_DNA"), DnaThresholds::default());
+    if let Some(w) = map.get("confidence_weights") {
+        cfg.w_count = w.get("w_count").and_then(Value::as_f64).unwrap_or(cfg.w_count);
+        cfg.w_submitters = w.get("w_submitters").and_then(Value::as_f64).unwrap_or(cfg.w_submitters);
+        cfg.w_consistency = w.get("w_consistency").and_then(Value::as_f64).unwrap_or(cfg.w_consistency);
+        cfg.w_reliability = w.get("w_reliability").and_then(Value::as_f64).unwrap_or(cfg.w_reliability);
+    }
+    if let Some(e) = map.get("engine") {
+        cfg.auto_promote = e.get("auto_promote").and_then(Value::as_bool).unwrap_or(cfg.auto_promote);
+        cfg.min_consensus_confidence =
+            e.get("min_consensus_confidence").and_then(Value::as_f64).unwrap_or(cfg.min_consensus_confidence);
+    }
+    Ok(cfg)
+}
+
+/// Outcome of [`recompute_consensus`].
+#[derive(Debug, Default, Clone)]
+pub struct DiscoveryReport {
+    /// Upserts issued into `tree.biosample_private_variant` (one per variant element).
+    pub bpv_upserted: u64,
+    /// `ACTIVE` rows deleted because no current fed record backs them.
+    pub bpv_pruned: u64,
+    /// Fed rows whose biosample did not resolve to a `sample_guid`.
+    pub samples_unresolved: u64,
+    pub proposals_active: u64,
+    pub proposals_ready: u64,
+    pub split_flagged: u64,
+    /// Proposals successfully promoted by the engine in this run.
+    pub auto_promoted: u64,
+}
+
+/// Recompute the discovery consensus from `fed.private_variant`. Single-flighted by
+/// a session advisory lock (a second caller no-ops). Unlocks on every path.
+pub async fn recompute_consensus(pool: &PgPool, cfg: &DiscoveryConfig) -> Result { + let mut lock_conn = pool.acquire().await?; + let locked: bool = sqlx::query_scalar("SELECT pg_try_advisory_lock($1)") + .bind(DISCOVERY_ADVISORY_KEY) + .fetch_one(&mut *lock_conn) + .await?; + if !locked { + return Ok(DiscoveryReport::default()); + } + let result = recompute_locked(pool, cfg).await; + let _ = sqlx::query("SELECT pg_advisory_unlock($1)") + .bind(DISCOVERY_ADVISORY_KEY) + .execute(&mut *lock_conn) + .await; + result +} + +async fn recompute_locked(pool: &PgPool, cfg: &DiscoveryConfig) -> Result { + let mut rep = DiscoveryReport::default(); + materialize(pool, &mut rep).await?; + let auto_ids = pool_and_propose(pool, cfg, &mut rep).await?; + // Auto-promote (off by default; only named clusters succeed — promote requires a + // name, so unnamed engine proposals stay ACCEPTED for a curator to name+promote). + for id in auto_ids { + match crate::proposal::promote(pool, id, "discovery-engine").await { + Ok(_) => rep.auto_promoted += 1, + Err(e) => tracing::info!(proposal = id, error = %e, "auto-promote skipped (likely unnamed)"), + } + } + Ok(rep) +} + +// ── materialization: fed.private_variant → tree.biosample_private_variant ───────── + +/// A fed.private_variant row joined to its resolved sample_guid (NULL when the +/// biosample isn't mirrored): (sample_guid, dna_type, terminal_haplogroup, variants). +type FedRow = (Option, Option, Option, Value); + +async fn materialize(pool: &PgPool, rep: &mut DiscoveryReport) -> Result<(), DbError> { + let rows: Vec = sqlx::query_as( + "SELECT b.sample_guid, pv.dna_type, pv.terminal_haplogroup, pv.variants \ + FROM fed.private_variant pv \ + LEFT JOIN core.biosample b ON b.atproto->>'uri' = pv.biosample_ref", + ) + .fetch_all(pool) + .await?; + + let mut tx = pool.begin().await?; + // (sample_guid, variant_id) pairs materialized this run — the prune keep-set. 
+ let mut keep_samples: Vec = Vec::new(); + let mut keep_variants: Vec = Vec::new(); + + for (sample_guid, dna_type, terminal, variants) in &rows { + let Some(sample_guid) = sample_guid else { + rep.samples_unresolved += 1; + continue; + }; + let dna = dna_type.as_deref().unwrap_or("Y_DNA"); + let Some(items) = variants.as_array() else { continue }; + for v in items { + // Resolve to a core.variant id: named -> by name; else by coordinates. + let variant_id = if let Some(name) = v.get("name").and_then(Value::as_str).filter(|s| !s.is_empty()) { + crate::variant::ensure_base_variant_id(&mut tx, name).await? + } else if let (Some(contig), Some(pos)) = + (v.get("contig").and_then(Value::as_str), v.get("position").and_then(Value::as_i64)) + { + let anc = v.get("ancestral").and_then(Value::as_str); + let der = v.get("derived").and_then(Value::as_str); + crate::variant::ensure_variant_by_coords(&mut tx, contig, pos, anc, der).await? + } else { + continue; // malformed element + }; + + sqlx::query( + "INSERT INTO tree.biosample_private_variant \ + (sample_guid, variant_id, haplogroup_type, terminal_haplogroup_id, status) \ + VALUES ($1, $2, $3::core.dna_type, \ + (SELECT id FROM tree.haplogroup WHERE name = $4 AND haplogroup_type = $3::core.dna_type \ + AND valid_until IS NULL LIMIT 1), 'ACTIVE') \ + ON CONFLICT (sample_guid, variant_id, haplogroup_type) DO UPDATE SET \ + terminal_haplogroup_id = EXCLUDED.terminal_haplogroup_id, \ + status = CASE WHEN tree.biosample_private_variant.status IN ('INVALIDATED','PROMOTED') \ + THEN tree.biosample_private_variant.status ELSE 'ACTIVE' END", + ) + .bind(sample_guid) + .bind(variant_id) + .bind(dna) + .bind(terminal) + .execute(&mut *tx) + .await?; + rep.bpv_upserted += 1; + keep_samples.push(*sample_guid); + keep_variants.push(variant_id); + } + } + + // Prune ACTIVE rows no longer backed by a current fed record (paired arrays as + // the keep-set; an empty set prunes all ACTIVE, which is correct when fed empties). 
+ rep.bpv_pruned = sqlx::query( + "DELETE FROM tree.biosample_private_variant bpv \ + WHERE bpv.status = 'ACTIVE' AND NOT EXISTS ( \ + SELECT 1 FROM unnest($1::uuid[], $2::bigint[]) AS k(sg, vid) \ + WHERE k.sg = bpv.sample_guid AND k.vid = bpv.variant_id)", + ) + .bind(&keep_samples) + .bind(&keep_variants) + .execute(&mut *tx) + .await? + .rows_affected(); + + tx.commit().await?; + Ok(()) +} + +/// After a proposal is promoted to a new terminal haplogroup, freeze its +/// contributing samples: mark their `tree.biosample_private_variant` rows for the +/// promoted (defining) variants `PROMOTED` and bump their `terminal_haplogroup_id` +/// to the new branch. This is the only writable per-sample assignment record (there +/// is no `biosample_haplogroup` table), and marking them `PROMOTED` also freezes +/// them out of the recompute loop (materialize/prune/pool all act on `ACTIVE` only), +/// so a promoted contribution never re-pools into a fresh proposal. Runs inside the +/// promote transaction. Returns the number of rows reassigned. +pub async fn reassign_after_promote( + conn: &mut sqlx::PgConnection, + proposed_branch_id: i64, + new_haplogroup_id: i64, +) -> Result { + let affected = sqlx::query( + "UPDATE tree.biosample_private_variant bpv \ + SET status = 'PROMOTED', terminal_haplogroup_id = $2 \ + WHERE bpv.status = 'ACTIVE' \ + AND bpv.sample_guid IN ( \ + SELECT unnest(discovery_sample_guids) FROM tree.proposed_branch WHERE id = $1) \ + AND bpv.variant_id IN ( \ + SELECT variant_id FROM tree.proposed_branch_variant WHERE proposed_branch_id = $1)", + ) + .bind(proposed_branch_id) + .bind(new_haplogroup_id) + .execute(conn) + .await? + .rows_affected(); + Ok(affected) +} + +// ── pooling: per-sample variant sets → proposed branches (Jaccard) ─────────────── + +/// One sample's private-variant set under a terminal. +struct SampleSet { + sample_guid: Uuid, + vset: Vec, // sorted ascending +} + +/// A cluster of samples that share a private branch. 
+struct Cluster { + members: Vec, + /// Union of member variant ids (sorted) — the proposed branch's defining set. + union: Vec, + /// supporting_sample_count per union variant (parallel to `union`). + counts: Vec, + /// Mean member completeness vs. the union (∈ (0,1]); the consistency signal. + consistency: f64, + /// A diverging peer was seen (Jaccard in [split, match)) — flag for curator split. + split: bool, +} + +fn jaccard(a: &[i64], b: &[i64]) -> f64 { + // Both sorted ascending; merge to count intersection. + let (mut i, mut j, mut inter) = (0usize, 0usize, 0usize); + while i < a.len() && j < b.len() { + match a[i].cmp(&b[j]) { + std::cmp::Ordering::Less => i += 1, + std::cmp::Ordering::Greater => j += 1, + std::cmp::Ordering::Equal => { + inter += 1; + i += 1; + j += 1; + } + } + } + let union = a.len() + b.len() - inter; + if union == 0 { + 0.0 + } else { + inter as f64 / union as f64 + } +} + +/// Deterministic seed-based clustering of one bucket's sample-sets. Sets are sorted +/// (size desc, then min variant id, then guid) so the result is independent of DB +/// row order. Each unassigned set seeds a cluster; later sets join if their Jaccard +/// with the seed ≥ match; sets in [split, match) flag the seed for split review. 
+fn cluster_bucket(mut sets: Vec, th: &DnaThresholds) -> Vec { + sets.sort_by(|a, b| { + b.vset.len().cmp(&a.vset.len()) + .then_with(|| a.vset.first().cmp(&b.vset.first())) + .then_with(|| a.sample_guid.cmp(&b.sample_guid)) + }); + let n = sets.len(); + let mut assigned = vec![false; n]; + let mut clusters = Vec::new(); + + for i in 0..n { + if assigned[i] { + continue; + } + assigned[i] = true; + let seed = sets[i].vset.clone(); + let mut members = vec![sets[i].sample_guid]; + let mut member_sets = vec![sets[i].vset.clone()]; + let mut split = false; + for (j, item) in sets.iter().enumerate().skip(i + 1) { + if assigned[j] { + continue; + } + let sim = jaccard(&seed, &item.vset); + if sim >= th.similarity_match_threshold { + assigned[j] = true; + members.push(item.sample_guid); + member_sets.push(item.vset.clone()); + } else if sim >= th.similarity_split_threshold { + split = true; // diverging peer — leave it to seed its own cluster + } + } + // Union + per-variant support across cluster members. + let mut support: std::collections::BTreeMap = std::collections::BTreeMap::new(); + for ms in &member_sets { + for &vid in ms { + *support.entry(vid).or_insert(0) += 1; + } + } + let union: Vec = support.keys().copied().collect(); + let counts: Vec = union.iter().map(|v| support[v]).collect(); + let consistency = if union.is_empty() { + 0.0 + } else { + member_sets.iter().map(|ms| ms.len() as f64 / union.len() as f64).sum::() / member_sets.len() as f64 + }; + clusters.push(Cluster { members, union, counts, consistency, split }); + } + clusters +} + +/// Pool ACTIVE private variants into proposed branches; returns proposal ids eligible +/// for auto-promotion (when `cfg.auto_promote`). +async fn pool_and_propose(pool: &PgPool, cfg: &DiscoveryConfig, rep: &mut DiscoveryReport) -> Result, DbError> { + // Per-sample variant sets under a resolved terminal. 
+ let raw: Vec<(Uuid, i64, String, Vec)> = sqlx::query_as( + "SELECT sample_guid, terminal_haplogroup_id, haplogroup_type::text AS dna, \ + array_agg(variant_id ORDER BY variant_id) AS vset \ + FROM tree.biosample_private_variant \ + WHERE status = 'ACTIVE' AND terminal_haplogroup_id IS NOT NULL \ + GROUP BY sample_guid, terminal_haplogroup_id, haplogroup_type", + ) + .fetch_all(pool) + .await?; + + // sample_guid → repo_did (distinct-submitter signal). + let submitter: HashMap> = sqlx::query_as::<_, (Uuid, Option)>( + "SELECT sample_guid, atproto->>'repo_did' FROM core.biosample WHERE atproto IS NOT NULL", + ) + .fetch_all(pool) + .await? + .into_iter() + .collect(); + + // (sample_guid, dna) → cross-technology consensus reliability (confidence, + // compatibility), via the citizen's repo DID = the reconciliation publisher's DID. + // Drives both the exclusion gate and the confidence down-weight below. + let reliability: HashMap<(Uuid, String), (Option, Option)> = + sqlx::query_as::<_, (Uuid, String, Option, Option)>( + "SELECT DISTINCT ON (b.sample_guid, r.dna_type) b.sample_guid, r.dna_type, r.confidence, r.compatibility_level \ + FROM core.biosample b \ + JOIN fed.haplogroup_reconciliation r ON r.did = b.atproto->>'repo_did' \ + WHERE b.atproto IS NOT NULL AND r.dna_type IS NOT NULL \ + ORDER BY b.sample_guid, r.dna_type, r.run_count DESC NULLS LAST, r.time_us DESC", + ) + .fetch_all(pool) + .await? + .into_iter() + .map(|(g, dna, conf, compat)| ((g, dna), (conf, compat))) + .collect(); + + // Bucket by (parent terminal, dna). + let mut buckets: HashMap<(i64, String), Vec> = HashMap::new(); + for (sample_guid, terminal, dna, vset) in raw { + buckets.entry((terminal, dna)).or_default().push(SampleSet { sample_guid, vset }); + } + + let mut tx = pool.begin().await?; + let mut kept_ids: Vec = Vec::new(); + let mut auto_ids: Vec = Vec::new(); + + // Deterministic bucket order. 
+ let mut keys: Vec<(i64, String)> = buckets.keys().cloned().collect(); + keys.sort(); + + for key in keys { + let (parent_id, dna) = key.clone(); + let th = cfg.thresholds(&dna); + let sets = buckets.remove(&key).unwrap(); + // Reliability gate: drop members whose consensus is INCOMPATIBLE or below the + // confidence floor. Un-reconciled samples (no entry) are kept (un-gated). + let sets: Vec = sets + .into_iter() + .filter(|s| match reliability.get(&(s.sample_guid, dna.clone())) { + Some((conf, compat)) => { + compat.as_deref() != Some("INCOMPATIBLE") + && conf.map(|c| c >= cfg.min_consensus_confidence).unwrap_or(true) + } + None => true, + }) + .collect(); + if sets.is_empty() { + continue; + } + for c in cluster_bucket(sets, th) { + let count = c.members.len() as i64; + let submitters = c + .members + .iter() + .filter_map(|g| submitter.get(g).and_then(|d| d.clone())) + .collect::>() + .len() + .max(c.members.len()) as i64; // fall back to sample count if dids missing + let f_count = (count as f64 / th.consensus_threshold.max(1) as f64).min(1.0); + let f_sub = (submitters as f64 / th.consensus_threshold.max(1) as f64).min(1.0); + // Mean cross-technology consensus reliability of the cluster's members + // (un-reconciled = full credit, so the unknown isn't penalized). 
+ let mean_rel = c + .members + .iter() + .map(|g| reliability.get(&(*g, dna.clone())).and_then(|(c, _)| *c).unwrap_or(1.0)) + .sum::() + / count as f64; + let confidence = (cfg.w_count * f_count + + cfg.w_submitters * f_sub + + cfg.w_consistency * c.consistency + + cfg.w_reliability * mean_rel) + .min(1.0); + let ready = count >= th.consensus_threshold && confidence >= th.confidence_threshold; + let auto = cfg.auto_promote + && count >= th.auto_promote_threshold + && confidence >= th.confidence_threshold + && c.consistency >= 0.999; + let status = if auto { + "ACCEPTED" + } else if c.split { + "SPLIT_CANDIDATE" + } else if ready { + "READY_FOR_REVIEW" + } else { + "PROPOSED" + }; + let cluster_key: String = c.union.iter().map(|v| v.to_string()).collect::>().join(","); + let guids: Vec = c.members.clone(); + + let id: i64 = sqlx::query_scalar( + "INSERT INTO tree.proposed_branch \ + (parent_haplogroup_id, haplogroup_type, cluster_key, discovery_sample_guids, \ + evidence_count, confidence, proposed_by, status) \ + VALUES ($1, $2::core.dna_type, $3, $4, $5, $6::float8::numeric, 'discovery-engine', $7) \ + ON CONFLICT (parent_haplogroup_id, haplogroup_type, cluster_key) \ + WHERE status IN ('PROPOSED','UNDER_REVIEW','READY_FOR_REVIEW','SPLIT_CANDIDATE') AND cluster_key IS NOT NULL \ + DO UPDATE SET discovery_sample_guids = EXCLUDED.discovery_sample_guids, \ + evidence_count = EXCLUDED.evidence_count, confidence = EXCLUDED.confidence, \ + status = EXCLUDED.status \ + RETURNING id", + ) + .bind(parent_id) + .bind(&dna) + .bind(&cluster_key) + .bind(&guids) + .bind(count as i32) + .bind(confidence) + .bind(status) + .fetch_one(&mut *tx) + .await?; + + // Rebuild the defining-variant set (declarative — DELETE then insert). 
+ sqlx::query("DELETE FROM tree.proposed_branch_variant WHERE proposed_branch_id = $1") + .bind(id) + .execute(&mut *tx) + .await?; + sqlx::query( + "INSERT INTO tree.proposed_branch_variant (proposed_branch_id, variant_id, supporting_sample_count) \ + SELECT $1, vid, cnt FROM unnest($2::bigint[], $3::int[]) AS t(vid, cnt)", + ) + .bind(id) + .bind(&c.union) + .bind(&c.counts) + .execute(&mut *tx) + .await?; + + // Refresh the engine's own split-candidate evidence row (idempotent). + sqlx::query( + "DELETE FROM tree.proposed_branch_evidence \ + WHERE proposed_branch_id = $1 AND evidence_type = 'SPLIT_CANDIDATE'", + ) + .bind(id) + .execute(&mut *tx) + .await?; + if c.split { + rep.split_flagged += 1; + sqlx::query( + "INSERT INTO tree.proposed_branch_evidence (proposed_branch_id, evidence_type, evidence_detail) \ + VALUES ($1, 'SPLIT_CANDIDATE', $2)", + ) + .bind(id) + .bind(serde_json::json!({ "note": "a sample partially overlaps this branch (Jaccard in [split, match))" })) + .execute(&mut *tx) + .await?; + } + + kept_ids.push(id); + rep.proposals_active += 1; + if ready { + rep.proposals_ready += 1; + } + if auto { + auto_ids.push(id); + } + } + } + + // Prune engine-owned open proposals not regenerated this run. Preserves + // curator-decided states and submit()-created (NULL cluster_key) proposals. + sqlx::query( + "DELETE FROM tree.proposed_branch \ + WHERE cluster_key IS NOT NULL \ + AND status IN ('PROPOSED','UNDER_REVIEW','READY_FOR_REVIEW','SPLIT_CANDIDATE') \ + AND id <> ALL($1)", + ) + .bind(&kept_ids) + .execute(&mut *tx) + .await?; + + tx.commit().await?; + Ok(auto_ids) +} diff --git a/rust/crates/du-db/src/exchange.rs b/rust/crates/du-db/src/exchange.rs new file mode 100644 index 00000000..e029340c --- /dev/null +++ b/rust/crates/du-db/src/exchange.rs @@ -0,0 +1,361 @@ +//! D1 encrypted-exchange **broker** (AppView side) — PII-free. The AppView never +//! sees plaintext or session keys: it records consent (the handler verifies the +//! 
Ed25519 DID signatures), mirrors published X25519 keys, gates **dual consent** +//! into a session, and blind-relays ciphertext envelopes. This module is pure +//! storage + state transitions; the signature verification lives in the `du-web` +//! handler (it needs async DID resolution). +//! +//! **Canonical signed messages** ([`messages`]) are a cross-repo contract: the +//! Navigator Edge must sign byte-identical strings. Keep them stable. + +use crate::DbError; +use serde_json::Value; +use sqlx::PgPool; +use uuid::Uuid; + +/// The exact bytes each record's Ed25519 signature is computed over. A cross-repo +/// contract with the Navigator Edge — do not reorder or reformat. +pub mod messages { + pub fn request(request_uri: &str, initiator_did: &str, partner_did: &str, purpose: &str, scope: Option<&str>) -> String { + format!("exchange-request\n{request_uri}\n{initiator_did}\n{partner_did}\n{purpose}\n{}", scope.unwrap_or("")) + } + pub fn consent(request_uri: &str, consenting_did: &str, given: bool) -> String { + format!("exchange-consent\n{request_uri}\n{consenting_did}\n{given}") + } + pub fn publickey(did: &str, x25519_pub_b64: &str, key_uri: Option<&str>) -> String { + format!("exchange-publickey\n{did}\n{x25519_pub_b64}\n{}", key_uri.unwrap_or("")) + } + /// Replay-guarded poll: caller proves it is `did` at `ts` (unix seconds). + pub fn poll(did: &str, ts: i64) -> String { + format!("exchange-poll\n{did}\n{ts}") + } + pub fn relay(session_id: &str, from_did: &str, to_did: &str, seq: i32, blob_sha256_b64: &str) -> String { + format!("exchange-relay\n{session_id}\n{from_did}\n{to_did}\n{seq}\n{blob_sha256_b64}") + } + pub fn ack(did: &str, envelope_id: i64) -> String { + format!("exchange-ack\n{did}\n{envelope_id}") + } +} + +// ── published X25519 keys ───────────────────────────────────────────────────── + +/// Upsert a DID's published X25519 key (the handler has verified the signature). 
+pub async fn publish_key(pool: &PgPool, did: &str, x25519_pub: &[u8], key_uri: Option<&str>) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO exchange.exchange_publickey (did, x25519_pub, key_uri, sig_verified_at) \ + VALUES ($1, $2, $3, now()) \ + ON CONFLICT (did) DO UPDATE SET x25519_pub = EXCLUDED.x25519_pub, key_uri = EXCLUDED.key_uri, \ + sig_verified_at = now()", + ) + .bind(did) + .bind(x25519_pub) + .bind(key_uri) + .execute(pool) + .await?; + Ok(()) +} + +/// A peer's published exchange key. +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct PublicKey { + pub did: String, + pub x25519_pub: Vec, + pub key_uri: Option, +} + +pub async fn key_for(pool: &PgPool, did: &str) -> Result, DbError> { + Ok(sqlx::query_as("SELECT did, x25519_pub, key_uri FROM exchange.exchange_publickey WHERE did = $1") + .bind(did) + .fetch_optional(pool) + .await?) +} + +// ── requests + dual-consent ─────────────────────────────────────────────────── + +/// A new (verified) exchange request. +pub struct NewRequest<'a> { + pub request_uri: &'a str, + pub initiator_did: &'a str, + pub partner_did: &'a str, + pub purpose: &'a str, + pub scope: Option<&'a str>, + pub details: Value, +} + +/// A request's routing metadata — used by the D5 ACL gate (the `scope` carries +/// `project:` when the exchange is project-scoped). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct RequestMeta { + pub initiator_did: String, + pub partner_did: String, + pub scope: Option, +} + +/// Fetch a request's routing metadata (for the project-scope ACL check). +pub async fn request_meta(pool: &PgPool, request_uri: &str) -> Result, DbError> { + Ok(sqlx::query_as("SELECT initiator_did, partner_did, scope FROM exchange.exchange_request WHERE request_uri = $1") + .bind(request_uri) + .fetch_optional(pool) + .await?) +} + +/// Record a signed exchange request (idempotent on `request_uri`). 
+pub async fn create_request(pool: &PgPool, r: &NewRequest<'_>) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO exchange.exchange_request \ + (request_uri, initiator_did, partner_did, purpose, scope, details) \ + VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT (request_uri) DO NOTHING", + ) + .bind(r.request_uri) + .bind(r.initiator_did) + .bind(r.partner_did) + .bind(r.purpose) + .bind(r.scope) + .bind(&r.details) + .execute(pool) + .await?; + Ok(()) +} + +/// Outcome of recording a consent. +#[derive(Debug, PartialEq, Eq)] +pub enum ConsentOutcome { + /// Recorded; still waiting on the other party. + Recorded, + /// Both parties consented — a session was created. + Consented(Uuid), + /// This party declined — the request is dead. + Declined, + /// No such request. + Unknown, +} + +/// Record a signed consent and apply the **dual-consent gate**: when both the +/// initiator and partner have a `consent_given = true` consent, flip the request to +/// `CONSENTED` and open an `exchange_session`. A `false` consent declines the request. 
+pub async fn record_consent( + pool: &PgPool, + request_uri: &str, + consenting_did: &str, + given: bool, + consent_uri: Option<&str>, + signature: &str, +) -> Result { + let mut tx = pool.begin().await?; + let req: Option<(String, String, String)> = sqlx::query_as( + "SELECT initiator_did, partner_did, status FROM exchange.exchange_request WHERE request_uri = $1 FOR UPDATE", + ) + .bind(request_uri) + .fetch_optional(&mut *tx) + .await?; + let Some((initiator, partner, status)) = req else { + return Ok(ConsentOutcome::Unknown); + }; + + sqlx::query( + "INSERT INTO exchange.exchange_consent (request_uri, consenting_did, consent_given, consent_uri, signature) \ + VALUES ($1, $2, $3, $4, $5) \ + ON CONFLICT (request_uri, consenting_did) DO UPDATE SET \ + consent_given = EXCLUDED.consent_given, consent_uri = EXCLUDED.consent_uri, signature = EXCLUDED.signature", + ) + .bind(request_uri) + .bind(consenting_did) + .bind(given) + .bind(consent_uri) + .bind(signature) + .execute(&mut *tx) + .await?; + + if !given { + sqlx::query("UPDATE exchange.exchange_request SET status = 'DECLINED', updated_at = now() WHERE request_uri = $1") + .bind(request_uri) + .execute(&mut *tx) + .await?; + tx.commit().await?; + return Ok(ConsentOutcome::Declined); + } + + // Both participants must have an affirmative consent on record. 
+ let yes_count: i64 = sqlx::query_scalar( + "SELECT count(*) FROM exchange.exchange_consent \ + WHERE request_uri = $1 AND consent_given = true AND consenting_did IN ($2, $3)", + ) + .bind(request_uri) + .bind(&initiator) + .bind(&partner) + .fetch_one(&mut *tx) + .await?; + + if yes_count >= 2 && status == "PENDING" { + sqlx::query("UPDATE exchange.exchange_request SET status = 'CONSENTED', updated_at = now() WHERE request_uri = $1") + .bind(request_uri) + .execute(&mut *tx) + .await?; + let session_id: Uuid = sqlx::query_scalar( + "INSERT INTO exchange.exchange_session (request_uri, status, expires_at) \ + VALUES ($1, 'ESTABLISHING', now() + interval '7 days') RETURNING session_id", + ) + .bind(request_uri) + .fetch_one(&mut *tx) + .await?; + tx.commit().await?; + return Ok(ConsentOutcome::Consented(session_id)); + } + tx.commit().await?; + Ok(ConsentOutcome::Recorded) +} + +/// An exchange-ready session for a participant (the partner DID + its key pointer). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct ExchangeReady { + pub session_id: Uuid, + pub request_uri: String, + pub purpose: String, + pub partner_did: String, + pub partner_key_uri: Option, +} + +// ── blind relay (ciphertext store-and-forward) ──────────────────────────────── + +/// Store an opaque ciphertext envelope for the recipient. The caller (`from_did`) +/// and `to_did` must be the session's two participants and the session open; the +/// blob is treated as opaque (the broker can't decrypt it). Flips the session to +/// `ACTIVE`. Returns the envelope id. 
+pub async fn post_envelope( + pool: &PgPool, + session_id: Uuid, + from_did: &str, + to_did: &str, + seq: i32, + blob: &[u8], +) -> Result { + let mut tx = pool.begin().await?; + let row: Option<(String, String, String)> = sqlx::query_as( + "SELECT r.initiator_did, r.partner_did, s.status \ + FROM exchange.exchange_session s JOIN exchange.exchange_request r ON r.request_uri = s.request_uri \ + WHERE s.session_id = $1 FOR UPDATE OF s", + ) + .bind(session_id) + .fetch_optional(&mut *tx) + .await?; + let Some((initiator, partner, status)) = row else { + return Err(DbError::Conflict("no such session".into())); + }; + if !matches!(status.as_str(), "ESTABLISHING" | "ACTIVE") { + return Err(DbError::Conflict(format!("session is {status}"))); + } + let parties = [initiator.as_str(), partner.as_str()]; + if from_did == to_did || !parties.contains(&from_did) || !parties.contains(&to_did) { + return Err(DbError::Conflict("not a session participant".into())); + } + let id: i64 = sqlx::query_scalar( + "INSERT INTO exchange.relay_envelope (session_id, from_did, to_did, seq, size_bytes, blob, expires_at) \ + VALUES ($1, $2, $3, $4, $5, $6, (SELECT expires_at FROM exchange.exchange_session WHERE session_id = $1)) \ + RETURNING id", + ) + .bind(session_id) + .bind(from_did) + .bind(to_did) + .bind(seq) + .bind(blob.len() as i32) + .bind(blob) + .fetch_one(&mut *tx) + .await?; + sqlx::query("UPDATE exchange.exchange_session SET status = 'ACTIVE' WHERE session_id = $1 AND status = 'ESTABLISHING'") + .bind(session_id) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(id) +} + +/// An undelivered ciphertext envelope (the recipient's pull). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct Envelope { + pub id: i64, + pub from_did: String, + pub seq: i32, + pub blob: Vec, +} + +/// Pull the undelivered envelopes addressed to `to_did` in a session, ordered by seq. 
+pub async fn pull_envelopes(pool: &PgPool, session_id: Uuid, to_did: &str) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT id, from_did, seq, blob FROM exchange.relay_envelope \ + WHERE session_id = $1 AND to_did = $2 AND delivered_at IS NULL ORDER BY seq", + ) + .bind(session_id) + .bind(to_did) + .fetch_all(pool) + .await?) +} + +/// Ack (delete) a delivered envelope — only its own recipient may ack it. +pub async fn ack_envelope(pool: &PgPool, envelope_id: i64, did: &str) -> Result { + let affected = sqlx::query("DELETE FROM exchange.relay_envelope WHERE id = $1 AND to_did = $2") + .bind(envelope_id) + .bind(did) + .execute(pool) + .await? + .rows_affected(); + Ok(affected > 0) +} + +/// TTL cleanup: drop expired envelopes and expired sessions (cascading their +/// envelopes). Returns (envelopes_dropped, sessions_dropped). +pub async fn expire(pool: &PgPool) -> Result<(u64, u64), DbError> { + let envelopes = sqlx::query("DELETE FROM exchange.relay_envelope WHERE expires_at IS NOT NULL AND expires_at < now()") + .execute(pool) + .await? + .rows_affected(); + let sessions = sqlx::query("DELETE FROM exchange.exchange_session WHERE expires_at IS NOT NULL AND expires_at < now()") + .execute(pool) + .await? + .rows_affected(); + Ok((envelopes, sessions)) +} + +/// An incoming request awaiting `did`'s consent — **symmetric-blind**: the initiator is +/// NOT revealed (no `initiator_did`), only an opaque handle + purpose + time. Identities +/// reveal to both sides only after mutual consent (see [`pending_for`]). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct IncomingRequest { + pub request_uri: String, + pub purpose: String, + pub created_at: chrono::DateTime, +} + +/// PENDING requests addressed to `did` that `did` has not yet acted on — the +/// counterpart-discovery path that closes the introduce→consent loop. Deliberately omits +/// the initiator DID (the recipient consents blind). 
+pub async fn incoming_for(pool: &PgPool, did: &str) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT r.request_uri, r.purpose, r.created_at \ + FROM exchange.exchange_request r \ + WHERE r.partner_did = $1 AND r.status = 'PENDING' \ + AND NOT EXISTS (SELECT 1 FROM exchange.exchange_consent c \ + WHERE c.request_uri = r.request_uri AND c.consenting_did = $1) \ + ORDER BY r.created_at", + ) + .bind(did) + .fetch_all(pool) + .await?) +} + +/// Sessions ready for `did` to start (CONSENTED + an open session), with the partner. +pub async fn pending_for(pool: &PgPool, did: &str) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT s.session_id, r.request_uri, r.purpose, \ + CASE WHEN r.initiator_did = $1 THEN r.partner_did ELSE r.initiator_did END AS partner_did, \ + pk.key_uri AS partner_key_uri \ + FROM exchange.exchange_session s \ + JOIN exchange.exchange_request r ON r.request_uri = s.request_uri \ + LEFT JOIN exchange.exchange_publickey pk \ + ON pk.did = CASE WHEN r.initiator_did = $1 THEN r.partner_did ELSE r.initiator_did END \ + WHERE (r.initiator_did = $1 OR r.partner_did = $1) AND s.status IN ('ESTABLISHING','ACTIVE') \ + ORDER BY s.created_at DESC", + ) + .bind(did) + .fetch_all(pool) + .await?) +} diff --git a/rust/crates/du-db/src/fed/analytics.rs b/rust/crates/du-db/src/fed/analytics.rs new file mode 100644 index 00000000..edd3769a --- /dev/null +++ b/rust/crates/du-db/src/fed/analytics.rs @@ -0,0 +1,214 @@ +//! Computed-analytics records: genotype summary stats, population/ancestry +//! breakdown, and donor-level haplogroup reconciliation. +//! +//! These are already anonymized computed summaries (no raw genotypes/reads), so +//! the mirror keeps extracted scalar columns for indexed reporting **plus** the +//! computed payload as JSONB (the consumer strips `files` before storing). +//! Ordered+idempotent upsert; deletes go through [`super::delete`]. 
+ +use super::Common; +use crate::DbError; +use serde_json::Value; +use sqlx::PgPool; + +/// Genotype (chip/array) summary statistics. +pub struct Genotype { + pub common: Common, + pub biosample_ref: Option, + pub provider: Option, + pub test_type_code: Option, + pub chip_version: Option, + pub total_markers_called: Option, + pub total_markers_possible: Option, + pub no_call_rate: Option, + pub y_markers_called: Option, + pub mt_markers_called: Option, + pub autosomal_markers_called: Option, + pub het_rate: Option, + pub build_version: Option, + pub y_haplogroup: Option, + pub mt_haplogroup: Option, + pub population_breakdown_ref: Option, + /// Full record minus `files`. + pub record: Value, +} + +pub async fn upsert_genotype(pool: &PgPool, g: &Genotype) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.genotype \ + (did, rkey, at_uri, cid, biosample_ref, provider, test_type_code, chip_version, \ + total_markers_called, total_markers_possible, no_call_rate, y_markers_called, \ + mt_markers_called, autosomal_markers_called, het_rate, build_version, \ + y_haplogroup, mt_haplogroup, population_breakdown_ref, record, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, biosample_ref = EXCLUDED.biosample_ref, \ + provider = EXCLUDED.provider, test_type_code = EXCLUDED.test_type_code, \ + chip_version = EXCLUDED.chip_version, total_markers_called = EXCLUDED.total_markers_called, \ + total_markers_possible = EXCLUDED.total_markers_possible, no_call_rate = EXCLUDED.no_call_rate, \ + y_markers_called = EXCLUDED.y_markers_called, mt_markers_called = EXCLUDED.mt_markers_called, \ + autosomal_markers_called = EXCLUDED.autosomal_markers_called, het_rate = EXCLUDED.het_rate, \ + build_version = EXCLUDED.build_version, y_haplogroup = EXCLUDED.y_haplogroup, \ + mt_haplogroup = EXCLUDED.mt_haplogroup, 
population_breakdown_ref = EXCLUDED.population_breakdown_ref, \ + record = EXCLUDED.record, record_created_at = EXCLUDED.record_created_at, \ + time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.genotype.time_us", + ) + .bind(&g.common.did) + .bind(&g.common.rkey) + .bind(&g.common.at_uri) + .bind(&g.common.cid) + .bind(&g.biosample_ref) + .bind(&g.provider) + .bind(&g.test_type_code) + .bind(&g.chip_version) + .bind(g.total_markers_called) + .bind(g.total_markers_possible) + .bind(g.no_call_rate) + .bind(g.y_markers_called) + .bind(g.mt_markers_called) + .bind(g.autosomal_markers_called) + .bind(g.het_rate) + .bind(&g.build_version) + .bind(&g.y_haplogroup) + .bind(&g.mt_haplogroup) + .bind(&g.population_breakdown_ref) + .bind(&g.record) + .bind(g.common.record_created_at) + .bind(g.common.time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// Population/ancestry breakdown (PCA projection → sub-continental percentages). +pub struct PopulationBreakdown { + pub common: Common, + pub biosample_ref: Option, + pub analysis_method: Option, + pub panel_type: Option, + pub reference_populations: Option, + pub snps_analyzed: Option, + pub snps_with_genotype: Option, + pub snps_missing: Option, + pub confidence_level: Option, + pub components: Value, + pub super_population_summary: Value, + pub pca_coordinates: Option, +} + +pub async fn upsert_population_breakdown(pool: &PgPool, p: &PopulationBreakdown) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.population_breakdown \ + (did, rkey, at_uri, cid, biosample_ref, analysis_method, panel_type, reference_populations, \ + snps_analyzed, snps_with_genotype, snps_missing, confidence_level, components, \ + super_population_summary, pca_coordinates, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, biosample_ref = EXCLUDED.biosample_ref, \ + 
analysis_method = EXCLUDED.analysis_method, panel_type = EXCLUDED.panel_type, \ + reference_populations = EXCLUDED.reference_populations, snps_analyzed = EXCLUDED.snps_analyzed, \ + snps_with_genotype = EXCLUDED.snps_with_genotype, snps_missing = EXCLUDED.snps_missing, \ + confidence_level = EXCLUDED.confidence_level, components = EXCLUDED.components, \ + super_population_summary = EXCLUDED.super_population_summary, pca_coordinates = EXCLUDED.pca_coordinates, \ + record_created_at = EXCLUDED.record_created_at, time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.population_breakdown.time_us", + ) + .bind(&p.common.did) + .bind(&p.common.rkey) + .bind(&p.common.at_uri) + .bind(&p.common.cid) + .bind(&p.biosample_ref) + .bind(&p.analysis_method) + .bind(&p.panel_type) + .bind(&p.reference_populations) + .bind(p.snps_analyzed) + .bind(p.snps_with_genotype) + .bind(p.snps_missing) + .bind(p.confidence_level) + .bind(&p.components) + .bind(&p.super_population_summary) + .bind(&p.pca_coordinates) + .bind(p.common.record_created_at) + .bind(p.common.time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// Donor-level multi-run haplogroup reconciliation (consensus call). 
+pub struct Reconciliation { + pub common: Common, + pub specimen_donor_ref: Option, + pub dna_type: Option, + pub compatibility_level: Option, + pub consensus_haplogroup: Option, + pub confidence: Option, + pub branch_compatibility_score: Option, + pub snp_concordance: Option, + pub run_count: Option, + pub record: Value, +} + +pub async fn upsert_reconciliation(pool: &PgPool, r: &Reconciliation) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.haplogroup_reconciliation \ + (did, rkey, at_uri, cid, specimen_donor_ref, dna_type, compatibility_level, \ + consensus_haplogroup, confidence, branch_compatibility_score, snp_concordance, \ + run_count, record, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, specimen_donor_ref = EXCLUDED.specimen_donor_ref, \ + dna_type = EXCLUDED.dna_type, compatibility_level = EXCLUDED.compatibility_level, \ + consensus_haplogroup = EXCLUDED.consensus_haplogroup, confidence = EXCLUDED.confidence, \ + branch_compatibility_score = EXCLUDED.branch_compatibility_score, \ + snp_concordance = EXCLUDED.snp_concordance, run_count = EXCLUDED.run_count, \ + record = EXCLUDED.record, record_created_at = EXCLUDED.record_created_at, \ + time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.haplogroup_reconciliation.time_us", + ) + .bind(&r.common.did) + .bind(&r.common.rkey) + .bind(&r.common.at_uri) + .bind(&r.common.cid) + .bind(&r.specimen_donor_ref) + .bind(&r.dna_type) + .bind(&r.compatibility_level) + .bind(&r.consensus_haplogroup) + .bind(r.confidence) + .bind(r.branch_compatibility_score) + .bind(r.snp_concordance) + .bind(r.run_count) + .bind(&r.record) + .bind(r.common.record_created_at) + .bind(r.common.time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// Population-level ancestry report: average super-population percentage across +/// all mirrored breakdowns, with a 
contributing-sample count. Aggregates over the +/// `super_population_summary` JSONB with a lateral unnest — query-time SQL, no +/// per-PDS fetch. +#[derive(Debug, sqlx::FromRow)] +pub struct SuperPopulationShare { + pub super_population: Option, + pub samples: i64, + pub avg_percentage: Option, +} + +pub async fn super_population_distribution(pool: &PgPool) -> Result, DbError> { + let rows = sqlx::query_as::<_, SuperPopulationShare>( + "SELECT sp->>'superPopulation' AS super_population, \ + count(*) AS samples, \ + avg((sp->>'percentage')::double precision) AS avg_percentage \ + FROM fed.population_breakdown pb, \ + jsonb_array_elements(pb.super_population_summary) AS sp \ + GROUP BY sp->>'superPopulation' \ + ORDER BY avg_percentage DESC NULLS LAST", + ) + .fetch_all(pool) + .await?; + Ok(rows) +} diff --git a/rust/crates/du-db/src/fed/core.rs b/rust/crates/du-db/src/fed/core.rs new file mode 100644 index 00000000..5f952151 --- /dev/null +++ b/rust/crates/du-db/src/fed/core.rs @@ -0,0 +1,203 @@ +//! Core container records: biosample, sequencerun, project, workspace. +//! +//! These carry potential donor PII (donorIdentifier / sampleAccession / +//! description / file paths), so the mirror keeps **only typed, non-identifying +//! columns** — no raw record JSONB — and the consumer never populates the PII +//! fields. Ordered+idempotent upsert (overwrite only on a `time_us` >= the stored +//! one); deletes go through [`super::delete`]. + +use super::Common; +use crate::DbError; +use sqlx::PgPool; + +/// A population count of one consensus haplogroup across mirrored biosamples. +#[derive(Debug, sqlx::FromRow)] +pub struct HaplogroupCount { + pub dna_type: String, + pub haplogroup: String, + pub samples: i64, +} + +/// Distribution of Y-DNA and mtDNA haplogroup calls across all mirrored +/// biosamples, most-common first — a population report over `fed.biosample` +/// computed with query-time SQL. 
+pub async fn haplogroup_distribution(pool: &PgPool) -> Result, DbError> { + let rows = sqlx::query_as::<_, HaplogroupCount>( + "SELECT dna_type, haplogroup, count(*) AS samples FROM ( \ + SELECT 'Y_DNA' AS dna_type, y_haplogroup AS haplogroup \ + FROM fed.biosample WHERE y_haplogroup IS NOT NULL \ + UNION ALL \ + SELECT 'MT_DNA', mt_haplogroup FROM fed.biosample WHERE mt_haplogroup IS NOT NULL \ + ) t \ + GROUP BY dna_type, haplogroup \ + ORDER BY samples DESC, dna_type, haplogroup", + ) + .fetch_all(pool) + .await?; + Ok(rows) +} + +/// Biosample — pseudonymous DID, sex, Y/mt haplogroup calls, sequencing center, +/// and join refs/counts. Donor identifiers and free-text are dropped on ingest. +pub struct Biosample { + pub common: Common, + pub sex: Option, + pub y_haplogroup: Option, + pub mt_haplogroup: Option, + pub center_name: Option, + pub population_breakdown_ref: Option, + pub str_profile_ref: Option, + pub sequence_run_count: i32, + pub genotype_count: i32, +} + +pub async fn upsert_biosample(pool: &PgPool, b: &Biosample) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.biosample \ + (did, rkey, at_uri, cid, sex, y_haplogroup, mt_haplogroup, center_name, \ + population_breakdown_ref, str_profile_ref, sequence_run_count, genotype_count, \ + record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, sex = EXCLUDED.sex, \ + y_haplogroup = EXCLUDED.y_haplogroup, mt_haplogroup = EXCLUDED.mt_haplogroup, \ + center_name = EXCLUDED.center_name, \ + population_breakdown_ref = EXCLUDED.population_breakdown_ref, \ + str_profile_ref = EXCLUDED.str_profile_ref, \ + sequence_run_count = EXCLUDED.sequence_run_count, genotype_count = EXCLUDED.genotype_count, \ + record_created_at = EXCLUDED.record_created_at, time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.biosample.time_us", + ) + .bind(&b.common.did) + 
.bind(&b.common.rkey) + .bind(&b.common.at_uri) + .bind(&b.common.cid) + .bind(&b.sex) + .bind(&b.y_haplogroup) + .bind(&b.mt_haplogroup) + .bind(&b.center_name) + .bind(&b.population_breakdown_ref) + .bind(&b.str_profile_ref) + .bind(b.sequence_run_count) + .bind(b.genotype_count) + .bind(b.common.record_created_at) + .bind(b.common.time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// Sequence run — platform/instrument/test characterization (no files, no PII). +pub struct SequenceRun { + pub common: Common, + pub biosample_ref: Option, + pub platform_name: Option, + pub instrument_model: Option, + pub instrument_id: Option, + pub test_type: Option, + pub library_layout: Option, + pub total_reads: Option, + pub read_length: Option, + pub mean_insert_size: Option, +} + +pub async fn upsert_sequencerun(pool: &PgPool, s: &SequenceRun) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.sequencerun \ + (did, rkey, at_uri, cid, biosample_ref, platform_name, instrument_model, \ + instrument_id, test_type, library_layout, total_reads, read_length, \ + mean_insert_size, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, biosample_ref = EXCLUDED.biosample_ref, \ + platform_name = EXCLUDED.platform_name, instrument_model = EXCLUDED.instrument_model, \ + instrument_id = EXCLUDED.instrument_id, test_type = EXCLUDED.test_type, \ + library_layout = EXCLUDED.library_layout, total_reads = EXCLUDED.total_reads, \ + read_length = EXCLUDED.read_length, mean_insert_size = EXCLUDED.mean_insert_size, \ + record_created_at = EXCLUDED.record_created_at, time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.sequencerun.time_us", + ) + .bind(&s.common.did) + .bind(&s.common.rkey) + .bind(&s.common.at_uri) + .bind(&s.common.cid) + .bind(&s.biosample_ref) + .bind(&s.platform_name) + .bind(&s.instrument_model) + 
.bind(&s.instrument_id) + .bind(&s.test_type) + .bind(&s.library_layout) + .bind(s.total_reads) + .bind(s.read_length) + .bind(s.mean_insert_size) + .bind(s.common.record_created_at) + .bind(s.common.time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// Project — surname/research project grouping (project-level, not donor PII). +pub struct Project { + pub common: Common, + pub project_name: Option, + pub administrator_did: Option, + pub member_count: i32, +} + +pub async fn upsert_project(pool: &PgPool, p: &Project) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.project \ + (did, rkey, at_uri, cid, project_name, administrator_did, member_count, \ + record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, project_name = EXCLUDED.project_name, \ + administrator_did = EXCLUDED.administrator_did, member_count = EXCLUDED.member_count, \ + record_created_at = EXCLUDED.record_created_at, time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.project.time_us", + ) + .bind(&p.common.did) + .bind(&p.common.rkey) + .bind(&p.common.at_uri) + .bind(&p.common.cid) + .bind(&p.project_name) + .bind(&p.administrator_did) + .bind(p.member_count) + .bind(p.common.record_created_at) + .bind(p.common.time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// Workspace — researcher container; counts only. 
+pub struct Workspace { + pub common: Common, + pub sample_count: i32, + pub project_count: i32, +} + +pub async fn upsert_workspace(pool: &PgPool, w: &Workspace) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.workspace \ + (did, rkey, at_uri, cid, sample_count, project_count, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, sample_count = EXCLUDED.sample_count, \ + project_count = EXCLUDED.project_count, record_created_at = EXCLUDED.record_created_at, \ + time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.workspace.time_us", + ) + .bind(&w.common.did) + .bind(&w.common.rkey) + .bind(&w.common.at_uri) + .bind(&w.common.cid) + .bind(w.sample_count) + .bind(w.project_count) + .bind(w.common.record_created_at) + .bind(w.common.time_us) + .execute(pool) + .await?; + Ok(()) +} diff --git a/rust/crates/du-db/src/fed/coverage.rs b/rust/crates/du-db/src/fed/coverage.rs new file mode 100644 index 00000000..de1114ec --- /dev/null +++ b/rust/crates/du-db/src/fed/coverage.rs @@ -0,0 +1,98 @@ +//! Alignment coverage summaries (`com.decodingus.atmosphere.alignment`) — the +//! first mirrored collection (migration 0011, `fed.coverage_summary`). QC metrics +//! only, never raw reads. See [`super`] for the shared cursor/delete. + +use crate::DbError; +use chrono::{DateTime, Utc}; +use serde_json::Value; +use sqlx::PgPool; + +/// One published alignment summary record. Scalars are extracted from +/// `metrics` (alignmentMetrics) for indexed aggregation; `metrics` keeps the +/// authoritative copy (incl. per-contig stats). 
+pub struct CoverageRecord { + pub did: String, + pub collection: String, + pub rkey: String, + pub at_uri: String, + pub cid: Option, + pub biosample_ref: Option, + pub sequence_run_ref: Option, + pub reference_build: Option, + pub aligner: Option, + pub mean_coverage: Option, + pub median_coverage: Option, + pub pct_10x: Option, + pub pct_20x: Option, + pub pct_30x: Option, + pub metrics: Value, + pub record_created_at: Option>, + pub time_us: i64, +} + +/// Upsert a mirrored summary record. Idempotent and ordered: a row is only +/// overwritten by an event with a `time_us` at least as new, so replays after a +/// reconnect and out-of-order deliveries can't resurrect stale state. +pub async fn upsert(pool: &PgPool, r: &CoverageRecord) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.coverage_summary \ + (did, collection, rkey, at_uri, cid, biosample_ref, sequence_run_ref, \ + reference_build, aligner, mean_coverage, median_coverage, \ + pct_10x, pct_20x, pct_30x, metrics, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17) \ + ON CONFLICT (did, collection, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, \ + biosample_ref = EXCLUDED.biosample_ref, sequence_run_ref = EXCLUDED.sequence_run_ref, \ + reference_build = EXCLUDED.reference_build, aligner = EXCLUDED.aligner, \ + mean_coverage = EXCLUDED.mean_coverage, median_coverage = EXCLUDED.median_coverage, \ + pct_10x = EXCLUDED.pct_10x, pct_20x = EXCLUDED.pct_20x, pct_30x = EXCLUDED.pct_30x, \ + metrics = EXCLUDED.metrics, record_created_at = EXCLUDED.record_created_at, \ + time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.coverage_summary.time_us", + ) + .bind(&r.did) + .bind(&r.collection) + .bind(&r.rkey) + .bind(&r.at_uri) + .bind(&r.cid) + .bind(&r.biosample_ref) + .bind(&r.sequence_run_ref) + .bind(&r.reference_build) + .bind(&r.aligner) + .bind(r.mean_coverage) + .bind(r.median_coverage) + 
.bind(r.pct_10x) + .bind(r.pct_20x) + .bind(r.pct_30x) + .bind(&r.metrics) + .bind(r.record_created_at) + .bind(r.time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// A population coverage aggregate over the mirror, grouped by reference build — +/// the cheap query-time path the mirror exists to enable. +#[derive(Debug, sqlx::FromRow)] +pub struct BuildCoverage { + pub reference_build: Option, + pub samples: i64, + pub mean_coverage: Option, + pub mean_pct_30x: Option, +} + +/// Aggregate mirrored summaries by reference build (sample count + averaged +/// depth/30x), most-sampled build first. +pub async fn aggregate_by_build(pool: &PgPool) -> Result, DbError> { + let rows = sqlx::query_as::<_, BuildCoverage>( + "SELECT reference_build, count(*) AS samples, \ + avg(mean_coverage) AS mean_coverage, avg(pct_30x) AS mean_pct_30x \ + FROM fed.coverage_summary \ + GROUP BY reference_build \ + ORDER BY samples DESC", + ) + .fetch_all(pool) + .await?; + Ok(rows) +} diff --git a/rust/crates/du-db/src/fed/device_key.rs b/rust/crates/du-db/src/fed/device_key.rs new file mode 100644 index 00000000..07a15ad8 --- /dev/null +++ b/rust/crates/du-db/src/fed/device_key.rs @@ -0,0 +1,45 @@ +//! Mirrored device keys (`com.decodingus.atmosphere.deviceKey`). Each client publishes its +//! Ed25519 device PUBLIC key (as a `did:key`) to the user's own repo; this is the registry +//! `crate::sig::verify_signed` checks signed Edge calls against. A DID may hold several +//! (one per device); revocation is a record delete (`super::delete`). See [`super`] for the +//! shared cursor/delete. PII-free — a DID + a public key + pointers only. + +use super::Common; +use crate::DbError; +use sqlx::PgPool; + +/// A mirrored device key. `public_key` is a `did:key:z…` string (verified directly). 
+pub struct DeviceKey { + pub common: Common, + pub public_key: String, +} + +pub async fn upsert(pool: &PgPool, d: &DeviceKey) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.device_key \ + (did, rkey, at_uri, cid, public_key, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, public_key = EXCLUDED.public_key, \ + record_created_at = EXCLUDED.record_created_at, time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.device_key.time_us", + ) + .bind(&d.common.did) + .bind(&d.common.rkey) + .bind(&d.common.at_uri) + .bind(&d.common.cid) + .bind(&d.public_key) + .bind(d.common.record_created_at) + .bind(d.common.time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// The `did:key` strings registered for a DID — the verifier's lookup (any may match). +pub async fn keys_for(pool: &PgPool, did: &str) -> Result, DbError> { + Ok(sqlx::query_scalar("SELECT public_key FROM fed.device_key WHERE did = $1") + .bind(did) + .fetch_all(pool) + .await?) +} diff --git a/rust/crates/du-db/src/fed/instrument_observation.rs b/rust/crates/du-db/src/fed/instrument_observation.rs new file mode 100644 index 00000000..79d337ed --- /dev/null +++ b/rust/crates/du-db/src/fed/instrument_observation.rs @@ -0,0 +1,59 @@ +//! Mirrored citizen instrument→lab observations +//! (`com.decodingus.atmosphere.instrumentObservation`). Each is one citizen's +//! explicit claim that an instrument id belongs to a lab, with a confidence level +//! (KNOWN/INFERRED/GUESSED). [`crate::sequencer::recompute_consensus`] folds these +//! into the proposal set alongside the implicit `fed.sequencerun.center_name` +//! claims. See [`super`] for the shared cursor/delete. + +use super::Common; +use crate::DbError; +use chrono::{DateTime, NaiveDate, Utc}; +use sqlx::PgPool; + +/// A mirrored instrument observation record. 
+pub struct InstrumentObservation { + pub common: Common, + pub instrument_id: Option, + pub lab_name: Option, + pub biosample_ref: Option, + pub platform: Option, + pub instrument_model: Option, + pub flowcell_id: Option, + pub run_date: Option, + pub confidence: Option, + pub observed_at: Option>, +} + +pub async fn upsert(pool: &PgPool, o: &InstrumentObservation) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.instrument_observation \ + (did, rkey, at_uri, cid, instrument_id, lab_name, biosample_ref, platform, \ + instrument_model, flowcell_id, run_date, confidence, observed_at, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, instrument_id = EXCLUDED.instrument_id, \ + lab_name = EXCLUDED.lab_name, biosample_ref = EXCLUDED.biosample_ref, platform = EXCLUDED.platform, \ + instrument_model = EXCLUDED.instrument_model, flowcell_id = EXCLUDED.flowcell_id, \ + run_date = EXCLUDED.run_date, confidence = EXCLUDED.confidence, observed_at = EXCLUDED.observed_at, \ + record_created_at = EXCLUDED.record_created_at, time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.instrument_observation.time_us", + ) + .bind(&o.common.did) + .bind(&o.common.rkey) + .bind(&o.common.at_uri) + .bind(&o.common.cid) + .bind(&o.instrument_id) + .bind(&o.lab_name) + .bind(&o.biosample_ref) + .bind(&o.platform) + .bind(&o.instrument_model) + .bind(&o.flowcell_id) + .bind(o.run_date) + .bind(&o.confidence) + .bind(o.observed_at) + .bind(o.common.record_created_at) + .bind(o.common.time_us) + .execute(pool) + .await?; + Ok(()) +} diff --git a/rust/crates/du-db/src/fed/mod.rs b/rust/crates/du-db/src/fed/mod.rs new file mode 100644 index 00000000..b905feee --- /dev/null +++ b/rust/crates/du-db/src/fed/mod.rs @@ -0,0 +1,127 @@ +//! Federated reporting mirror (atmosphere "Record Status Summary" — the legacy +//! 
`✅ AppView Complete` ingest set). +//! +//! The AppView does NOT analyze — it **aggregates and reports**. Navigator +//! computes anonymized per-sample SUMMARIES at the edge and publishes them as +//! public PDS records; a single Jetstream consumer (`du-jobs`) mirrors each +//! supported collection into a dedicated `fed.*` reporting table here, and +//! reports aggregate with local SQL. This is the summary ingestion the atmosphere +//! v2.1 "scope reduction" wrongly dropped — NOT the raw-data network mirror +//! (summaries only, no raw reads/files, donor PII never stored). +//! +//! One module/table per collection; the consumer drives them and shares the +//! cursor defined here. Storage structs are plain data — the consumer (du-jobs) +//! owns record-shape extraction, keeping this layer pure storage. + +use crate::DbError; +use chrono::{DateTime, Utc}; +use sqlx::PgPool; + +pub mod analytics; +pub mod core; +pub mod coverage; +pub mod device_key; +pub mod instrument_observation; +pub mod private_variant; +pub mod str_profile; + +// Collection NSIDs the AppView ingests for reporting. The records Navigator computes + +// publishes are defined in the shared `du-domain::fed` module, so the NSIDs for those +// collections are sourced from there — publisher and consumer cannot drift. (project / +// workspace / genotype / strProfile have no shared record contract yet, so they stay local +// until one lands; instrumentObservation / privateVariant / deviceKey are also local literals.)
+pub const NS_ALIGNMENT: &str = du_domain::fed::NS_ALIGNMENT; +pub const NS_BIOSAMPLE: &str = du_domain::fed::NS_BIOSAMPLE; +pub const NS_SEQUENCERUN: &str = du_domain::fed::NS_SEQUENCERUN; +pub const NS_PROJECT: &str = "com.decodingus.atmosphere.project"; +pub const NS_WORKSPACE: &str = "com.decodingus.atmosphere.workspace"; +pub const NS_GENOTYPE: &str = "com.decodingus.atmosphere.genotype"; +pub const NS_POPULATION_BREAKDOWN: &str = du_domain::fed::NS_POPULATION_BREAKDOWN; +pub const NS_HAPLOGROUP_RECONCILIATION: &str = du_domain::fed::NS_HAPLOGROUP_RECONCILIATION; +pub const NS_STR_PROFILE: &str = "com.decodingus.atmosphere.strProfile"; +pub const NS_INSTRUMENT_OBSERVATION: &str = "com.decodingus.atmosphere.instrumentObservation"; +pub const NS_PRIVATE_VARIANT: &str = "com.decodingus.atmosphere.privateVariant"; +pub const NS_DEVICE_KEY: &str = "com.decodingus.atmosphere.deviceKey"; + +/// Every collection mirrored for reporting (the consumer's `wantedCollections`). +pub const INGEST_COLLECTIONS: &[&str] = &[ + NS_ALIGNMENT, + NS_BIOSAMPLE, + NS_SEQUENCERUN, + NS_PROJECT, + NS_WORKSPACE, + NS_GENOTYPE, + NS_POPULATION_BREAKDOWN, + NS_HAPLOGROUP_RECONCILIATION, + NS_STR_PROFILE, + NS_INSTRUMENT_OBSERVATION, + NS_PRIVATE_VARIANT, + NS_DEVICE_KEY, +]; + +/// The `fed.*` reporting table backing a collection, or `None` if unsupported. 
+fn table_for(collection: &str) -> Option<&'static str> { + Some(match collection { + NS_ALIGNMENT => "fed.coverage_summary", + NS_BIOSAMPLE => "fed.biosample", + NS_SEQUENCERUN => "fed.sequencerun", + NS_PROJECT => "fed.project", + NS_WORKSPACE => "fed.workspace", + NS_GENOTYPE => "fed.genotype", + NS_POPULATION_BREAKDOWN => "fed.population_breakdown", + NS_HAPLOGROUP_RECONCILIATION => "fed.haplogroup_reconciliation", + NS_STR_PROFILE => "fed.str_profile", + NS_INSTRUMENT_OBSERVATION => "fed.instrument_observation", + NS_PRIVATE_VARIANT => "fed.private_variant", + NS_DEVICE_KEY => "fed.device_key", + _ => return None, + }) +} + +/// Pointer/provenance fields common to every mirrored record. +#[derive(Clone)] +pub struct Common { + pub did: String, + pub rkey: String, + pub at_uri: String, + pub cid: Option, + pub record_created_at: Option>, + /// Jetstream cursor (`time_us`) of the event that produced this row. + pub time_us: i64, +} + +/// Remove a mirrored record (its source was deleted on the PDS). The table is +/// resolved from a fixed NSID map — never interpolated from untrusted input. +/// Returns `false` for an unsupported collection. +pub async fn delete(pool: &PgPool, collection: &str, did: &str, rkey: &str) -> Result { + let Some(table) = table_for(collection) else { return Ok(false) }; + let sql = format!("DELETE FROM {table} WHERE did = $1 AND rkey = $2"); + sqlx::query(&sql).bind(did).bind(rkey).execute(pool).await?; + Ok(true) +} + +/// Last persisted Jetstream cursor (`time_us`), if the consumer has run before. +pub async fn load_cursor(pool: &PgPool) -> Result, DbError> { + let cursor: Option = + sqlx::query_scalar("SELECT time_us FROM fed.jetstream_cursor WHERE id") + .fetch_optional(pool) + .await?; + Ok(cursor) +} + +/// Persist the Jetstream cursor (singleton row) so the consumer resumes here. 
+pub async fn save_cursor(pool: &PgPool, time_us: i64) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.jetstream_cursor (id, time_us) VALUES (true, $1) \ + ON CONFLICT (id) DO UPDATE SET time_us = EXCLUDED.time_us, updated_at = now()", + ) + .bind(time_us) + .execute(pool) + .await?; + Ok(()) +} + +/// Shared helper for the `Utc` conversion the consumer needs when building rows. +pub fn to_utc(s: &str) -> Option> { + DateTime::parse_from_rfc3339(s).ok().map(|dt| dt.with_timezone(&Utc)) +} diff --git a/rust/crates/du-db/src/fed/private_variant.rs b/rust/crates/du-db/src/fed/private_variant.rs new file mode 100644 index 00000000..6fdf9e5c --- /dev/null +++ b/rust/crates/du-db/src/fed/private_variant.rs @@ -0,0 +1,51 @@ +//! Mirrored citizen private-variant sets +//! (`com.decodingus.atmosphere.privateVariant`). Each record is one biosample's +//! private variants (mutations beyond its assigned terminal haplogroup) for one +//! DNA arm. [`crate::discovery::recompute_consensus`] materializes these into +//! `tree.biosample_private_variant` and pools them into proposed branches by +//! variant-set Jaccard similarity. See [`super`] for the shared cursor/delete. + +use super::Common; +use crate::DbError; +use serde_json::Value; +use sqlx::PgPool; + +/// A mirrored private-variant record. `variants` is the lexicon's variant array +/// verbatim (`[{name?, contig, position, ancestral, derived, rsId?}]`). 
+pub struct PrivateVariant { + pub common: Common, + pub biosample_ref: Option, + pub sequence_run_ref: Option, + pub dna_type: Option, + pub terminal_haplogroup: Option, + pub variants: Value, +} + +pub async fn upsert(pool: &PgPool, p: &PrivateVariant) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.private_variant \ + (did, rkey, at_uri, cid, biosample_ref, sequence_run_ref, dna_type, \ + terminal_haplogroup, variants, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, biosample_ref = EXCLUDED.biosample_ref, \ + sequence_run_ref = EXCLUDED.sequence_run_ref, dna_type = EXCLUDED.dna_type, \ + terminal_haplogroup = EXCLUDED.terminal_haplogroup, variants = EXCLUDED.variants, \ + record_created_at = EXCLUDED.record_created_at, time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.private_variant.time_us", + ) + .bind(&p.common.did) + .bind(&p.common.rkey) + .bind(&p.common.at_uri) + .bind(&p.common.cid) + .bind(&p.biosample_ref) + .bind(&p.sequence_run_ref) + .bind(&p.dna_type) + .bind(&p.terminal_haplogroup) + .bind(&p.variants) + .bind(p.common.record_created_at) + .bind(p.common.time_us) + .execute(pool) + .await?; + Ok(()) +} diff --git a/rust/crates/du-db/src/fed/str_profile.rs b/rust/crates/du-db/src/fed/str_profile.rs new file mode 100644 index 00000000..ef6e9560 --- /dev/null +++ b/rust/crates/du-db/src/fed/str_profile.rs @@ -0,0 +1,52 @@ +//! Mirrored Y-STR profiles (`com.decodingus.atmosphere.strProfile`). Markers are +//! stored lossless as JSONB; the per-branch modal aggregation lives in +//! [`crate::ystr`]. See [`super`] for the shared cursor/delete. + +use super::Common; +use crate::DbError; +use serde_json::Value; +use sqlx::PgPool; + +/// A mirrored STR profile. `markers` is the lexicon's `strMarkerValue[]` verbatim. 
+pub struct StrProfile { + pub common: Common, + pub biosample_ref: Option, + pub sequence_run_ref: Option, + pub source: Option, + pub imported_from: Option, + pub derivation_method: Option, + pub total_markers: Option, + pub markers: Value, +} + +pub async fn upsert(pool: &PgPool, p: &StrProfile) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO fed.str_profile \ + (did, rkey, at_uri, cid, biosample_ref, sequence_run_ref, source, imported_from, \ + derivation_method, total_markers, markers, record_created_at, time_us) \ + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13) \ + ON CONFLICT (did, rkey) DO UPDATE SET \ + at_uri = EXCLUDED.at_uri, cid = EXCLUDED.cid, biosample_ref = EXCLUDED.biosample_ref, \ + sequence_run_ref = EXCLUDED.sequence_run_ref, source = EXCLUDED.source, \ + imported_from = EXCLUDED.imported_from, derivation_method = EXCLUDED.derivation_method, \ + total_markers = EXCLUDED.total_markers, markers = EXCLUDED.markers, \ + record_created_at = EXCLUDED.record_created_at, time_us = EXCLUDED.time_us, indexed_at = now() \ + WHERE EXCLUDED.time_us >= fed.str_profile.time_us", + ) + .bind(&p.common.did) + .bind(&p.common.rkey) + .bind(&p.common.at_uri) + .bind(&p.common.cid) + .bind(&p.biosample_ref) + .bind(&p.sequence_run_ref) + .bind(&p.source) + .bind(&p.imported_from) + .bind(&p.derivation_method) + .bind(p.total_markers) + .bind(&p.markers) + .bind(p.common.record_created_at) + .bind(p.common.time_us) + .execute(pool) + .await?; + Ok(()) +} diff --git a/rust/crates/du-db/src/genome_region.rs b/rust/crates/du-db/src/genome_region.rs new file mode 100644 index 00000000..f28ef79c --- /dev/null +++ b/rust/crates/du-db/src/genome_region.rs @@ -0,0 +1,192 @@ +//! Queries for `core.genome_region` (curator-managed multi-build regions). 
+ +use crate::{DbError, Page}; +use du_domain::genome_region::GenomeRegion; +use sqlx::PgPool; + +#[derive(sqlx::FromRow)] +struct RegionRow { + id: i64, + region_type: String, + name: String, + coordinates: serde_json::Value, + properties: serde_json::Value, +} + +impl From for GenomeRegion { + fn from(r: RegionRow) -> Self { + GenomeRegion { + id: r.id, + region_type: r.region_type, + name: r.name, + coordinates: r.coordinates, + properties: r.properties, + } + } +} + +const SELECT: &str = "SELECT id, region_type, name, coordinates, properties FROM core.genome_region"; + +/// Distinct reference builds present across all regions' `coordinates` keys +/// (e.g. ["GRCh37","GRCh38","hs1"]). +pub async fn distinct_builds(pool: &PgPool) -> Result, DbError> { + Ok(sqlx::query_scalar( + "SELECT DISTINCT jsonb_object_keys(coordinates) AS build FROM core.genome_region \ + WHERE coordinates <> '{}'::jsonb ORDER BY build", + ) + .fetch_all(pool) + .await?) +} + +/// Regions that carry coordinates for the given build. +pub async fn for_build(pool: &PgPool, build: &str) -> Result, DbError> { + let rows: Vec = sqlx::query_as(&format!( + "{SELECT} WHERE jsonb_exists(coordinates, $1) ORDER BY region_type, name" + )) + .bind(build) + .fetch_all(pool) + .await?; + Ok(rows.into_iter().map(Into::into).collect()) +} + +pub async fn get_by_id(pool: &PgPool, id: i64) -> Result, DbError> { + let row: Option = sqlx::query_as(&format!("{SELECT} WHERE id = $1")) + .bind(id) + .fetch_optional(pool) + .await?; + Ok(row.map(Into::into)) +} + +/// Paginated list, optionally filtered by name/type substring and region type. 
+pub async fn list_paginated( + pool: &PgPool, + query: Option<&str>, + region_type: Option<&str>, + page: i64, + page_size: i64, +) -> Result, DbError> { + let offset = Page::<()>::offset(page, page_size); + let limit = page_size.clamp(1, 200); + let like = query.map(str::trim).filter(|q| !q.is_empty()).map(|q| format!("%{q}%")); + let rtype = region_type.map(str::trim).filter(|q| !q.is_empty()).map(str::to_string); + + let where_sql = "WHERE ($1::text IS NULL OR name ILIKE $1 OR region_type ILIKE $1) \ + AND ($2::text IS NULL OR region_type = $2)"; + + let total: i64 = sqlx::query_scalar(&format!("SELECT count(*) FROM core.genome_region {where_sql}")) + .bind(&like) + .bind(&rtype) + .fetch_one(pool) + .await?; + let rows: Vec = + sqlx::query_as(&format!("{SELECT} {where_sql} ORDER BY region_type, name LIMIT $3 OFFSET $4")) + .bind(&like) + .bind(&rtype) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + + Ok(Page { + items: rows.into_iter().map(Into::into).collect(), + total, + page: page.max(1), + page_size: limit, + }) +} + +pub async fn create( + pool: &PgPool, + region_type: &str, + name: &str, + coordinates: &serde_json::Value, + properties: &serde_json::Value, +) -> Result { + let id: i64 = sqlx::query_scalar( + "INSERT INTO core.genome_region (region_type, name, coordinates, properties) \ + VALUES ($1, $2, $3, $4) RETURNING id", + ) + .bind(region_type) + .bind(name) + .bind(coordinates) + .bind(properties) + .fetch_one(pool) + .await?; + Ok(id) +} + +/// Insert or update a region keyed on its unique `(region_type, name)`, +/// returning `true` when a new row was inserted and `false` when an existing +/// one was updated. Used by the Y-region reference ingest so re-runs are +/// idempotent (see `du_jobs::yregions`). 
+pub async fn upsert_by_key( + pool: &PgPool, + region_type: &str, + name: &str, + coordinates: &serde_json::Value, + properties: &serde_json::Value, +) -> Result { + let inserted: bool = sqlx::query_scalar( + "INSERT INTO core.genome_region (region_type, name, coordinates, properties) \ + VALUES ($1, $2, $3, $4) \ + ON CONFLICT (region_type, name) DO UPDATE \ + SET coordinates = EXCLUDED.coordinates, properties = EXCLUDED.properties, updated_at = now() \ + RETURNING (xmax = 0)", + ) + .bind(region_type) + .bind(name) + .bind(coordinates) + .bind(properties) + .fetch_one(pool) + .await?; + Ok(inserted) +} + +/// Delete reference rows for `source` whose `name` is not in `keep`. Prunes +/// orphans left when a source's regions change — e.g. a versioned BED bump that +/// shifts coordinates mints new locus-qualified names, leaving the old rows +/// behind. Returns the number removed. Paired with [`upsert_by_key`] this gives +/// the Y-region load full-snapshot (sync) semantics. +pub async fn prune_source_orphans(pool: &PgPool, source: &str, keep: &[String]) -> Result { + let removed = sqlx::query( + "DELETE FROM core.genome_region \ + WHERE properties->>'source' = $1 AND name <> ALL($2::text[])", + ) + .bind(source) + .bind(keep) + .execute(pool) + .await? + .rows_affected(); + Ok(removed) +} + +pub async fn update( + pool: &PgPool, + id: i64, + region_type: &str, + name: &str, + coordinates: &serde_json::Value, + properties: &serde_json::Value, +) -> Result { + let affected = sqlx::query( + "UPDATE core.genome_region SET region_type=$2, name=$3, coordinates=$4, properties=$5, updated_at=now() WHERE id=$1", + ) + .bind(id) + .bind(region_type) + .bind(name) + .bind(coordinates) + .bind(properties) + .execute(pool) + .await? + .rows_affected(); + Ok(affected > 0) +} + +pub async fn delete(pool: &PgPool, id: i64) -> Result { + let affected = sqlx::query("DELETE FROM core.genome_region WHERE id=$1") + .bind(id) + .execute(pool) + .await? 
+ .rows_affected(); + Ok(affected > 0) +} diff --git a/rust/crates/du-db/src/haplogroup.rs b/rust/crates/du-db/src/haplogroup.rs new file mode 100644 index 00000000..a2b3ff65 --- /dev/null +++ b/rust/crates/du-db/src/haplogroup.rs @@ -0,0 +1,1022 @@ +//! Queries for `tree.haplogroup` + current parent/child edges. These back the +//! Y/MT tree views. "Current" edges are those with `valid_until IS NULL`. + +use crate::{parse_pg_enum, pg_enum_label, DbError, Page}; +use du_domain::enums::DnaType; +use du_domain::haplogroup::Haplogroup; +use du_domain::ids::HaplogroupId; +use sqlx::PgPool; + +#[derive(sqlx::FromRow)] +struct HaplogroupRow { + id: i64, + name: String, + haplogroup_type: String, + lineage: Option, + source: Option, + confidence_level: Option, + formed_ybp: Option, + tmrca_ybp: Option, + provenance: serde_json::Value, +} + +impl HaplogroupRow { + fn into_domain(self) -> Result { + Ok(Haplogroup { + id: HaplogroupId(self.id), + name: self.name, + haplogroup_type: parse_pg_enum(&self.haplogroup_type, "haplogroup_type")?, + lineage: self.lineage, + source: self.source, + confidence_level: self.confidence_level, + formed_ybp: self.formed_ybp, + tmrca_ybp: self.tmrca_ybp, + provenance: self.provenance, + }) + } +} + +// Columns qualified with alias `h` so joins (which also carry an `id`) stay +// unambiguous; output column names still match HaplogroupRow's fields. 
+const COLS: &str = "h.id, h.name, h.haplogroup_type::text AS haplogroup_type, h.lineage, h.source, \ + h.confidence_level, h.formed_ybp, h.tmrca_ybp, h.provenance"; + +fn collect(rows: Vec) -> Result, DbError> { + rows.into_iter().map(HaplogroupRow::into_domain).collect() +} + +pub async fn get_by_id(pool: &PgPool, id: HaplogroupId) -> Result, DbError> { + let row: Option = + sqlx::query_as(&format!("SELECT {COLS} FROM tree.haplogroup h WHERE h.id = $1")) + .bind(id.0) + .fetch_optional(pool) + .await?; + row.map(HaplogroupRow::into_domain).transpose() +} + +pub async fn get_by_name( + pool: &PgPool, + name: &str, + dna_type: DnaType, +) -> Result, DbError> { + let row: Option = sqlx::query_as(&format!( + "SELECT {COLS} FROM tree.haplogroup h WHERE h.name = $1 AND h.haplogroup_type::text = $2" + )) + .bind(name) + .bind(pg_enum_label(&dna_type)?) + .fetch_optional(pool) + .await?; + row.map(HaplogroupRow::into_domain).transpose() +} + +/// Direct children of a haplogroup (current edges), ordered by name. +pub async fn children(pool: &PgPool, parent: HaplogroupId) -> Result, DbError> { + let rows: Vec = sqlx::query_as(&format!( + "SELECT {COLS} FROM tree.haplogroup h \ + JOIN tree.haplogroup_relationship r ON r.child_haplogroup_id = h.id \ + WHERE r.parent_haplogroup_id = $1 AND r.valid_until IS NULL AND h.valid_until IS NULL \ + ORDER BY h.name" + )) + .bind(parent.0) + .fetch_all(pool) + .await?; + collect(rows) +} + +/// Flat, paginated, optionally name-filtered / lineage-filtered list for the +/// curator UI (the public tree views use roots/children instead). 
+pub async fn list_paginated( + pool: &PgPool, + query: Option<&str>, + dna_type: Option, + page: i64, + page_size: i64, +) -> Result, DbError> { + let limit = page_size.clamp(1, 200); // clamp first: OFFSET must be derived from the same size LIMIT uses + let offset = Page::<()>::offset(page, limit); // the raw page_size could disagree with LIMIT when outside 1..=200 + let like = query + .map(str::trim) + .filter(|q| !q.is_empty()) + .map(|q| format!("%{q}%")); + let dna = dna_type.map(|d| pg_enum_label(&d)).transpose()?; + + // $1 = name filter (NULL = any), $2 = dna label (NULL = any). + let where_sql = "WHERE ($1::text IS NULL OR h.name ILIKE $1) \ + AND ($2::text IS NULL OR h.haplogroup_type::text = $2)"; + + let total: i64 = sqlx::query_scalar(&format!( + "SELECT count(*) FROM tree.haplogroup h {where_sql}" + )) + .bind(&like) + .bind(&dna) + .fetch_one(pool) + .await?; + + let rows: Vec = sqlx::query_as(&format!( + "SELECT {COLS} FROM tree.haplogroup h {where_sql} ORDER BY h.name LIMIT $3 OFFSET $4" + )) + .bind(&like) + .bind(&dna) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + + Ok(Page { items: collect(rows)?, total, page: page.max(1), page_size: limit }) +} + +/// Create a haplogroup; returns the new id. +pub async fn create( + pool: &PgPool, + name: &str, + dna_type: DnaType, + lineage: Option<&str>, + source: Option<&str>, + formed_ybp: Option, + tmrca_ybp: Option, +) -> Result { + let id: i64 = sqlx::query_scalar( + "INSERT INTO tree.haplogroup (name, haplogroup_type, lineage, source, formed_ybp, tmrca_ybp) \ + VALUES ($1, $2::core.dna_type, $3, $4, $5, $6) RETURNING id", + ) + .bind(name) + .bind(pg_enum_label(&dna_type)?) + .bind(lineage) + .bind(source) + .bind(formed_ybp) + .bind(tmrca_ybp) + .fetch_one(pool) + .await?; + Ok(HaplogroupId(id)) +} + +/// Update editable haplogroup fields. Returns whether a row was affected.
+pub async fn update( + pool: &PgPool, + id: HaplogroupId, + name: &str, + lineage: Option<&str>, + source: Option<&str>, + formed_ybp: Option, + tmrca_ybp: Option, +) -> Result { + let affected = sqlx::query( + "UPDATE tree.haplogroup SET name=$2, lineage=$3, source=$4, formed_ybp=$5, tmrca_ybp=$6 WHERE id=$1", + ) + .bind(id.0) + .bind(name) + .bind(lineage) + .bind(source) + .bind(formed_ybp) + .bind(tmrca_ybp) + .execute(pool) + .await? + .rows_affected(); + Ok(affected > 0) +} + +/// Whether the haplogroup participates in any current relationship (a guard the +/// curator UI uses before allowing deletion). +pub async fn has_current_edges(pool: &PgPool, id: HaplogroupId) -> Result { + let n: i64 = sqlx::query_scalar( + "SELECT count(*) FROM tree.haplogroup_relationship \ + WHERE (child_haplogroup_id = $1 OR parent_haplogroup_id = $1) AND valid_until IS NULL", + ) + .bind(id.0) + .fetch_one(pool) + .await?; + Ok(n > 0) +} + +/// Delete a haplogroup. Returns whether a row was removed. +pub async fn delete(pool: &PgPool, id: HaplogroupId) -> Result { + let affected = sqlx::query("DELETE FROM tree.haplogroup WHERE id=$1") + .bind(id.0) + .execute(pool) + .await? + .rows_affected(); + Ok(affected > 0) +} + +/// Root haplogroups of a lineage: no current edge to a parent. +pub async fn roots(pool: &PgPool, dna_type: DnaType) -> Result, DbError> { + let rows: Vec = sqlx::query_as(&format!( + "SELECT {COLS} FROM tree.haplogroup h \ + WHERE h.haplogroup_type::text = $1 AND h.valid_until IS NULL \ + AND NOT EXISTS ( \ + SELECT 1 FROM tree.haplogroup_relationship r \ + WHERE r.child_haplogroup_id = h.id AND r.parent_haplogroup_id IS NOT NULL \ + AND r.valid_until IS NULL) \ + ORDER BY h.name" + )) + .bind(pg_enum_label(&dna_type)?) + .fetch_all(pool) + .await?; + collect(rows) +} + +/// Load the current production tree for a lineage as a nested +/// `du_domain::merge::ExistingNode` forest (current nodes/edges/variant links +/// only). 
Backs the merge algorithm's "existing tree" input. +pub async fn existing_tree( + pool: &PgPool, + dna_type: DnaType, +) -> Result, DbError> { + let dna = pg_enum_label(&dna_type)?; + let nodes: Vec<(i64, String)> = sqlx::query_as( + "SELECT id, name FROM tree.haplogroup WHERE haplogroup_type::text = $1 AND valid_until IS NULL", + ) + .bind(&dna) + .fetch_all(pool) + .await?; + let edges: Vec<(i64, Option)> = sqlx::query_as( + "SELECT r.child_haplogroup_id, r.parent_haplogroup_id FROM tree.haplogroup_relationship r \ + JOIN tree.haplogroup h ON h.id = r.child_haplogroup_id \ + WHERE r.valid_until IS NULL AND h.haplogroup_type::text = $1 AND h.valid_until IS NULL", + ) + .bind(&dna) + .fetch_all(pool) + .await?; + // Merge matches branches by defining-SNP *name*, so UNNAMED variants + // (canonical_name NULL — e.g. folded legacy homoplasy/duplicate rows) + // contribute nothing and are excluded (also keeps the column non-null). + let vars: Vec<(i64, String)> = sqlx::query_as( + "SELECT hv.haplogroup_id, v.canonical_name FROM tree.haplogroup_variant hv \ + JOIN core.variant v ON v.id = hv.variant_id \ + JOIN tree.haplogroup h ON h.id = hv.haplogroup_id \ + WHERE hv.valid_until IS NULL AND h.haplogroup_type::text = $1 AND h.valid_until IS NULL \ + AND v.canonical_name IS NOT NULL", + ) + .bind(&dna) + .fetch_all(pool) + .await?; + + use std::collections::BTreeMap; + let name_of: BTreeMap = nodes.iter().cloned().collect(); + let mut vars_of: BTreeMap> = BTreeMap::new(); + for (hid, name) in vars { + vars_of.entry(hid).or_default().push(name); + } + let parent_of: BTreeMap> = edges.into_iter().collect(); + let mut children_of: BTreeMap> = BTreeMap::new(); + for (&id, parent) in &parent_of { + if let Some(p) = parent { + children_of.entry(*p).or_default().push(id); + } + } + // Roots: nodes with no current parent edge. 
+ let mut roots: Vec = name_of + .keys() + .copied() + .filter(|id| parent_of.get(id).copied().flatten().is_none()) + .collect(); + roots.sort_unstable(); + + fn build( + id: i64, + depth: u16, + name_of: &std::collections::BTreeMap, + vars_of: &std::collections::BTreeMap>, + children_of: &std::collections::BTreeMap>, + ) -> du_domain::merge::ExistingNode { + let children = if depth > 1000 { + Vec::new() + } else { + let mut kids = children_of.get(&id).cloned().unwrap_or_default(); + kids.sort_unstable(); + kids.into_iter().map(|c| build(c, depth + 1, name_of, vars_of, children_of)).collect() + }; + du_domain::merge::ExistingNode { + id, + name: name_of.get(&id).cloned().unwrap_or_default(), + variants: vars_of.get(&id).cloned().unwrap_or_default(), + children, + } + } + + Ok(roots.into_iter().map(|r| build(r, 0, &name_of, &vars_of, &children_of)).collect()) +} + +/// Ancestor chain of a haplogroup, ordered root → immediate parent (the node +/// itself is excluded). Backs the tree-view breadcrumb trail. +pub async fn ancestors(pool: &PgPool, id: HaplogroupId) -> Result, DbError> { + // Walk parent edges upward, tagging each step with its distance so we can + // return them root-first. 
+ let rows: Vec<(i64, String, i32)> = sqlx::query_as( + "WITH RECURSIVE up AS ( \ + SELECT r.parent_haplogroup_id AS id, 1 AS dist \ + FROM tree.haplogroup_relationship r \ + WHERE r.child_haplogroup_id = $1 AND r.parent_haplogroup_id IS NOT NULL AND r.valid_until IS NULL \ + UNION ALL \ + SELECT r.parent_haplogroup_id, up.dist + 1 \ + FROM tree.haplogroup_relationship r \ + JOIN up ON up.id = r.child_haplogroup_id \ + WHERE r.parent_haplogroup_id IS NOT NULL AND r.valid_until IS NULL AND up.dist < 1000) \ + SELECT h.id, h.name, up.dist FROM up JOIN tree.haplogroup h ON h.id = up.id \ + WHERE h.valid_until IS NULL ORDER BY up.dist DESC", + ) + .bind(id.0) + .fetch_all(pool) + .await?; + Ok(rows.into_iter().map(|(id, name, _)| (HaplogroupId(id), name)).collect()) +} + +/// One node of a depth-bounded tree window for the public tree view. +#[derive(Debug, Clone)] +pub struct WindowNode { + pub id: i64, + pub name: String, + pub parent_id: Option, + pub depth: i32, + pub formed_ybp: Option, + pub tmrca_ybp: Option, + /// `source == 'backbone'` — the established spine, rendered green. + pub is_backbone: bool, + /// Edited within the last year — rendered amber. + pub is_recent: bool, + /// Defining-variant count (current links). + pub variant_count: i64, + /// True when this node sits at the window boundary AND has children that + /// were not included — the view shows a "+" affordance to re-root into it. + pub has_hidden: bool, +} + +/// The subtree under `root_name`, limited to `max_depth` levels below the root +/// (root = depth 0). Follows current edges. Returns a flat list with parent +/// linkage, per-node variant counts, backbone/recency flags, and a `has_hidden` +/// marker on boundary nodes whose children were clipped. The web layer nests it. 
+pub async fn subtree_window( + pool: &PgPool, + dna_type: DnaType, + root_name: &str, + max_depth: i32, +) -> Result, DbError> { + #[derive(sqlx::FromRow)] + struct WinRow { + id: i64, + name: String, + parent_id: Option, + depth: i32, + formed_ybp: Option, + tmrca_ybp: Option, + is_backbone: bool, + is_recent: bool, + variant_count: i64, + has_hidden: bool, + } + let rows: Vec = sqlx::query_as( + "WITH RECURSIVE sub AS ( \ + SELECT h.id, h.name, NULL::bigint AS parent_id, 0 AS depth \ + FROM tree.haplogroup h \ + WHERE h.haplogroup_type::text = $1 AND h.valid_until IS NULL AND h.name = $2 \ + UNION ALL \ + SELECT c.id, c.name, r.parent_haplogroup_id, sub.depth + 1 \ + FROM tree.haplogroup c \ + JOIN tree.haplogroup_relationship r \ + ON r.child_haplogroup_id = c.id AND r.valid_until IS NULL \ + JOIN sub ON sub.id = r.parent_haplogroup_id \ + WHERE c.valid_until IS NULL AND sub.depth < $3) \ + SELECT s.id, s.name, s.parent_id, s.depth, h.formed_ybp, h.tmrca_ybp, \ + h.is_backbone, \ + (h.valid_from > now() - interval '1 year') AS is_recent, \ + (SELECT count(*) FROM tree.haplogroup_variant hv \ + WHERE hv.haplogroup_id = s.id AND hv.valid_until IS NULL) AS variant_count, \ + (s.depth >= $3 AND EXISTS ( \ + SELECT 1 FROM tree.haplogroup_relationship r2 \ + WHERE r2.parent_haplogroup_id = s.id AND r2.valid_until IS NULL)) AS has_hidden \ + FROM sub s JOIN tree.haplogroup h ON h.id = s.id \ + ORDER BY s.depth, s.name", + ) + .bind(pg_enum_label(&dna_type)?) 
+ .bind(root_name) + .bind(max_depth) + .fetch_all(pool) + .await?; + Ok(rows + .into_iter() + .map(|r| WindowNode { + id: r.id, + name: r.name, + parent_id: r.parent_id, + depth: r.depth, + formed_ybp: r.formed_ybp, + tmrca_ybp: r.tmrca_ybp, + is_backbone: r.is_backbone, + is_recent: r.is_recent, + variant_count: r.variant_count, + has_hidden: r.has_hidden, + }) + .collect()) +} + +/// Clear just **one DNA type's** de-novo tree (so Y and mt coexist), the +/// per-lineage greenfield front-end for a `--denovo-{y,mt}` reload. The de-novo +/// loader populates only `haplogroup` + `haplogroup_relationship` + +/// `haplogroup_variant` (+ later `haplogroup_sample`); the FKs into `haplogroup` +/// are NO ACTION, so dependents are deleted first. Unlike a full tree wipe this +/// leaves the other lineage (and the rest of `tree.*`) untouched. Returns the +/// number of haplogroups removed. +pub async fn clear_dna(pool: &PgPool, dna_type: DnaType) -> Result { + let dna = pg_enum_label(&dna_type)?; + let mut tx = pool.begin().await?; + let ids = "SELECT id FROM tree.haplogroup WHERE haplogroup_type::text = $1"; + sqlx::query(&format!("DELETE FROM tree.haplogroup_variant WHERE haplogroup_id IN ({ids})")) + .bind(&dna) + .execute(&mut *tx) + .await?; + sqlx::query(&format!( + "DELETE FROM tree.haplogroup_relationship \ + WHERE child_haplogroup_id IN ({ids}) OR parent_haplogroup_id IN ({ids})" + )) + .bind(&dna) + .execute(&mut *tx) + .await?; + sqlx::query("DELETE FROM tree.haplogroup_sample WHERE dna_type::text = $1") + .bind(&dna) + .execute(&mut *tx) + .await?; + sqlx::query("DELETE FROM tree.denovo_conflict WHERE dna_type::text = $1") + .bind(&dna) + .execute(&mut *tx) + .await?; + let n = sqlx::query("DELETE FROM tree.haplogroup WHERE haplogroup_type::text = $1") + .bind(&dna) + .execute(&mut *tx) + .await? 
+ .rows_affected(); + tx.commit().await?; + Ok(n) +} + +/// Mark the **backbone** of a lineage: the computed spine (single-letter major +/// clades `A`–`T` + every ancestor up to the root) **unioned with curated +/// backbone** adopted from a source tree (nodes carrying a +/// `provenance.backbone_source` marker, stamped by the SNP-graft enrich/graft +/// writers). The major-clade seed matches either the **node name** (the ISOGG +/// import names backbone nodes `A`–`T` outright) or the **`provenance.isogg` +/// mapping** (the de-novo tree names nodes by defining SNP, e.g. `R-M269`, and +/// carries the matched ISOGG clade in provenance) — so one pass serves both +/// naming schemes. Recomputed wholesale (clears then sets) so the *computed* +/// spine stays correct as the tree changes, while curated flags are preserved. +/// Returns the number of backbone nodes. +pub async fn recompute_backbone(pool: &PgPool, dna_type: DnaType) -> Result { + let dna = pg_enum_label(&dna_type)?; + sqlx::query( + "WITH RECURSIVE seeds AS ( \ + SELECT id FROM tree.haplogroup \ + WHERE haplogroup_type::text = $1 AND valid_until IS NULL \ + AND (name ~ '^[A-Z]$' OR provenance->>'isogg' ~ '^[A-Z]$') \ + ), up AS ( \ + SELECT id FROM seeds \ + UNION \ + SELECT r.parent_haplogroup_id FROM tree.haplogroup_relationship r \ + JOIN up ON up.id = r.child_haplogroup_id \ + WHERE r.parent_haplogroup_id IS NOT NULL AND r.valid_until IS NULL \ + ) \ + UPDATE tree.haplogroup h \ + SET is_backbone = (h.id IN (SELECT id FROM up)) OR (h.provenance ? 
'backbone_source') \ + WHERE h.haplogroup_type::text = $1 AND h.valid_until IS NULL", + ) + .bind(&dna) + .execute(pool) + .await?; + let n: i64 = sqlx::query_scalar( + "SELECT count(*) FROM tree.haplogroup WHERE haplogroup_type::text = $1 AND valid_until IS NULL AND is_backbone", + ) + .bind(&dna) + .fetch_one(pool) + .await?; + Ok(n) +} + +/// Summary of a recurrence scrub: how many multi-linked variants were examined, +/// how many were recurrent (links off a single lineage), the off-lineage links +/// soft-deleted, and a few human-readable samples. +#[derive(Debug, Clone)] +pub struct ScrubReport { + pub variants_examined: usize, + pub variants_scrubbed: usize, + pub links_removed: usize, + pub samples: Vec, +} + +/// Resolve a tree-search query to a haplogroup name: try a direct name match, +/// then an alternate (old ISOGG) name in `provenance.aliases`, then a defining +/// variant name; return the most-recent match. `None` if nothing matches. +/// +/// If the raw query resolves nothing, retry against the normalized candidate +/// tokens from [`normalize_haplogroup_call`] — heterogeneous publication calls +/// (FTDNA terminal-SNP shorthand `R-M269`, path strings `R-DF27 > Z195 > Z198`, +/// SNP synonyms `L151/PF6542`) resolve via the defining-variant phase once their +/// SNP token is isolated. Old YCC nested-letter longhand (`R1b1a2a1a2c1g`) has no +/// SNP to recover and stays unresolved (a historical crosswalk would be needed). +pub async fn resolve_name_or_variant( + pool: &PgPool, + query: &str, + dna_type: DnaType, +) -> Result, DbError> { + let dna = pg_enum_label(&dna_type)?; + let q = query.trim(); + if let Some(name) = resolve_one(pool, q, dna_type, &dna).await? { + return Ok(Some(name)); + } + // Fallback: normalize a heterogeneous publication call to candidate SNP/name + // tokens (most-specific first) and retry each. + for cand in normalize_haplogroup_call(q) { + if let Some(name) = resolve_one(pool, &cand, dna_type, &dna).await? 
{ + return Ok(Some(name)); + } + } + Ok(None) +} + +/// One resolution attempt for an already-prepared query token: direct name → +/// `provenance.aliases` → defining-variant name. `dna` is the `pg_enum_label`. +async fn resolve_one( + pool: &PgPool, + q: &str, + dna_type: DnaType, + dna: &str, +) -> Result, DbError> { + // Direct name hit. + if let Some(h) = get_by_name(pool, q, dna_type).await? { + return Ok(Some(h.name)); + } + // Alternate (deprecated ISOGG) name → current name. + let by_alias: Option = sqlx::query_scalar( + "SELECT name FROM tree.haplogroup \ + WHERE haplogroup_type::text = $1 AND valid_until IS NULL \ + AND jsonb_exists(provenance->'aliases', $2) \ + ORDER BY valid_from DESC LIMIT 1", + ) + .bind(dna) + .bind(q) + .fetch_optional(pool) + .await?; + if by_alias.is_some() { + return Ok(by_alias); + } + // Variant name → defining haplogroup (latest by valid_from). + let name: Option = sqlx::query_scalar( + "SELECT h.name FROM tree.haplogroup h \ + JOIN tree.haplogroup_variant hv ON hv.haplogroup_id = h.id AND hv.valid_until IS NULL \ + JOIN core.variant v ON v.id = hv.variant_id \ + WHERE h.haplogroup_type::text = $1 AND h.valid_until IS NULL \ + AND lower(v.canonical_name) = lower($2) \ + ORDER BY h.valid_from DESC LIMIT 1", + ) + .bind(dna) + .bind(q) + .fetch_optional(pool) + .await?; + Ok(name) +} + +/// Normalize a heterogeneous publication haplogroup call into ordered candidate +/// tokens to retry resolution against, most-specific first. Handles: +/// - FTDNA terminal-SNP shorthand: `R-M269` → `M269` +/// - path strings: `R-DF27 > Z195 > Z198` → `Z198`, `Z195`, `DF27` (terminal first) +/// - SNP synonyms: `L151/PF6542` → `L151`, `PF6542` +/// +/// Returns empty for non-calls (`n/a`, `NA`, blank) and never re-emits the raw +/// trimmed input (the caller already tried that). 
+fn normalize_haplogroup_call(raw: &str) -> Vec { + let q = raw.trim(); + if q.is_empty() { + return Vec::new(); + } + let low = q.to_ascii_lowercase(); + if low == "na" || low == "?" || low == "unknown" || low.starts_with("n/a") { + return Vec::new(); + } + // Path string: '>'-separated clades, terminal (most specific) first. + let segments: Vec<&str> = q.split('>').map(str::trim).filter(|s| !s.is_empty()).collect(); + let mut out: Vec = Vec::new(); + for seg in segments.iter().rev() { + let stripped = strip_haplogroup_prefix(seg); + // SNP synonyms ('/'-separated); a no-slash token splits to itself. + for cand in stripped.split('/') { + let c = cand.trim().trim_matches(|ch: char| "*?.,()".contains(ch)).trim(); + if !c.is_empty() && c != q && !out.iter().any(|e| e == c) { + out.push(c.to_string()); + } + } + } + out +} + +/// Strip a leading haplogroup-label prefix (`R-`, `I-`, `E1b1b-`) from FTDNA +/// terminal-SNP shorthand, leaving the SNP token. No dash, or a non-label head, +/// returns the segment unchanged. +fn strip_haplogroup_prefix(seg: &str) -> &str { + if let Some(idx) = seg.find('-') { + let head = &seg[..idx]; + let looks_like_label = !head.is_empty() + && head.chars().next().is_some_and(|c| c.is_ascii_uppercase()) + && head.chars().all(|c| c.is_ascii_alphanumeric()); + if looks_like_label { + return &seg[idx + 1..]; + } + } + seg +} + +/// A defining variant of a haplogroup, for the SNP-detail sidebar. Carries this +/// branch's ancestral->derived transition (ASR) and whether the SNP is recurrent +/// (occurs on other branches too — homoplasy). +#[derive(Debug, Clone)] +pub struct VariantInfo { + /// NULL for UNNAMED variants (e.g. a homoplasy site whose name went to the + /// primary locus); the UI falls back to an alias. + pub canonical_name: Option, + pub mutation_type: String, + pub aliases: serde_json::Value, + pub coordinates: serde_json::Value, + /// This branch's ancestral/derived alleles (NULL for legacy/forward links). 
+ pub link_ancestral: Option, + pub link_derived: Option, + /// True if this SNP also defines/occurs on another current branch. + pub recurrent: bool, +} + +/// All current defining variants of the named haplogroup, ordered by name. +pub async fn variants_of( + pool: &PgPool, + name: &str, + dna_type: DnaType, +) -> Result, DbError> { + #[derive(sqlx::FromRow)] + struct Row { + canonical_name: Option, + mutation_type: String, + aliases: serde_json::Value, + coordinates: serde_json::Value, + link_ancestral: Option, + link_derived: Option, + recurrent: bool, + } + let rows: Vec = sqlx::query_as( + "SELECT v.canonical_name, v.mutation_type::text AS mutation_type, v.aliases, v.coordinates, \ + hv.ancestral_allele AS link_ancestral, hv.derived_allele AS link_derived, \ + EXISTS (SELECT 1 FROM tree.haplogroup_variant hv2 \ + WHERE hv2.variant_id = v.id AND hv2.valid_until IS NULL \ + AND hv2.haplogroup_id <> hv.haplogroup_id) AS recurrent \ + FROM core.variant v \ + JOIN tree.haplogroup_variant hv ON hv.variant_id = v.id AND hv.valid_until IS NULL \ + JOIN tree.haplogroup h ON h.id = hv.haplogroup_id \ + WHERE h.name = $1 AND h.haplogroup_type::text = $2 AND h.valid_until IS NULL \ + ORDER BY v.canonical_name", + ) + .bind(name) + .bind(pg_enum_label(&dna_type)?) + .fetch_all(pool) + .await?; + Ok(rows + .into_iter() + .map(|r| VariantInfo { + canonical_name: r.canonical_name, + mutation_type: r.mutation_type, + aliases: r.aliases, + coordinates: r.coordinates, + link_ancestral: r.link_ancestral, + link_derived: r.link_derived, + recurrent: r.recurrent, + }) + .collect()) +} + +/// A node in a subtree fetch: the haplogroup plus its current parent (`None` at +/// the subtree root). The JSON tree API assembles the nesting in-process. 
+#[derive(Debug, Clone)] +pub struct SubtreeNode { + pub id: i64, + pub name: String, + pub parent_id: Option, + pub haplogroup_type: String, + pub formed_ybp: Option, + pub tmrca_ybp: Option, +} + +/// All haplogroups in the subtree under `root_name` (or every root of the +/// lineage when `None`), following current edges (`valid_until IS NULL`), as a +/// flat list with parent linkage. +pub async fn subtree( + pool: &PgPool, + dna_type: DnaType, + root_name: Option<&str>, +) -> Result, DbError> { + #[derive(sqlx::FromRow)] + struct SubRow { + id: i64, + name: String, + parent_id: Option, + haplogroup_type: String, + formed_ybp: Option, + tmrca_ybp: Option, + } + let rows: Vec = sqlx::query_as( + "WITH RECURSIVE sub AS ( \ + SELECT h.id, h.name, NULL::bigint AS parent_id, h.haplogroup_type::text AS haplogroup_type, \ + h.formed_ybp, h.tmrca_ybp \ + FROM tree.haplogroup h \ + WHERE h.haplogroup_type::text = $1 AND h.valid_until IS NULL AND ( \ + ($2::text IS NULL AND NOT EXISTS ( \ + SELECT 1 FROM tree.haplogroup_relationship r \ + WHERE r.child_haplogroup_id = h.id AND r.parent_haplogroup_id IS NOT NULL \ + AND r.valid_until IS NULL)) \ + OR ($2::text IS NOT NULL AND h.name = $2)) \ + UNION ALL \ + SELECT c.id, c.name, r.parent_haplogroup_id, c.haplogroup_type::text, c.formed_ybp, c.tmrca_ybp \ + FROM tree.haplogroup c \ + JOIN tree.haplogroup_relationship r \ + ON r.child_haplogroup_id = c.id AND r.valid_until IS NULL \ + JOIN sub ON sub.id = r.parent_haplogroup_id \ + WHERE c.valid_until IS NULL) \ + SELECT id, name, parent_id, haplogroup_type, formed_ybp, tmrca_ybp FROM sub", + ) + .bind(pg_enum_label(&dna_type)?) 
+ .bind(root_name) + .fetch_all(pool) + .await?; + Ok(rows + .into_iter() + .map(|r| SubtreeNode { + id: r.id, + name: r.name, + parent_id: r.parent_id, + haplogroup_type: r.haplogroup_type, + formed_ybp: r.formed_ybp, + tmrca_ybp: r.tmrca_ybp, + }) + .collect()) +} + +// ── curator structural ops (direct temporal edits) ────────────────────────── + +/// The current parent of a node (id, name), or `None` at a root. +pub async fn current_parent( + pool: &PgPool, + id: HaplogroupId, +) -> Result, DbError> { + let row: Option<(i64, String)> = sqlx::query_as( + "SELECT p.id, p.name FROM tree.haplogroup_relationship r \ + JOIN tree.haplogroup p ON p.id = r.parent_haplogroup_id \ + WHERE r.child_haplogroup_id = $1 AND r.parent_haplogroup_id IS NOT NULL \ + AND r.valid_until IS NULL AND p.valid_until IS NULL LIMIT 1", + ) + .bind(id.0) + .fetch_optional(pool) + .await?; + Ok(row.map(|(id, name)| (HaplogroupId(id), name))) +} + +/// Current defining-variant links of a node: `(variant_id, canonical_name)`, +/// ordered by name (backs the split variant-picker). +pub async fn current_variant_links( + pool: &PgPool, + id: HaplogroupId, +) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT v.id, v.canonical_name FROM tree.haplogroup_variant hv \ + JOIN core.variant v ON v.id = hv.variant_id \ + WHERE hv.haplogroup_id = $1 AND hv.valid_until IS NULL ORDER BY v.canonical_name", + ) + .bind(id.0) + .fetch_all(pool) + .await?) +} + +/// **Reparent** a node (and its subtree) under a new parent: close the current +/// parent edge and open a new one (temporal). Rejects a no-op, a self-parent, or +/// a `new_parent` inside the node's own subtree (which would create a cycle). +pub async fn reparent( + pool: &PgPool, + child: HaplogroupId, + new_parent: HaplogroupId, +) -> Result<(), DbError> { + if child.0 == new_parent.0 { + return Err(DbError::Conflict("a node cannot be its own parent".into())); + } + // Cycle guard: new_parent must not be at/below child in the current tree. 
+ let cycles: bool = sqlx::query_scalar( + "WITH RECURSIVE down AS ( \ + SELECT $1::bigint AS id \ + UNION \ + SELECT r.child_haplogroup_id FROM tree.haplogroup_relationship r \ + JOIN down ON down.id = r.parent_haplogroup_id WHERE r.valid_until IS NULL) \ + SELECT EXISTS(SELECT 1 FROM down WHERE id = $2)", + ) + .bind(child.0) + .bind(new_parent.0) + .fetch_one(pool) + .await?; + if cycles { + return Err(DbError::Conflict("new parent is within the node's own subtree (would cycle)".into())); + } + + let mut tx = pool.begin().await?; + sqlx::query( + "UPDATE tree.haplogroup_relationship SET valid_until = now() \ + WHERE child_haplogroup_id = $1 AND valid_until IS NULL", + ) + .bind(child.0) + .execute(&mut *tx) + .await?; + sqlx::query( + "INSERT INTO tree.haplogroup_relationship (child_haplogroup_id, parent_haplogroup_id, source) \ + VALUES ($1, $2, 'curator')", + ) + .bind(child.0) + .bind(new_parent.0) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(()) +} + +/// **Merge a node into its parent**: reparent the node's children onto its +/// parent, union its defining variants into the parent, then temporal-delete the +/// node (expire it + close its edges/variant links). Errors if the node is a root. +pub async fn merge_into_parent(pool: &PgPool, node: HaplogroupId) -> Result<(), DbError> { + let parent = current_parent(pool, node) + .await? + .ok_or_else(|| DbError::Conflict("node has no parent to merge into".into()))? + .0; + let mut tx = pool.begin().await?; + + // Reparent the node's current children onto the parent. + sqlx::query( + "UPDATE tree.haplogroup_relationship SET parent_haplogroup_id = $2 \ + WHERE parent_haplogroup_id = $1 AND valid_until IS NULL", + ) + .bind(node.0) + .bind(parent.0) + .execute(&mut *tx) + .await?; + + // Union the node's variants into the parent (skip dupes). 
+ sqlx::query( + "INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) \ + SELECT $2, hv.variant_id FROM tree.haplogroup_variant hv \ + WHERE hv.haplogroup_id = $1 AND hv.valid_until IS NULL \ + AND NOT EXISTS (SELECT 1 FROM tree.haplogroup_variant e \ + WHERE e.haplogroup_id = $2 AND e.variant_id = hv.variant_id AND e.valid_until IS NULL)", + ) + .bind(node.0) + .bind(parent.0) + .execute(&mut *tx) + .await?; + + // Temporal-delete the node: expire it, close its remaining edges + variant links. + sqlx::query("UPDATE tree.haplogroup SET valid_until = now() WHERE id = $1 AND valid_until IS NULL") + .bind(node.0) + .execute(&mut *tx) + .await?; + sqlx::query( + "UPDATE tree.haplogroup_relationship SET valid_until = now() \ + WHERE (child_haplogroup_id = $1 OR parent_haplogroup_id = $1) AND valid_until IS NULL", + ) + .bind(node.0) + .execute(&mut *tx) + .await?; + sqlx::query("UPDATE tree.haplogroup_variant SET valid_until = now() WHERE haplogroup_id = $1 AND valid_until IS NULL") + .bind(node.0) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(()) +} + +/// **Split** a node: create a new child `new_child_name` under it and move the +/// given variant links from the node onto the new child (close on the node, open +/// on the child). Returns the new child id. Rejects a taken name; ignores ids not +/// currently linked to the node. 
+pub async fn split( + pool: &PgPool, + node: HaplogroupId, + new_child_name: &str, + variant_ids: &[i64], + dna_type: DnaType, + source: Option<&str>, +) -> Result { + let dna = pg_enum_label(&dna_type)?; + let name = new_child_name.trim(); + if name.is_empty() { + return Err(DbError::Conflict("new child name is required".into())); + } + let exists: bool = sqlx::query_scalar( + "SELECT EXISTS(SELECT 1 FROM tree.haplogroup \ + WHERE name = $1 AND haplogroup_type::text = $2 AND valid_until IS NULL)", + ) + .bind(name) + .bind(&dna) + .fetch_one(pool) + .await?; + if exists { + return Err(DbError::Conflict(format!("a haplogroup named {name} already exists"))); + } + + let mut tx = pool.begin().await?; + let child_id: i64 = sqlx::query_scalar( + "INSERT INTO tree.haplogroup (name, haplogroup_type, source) \ + VALUES ($1, $2::core.dna_type, $3) RETURNING id", + ) + .bind(name) + .bind(&dna) + .bind(source) + .fetch_one(&mut *tx) + .await?; + sqlx::query( + "INSERT INTO tree.haplogroup_relationship (child_haplogroup_id, parent_haplogroup_id, source) \ + VALUES ($1, $2, 'curator')", + ) + .bind(child_id) + .bind(node.0) + .execute(&mut *tx) + .await?; + + // Move only links currently on the node: close on node, open on the child. + sqlx::query( + "UPDATE tree.haplogroup_variant SET valid_until = now() \ + WHERE haplogroup_id = $1 AND variant_id = ANY($2) AND valid_until IS NULL", + ) + .bind(node.0) + .bind(variant_ids) + .execute(&mut *tx) + .await?; + sqlx::query( + "INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) \ + SELECT $1, v FROM unnest($2::bigint[]) AS v \ + WHERE EXISTS (SELECT 1 FROM core.variant cv WHERE cv.id = v)", + ) + .bind(child_id) + .bind(variant_ids) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(HaplogroupId(child_id)) +} + +// ── phylogenetic pathway (for the public per-sample report) ─────────────────── + +/// One clade on the root→tip pathway: the node, its ages, and its defining SNPs. 
+#[derive(Debug, Clone)] +pub struct PathwayStep { + pub haplogroup_id: HaplogroupId, + pub name: String, + pub formed_ybp: Option, + pub tmrca_ybp: Option, + pub defining_snps: Vec, +} + +/// A called haplogroup resolved to its place in the tree, root→tip. +#[derive(Debug, Clone)] +pub struct Pathway { + pub dna_type: DnaType, + /// The name as called on the sample (raw input). + pub called_name: String, + /// The matched tree node name, or `None` when the call isn't placed in the + /// tree (raw caller output, deprecated nomenclature, provisional `~` names). + pub resolved_name: Option, + /// Root → tip clades. Empty when `resolved_name` is `None`. + pub steps: Vec, +} + +/// Turn a called haplogroup NAME into its root→tip pathway with branch ages and +/// defining SNPs, reusing [`resolve_name_or_variant`] / [`get_by_name`] / +/// [`ancestors`] / [`variants_of`]. When the name can't be resolved to a tree +/// node, returns a `Pathway` with `resolved_name: None` and no steps (the gap +/// case) rather than erroring — the report shows the raw call as "not placed". +pub async fn pathway(pool: &PgPool, called_name: &str, dna_type: DnaType) -> Result { + let gap = |resolved: Option| Pathway { + dna_type, + called_name: called_name.to_string(), + resolved_name: resolved, + steps: Vec::new(), + }; + let Some(resolved) = resolve_name_or_variant(pool, called_name, dna_type).await? else { + return Ok(gap(None)); + }; + let Some(tip) = get_by_name(pool, &resolved, dna_type).await? else { + return Ok(gap(None)); + }; + + // root → parent, then append the tip itself for the full chain. + let mut chain = ancestors(pool, tip.id).await?; + chain.push((tip.id, tip.name.clone())); + + let mut steps = Vec::with_capacity(chain.len()); + for (id, name) in chain { + // ages live on the node; the chain only carries id+name. 
+ let node = get_by_id(pool, id).await?; + let defining_snps = variants_of(pool, &name, dna_type).await?; + steps.push(PathwayStep { + haplogroup_id: id, + name, + formed_ybp: node.as_ref().and_then(|h| h.formed_ybp), + tmrca_ybp: node.as_ref().and_then(|h| h.tmrca_ybp), + defining_snps, + }); + } + Ok(Pathway { dna_type, called_name: called_name.to_string(), resolved_name: Some(resolved), steps }) +} diff --git a/rust/crates/du-db/src/ibd.rs b/rust/crates/du-db/src/ibd.rs new file mode 100644 index 00000000..02733106 --- /dev/null +++ b/rust/crates/du-db/src/ibd.rs @@ -0,0 +1,778 @@ +//! IBD candidate-generation engine (D3 first slice, D1-independent). +//! +//! The AppView *coordinates* IBD: it proposes **introduction candidates** (which pairs +//! should attempt an Edge-to-Edge comparison) from anonymized `fed.*` aggregates — it +//! never sees a genotype, and the segment detection is the Edge's job. The +//! load-bearing rule (D3 §3.0): **never materialize N×N, never hand a client +//! "everyone"** — block by ancestry, expand the match graph, emit a bounded top-K list +//! per sample. Three signals feed `ibd.match_suggestion`: +//! 1. **population overlap** — `Σ min(A[pop], B[pop])`, computed only *within ancestry +//! blocks* (dominant super-population × a z-scored PCA cell); +//! 2. **haplogroup** — a shared terminal Y/mt consensus haplogroup (rarer = higher); +//! 3. **shared-match** — 2-hop expansion over the `ibd_discovery_index` match graph +//! (the in-common-with / clustering signal; dormant until the graph has edges). +//! +//! Mirrors the sequencer/discovery engines: advisory-locked, declarative recompute that +//! preserves user decisions (`DISMISSED`/`CONVERTED` pairs are never re-suggested). + +use crate::DbError; +use serde_json::{json, Value}; +use sqlx::PgPool; +use std::collections::{HashMap, HashSet}; +use uuid::Uuid; + +/// Advisory-lock key guarding concurrent recomputes. 
+const IBD_ADVISORY_KEY: i64 = 0x4942_445F_4347; // "IBD_CG" + +const SIG_POPULATION: &str = "POPULATION_OVERLAP"; +const SIG_HAPLOGROUP: &str = "HAPLOGROUP"; +const SIG_SHARED_MATCH: &str = "SHARED_MATCH"; + +/// Thresholds + weights for candidate generation (plain config; no table for v1). +#[derive(Debug, Clone)] +pub struct IbdConfig { + /// Minimum population-overlap (Σ min over shared populations, 0..1). + pub min_overlap: f64, + /// Minimum shared third parties for a SHARED_MATCH candidate. + pub min_shared: i64, + /// Max suggestions kept per target sample (the no-N:N cap). + pub top_k: usize, + /// PCA grid cell size in standard deviations (z-scored, scale-free). + pub pca_cell_sigma: f64, + /// Suggestion lifetime (days) before it ages out. + pub ttl_days: i32, + pub w_population: f64, + pub w_haplogroup: f64, + pub w_shared_match: f64, + /// Tree-depth half-saturation for the haplogroup signal: a shared clade at this depth + /// scores at half its rarity. Deeper (more terminal) shared clades are far more + /// informative — sharing R-CTS4466 (depth ~21) ≫ sharing R (depth ~2). + pub depth_half_life: f64, +} + +impl Default for IbdConfig { + fn default() -> Self { + Self { + min_overlap: 0.6, + min_shared: 2, + top_k: 50, + pca_cell_sigma: 0.5, + ttl_days: 30, + w_population: 0.4, + w_haplogroup: 0.3, + w_shared_match: 0.3, + depth_half_life: 8.0, + } + } +} + +/// Outcome of [`recompute_suggestions`]. +#[derive(Debug, Default, Clone)] +pub struct SuggestionReport { + pub samples: u64, + pub blocks: u64, + pub population_pairs: u64, + pub haplogroup_pairs: u64, + pub shared_match_pairs: u64, + pub suggestions_written: u64, +} + +/// A ranked suggestion for a sample (the reader's row). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct SuggestionView { + pub suggested_sample_guid: Uuid, + pub suggestion_type: String, + pub score: Option, + pub metadata: Value, +} + +/// Serve a sample's ranked active candidates (used by the eventual consent-gated API). 
+pub async fn suggestions_for(pool: &PgPool, sample_guid: Uuid, limit: i64) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT suggested_sample_guid, suggestion_type, score, metadata \ + FROM ibd.match_suggestion \ + WHERE target_sample_guid = $1 AND status = 'ACTIVE' \ + ORDER BY score DESC NULLS LAST LIMIT $2", + ) + .bind(sample_guid) + .bind(limit) + .fetch_all(pool) + .await?) +} + +// ── federated read API (D3 entry point — pseudonymous, owner-DID scoped) ──────── + +/// The exact bytes each request's Ed25519 signature is computed over (cross-repo +/// contract with the Navigator Edge — keep byte-stable, mirrors `exchange::messages`). +pub mod messages { + /// Replay-guarded read poll: caller proves it is `did` at `ts` (unix seconds). + pub fn poll(did: &str, ts: i64) -> String { + format!("ibd-poll\n{did}\n{ts}") + } + /// Ask the broker to relay a consent request to a pseudonymous candidate. The caller + /// signs over the sample handle it *can* see; the counterpart DID is resolved server-side. + pub fn introduce(did: &str, suggested_sample_guid: &str) -> String { + format!("ibd-introduce\n{did}\n{suggested_sample_guid}") + } + /// Dismiss a candidate so it stops being suggested (preserved across recomputes). + pub fn dismiss(did: &str, suggested_sample_guid: &str) -> String { + format!("ibd-dismiss\n{did}\n{suggested_sample_guid}") + } + /// Attest the *outcome* of a completed Edge-to-Edge comparison: the attester (a party to + /// the consented exchange `request_uri`) reports that its own sample and the counterpart's + /// share an IBD segment in `region` totalling `cm` centimorgans. The figure is part of the + /// signed message so the claim is bound to the signature. 
+ pub fn attest(did: &str, request_uri: &str, claimed: &str, counterpart: &str, region: &str, cm: &str) -> String { + format!("ibd-attest\n{did}\n{request_uri}\n{claimed}\n{counterpart}\n{region}\n{cm}") + } +} + +/// A caller's ranked active candidates, scoped to the samples they own (via the +/// `core.biosample.atproto->>'repo_did'` self-publish bridge the engine itself uses). +/// Pseudonymous: rows carry only `suggested_sample_guid` + non-PII signal scores — never +/// a counterpart DID (identity reveal stays Edge-to-Edge over D1 consent). +pub async fn suggestions_for_did(pool: &PgPool, did: &str, limit: i64) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT ms.suggested_sample_guid, ms.suggestion_type, ms.score, ms.metadata \ + FROM ibd.match_suggestion ms \ + JOIN core.biosample b ON b.sample_guid = ms.target_sample_guid \ + WHERE b.atproto->>'repo_did' = $1 AND ms.status = 'ACTIVE' \ + ORDER BY ms.score DESC NULLS LAST LIMIT $2", + ) + .bind(did) + .bind(limit) + .fetch_all(pool) + .await?) +} + +/// Authorization for `introduce`: true only when an ACTIVE suggestion exists whose +/// *target* is one of `did`'s own samples and whose suggested sample matches — so a caller +/// can only ask to meet its own genuine candidates, never probe/forge contact to an +/// arbitrary sample. +pub async fn is_suggested_to_did(pool: &PgPool, did: &str, suggested_sample_guid: Uuid) -> Result { + Ok(sqlx::query_scalar::<_, i64>( + "SELECT count(*) FROM ibd.match_suggestion ms \ + JOIN core.biosample b ON b.sample_guid = ms.target_sample_guid \ + WHERE b.atproto->>'repo_did' = $1 AND ms.suggested_sample_guid = $2 AND ms.status = 'ACTIVE'", + ) + .bind(did) + .bind(suggested_sample_guid) + .fetch_one(pool) + .await? + > 0) +} + +/// Resolve the publisher DID that owns a sample (server-side counterpart resolution for +/// `introduce`). `None` ⇒ the sample isn't federated/claimable, so no introduction is +/// possible — and no DID is ever returned to the caller. 
+pub async fn owner_did_of_sample(pool: &PgPool, sample_guid: Uuid) -> Result<Option<String>, DbError> {
+    // Outer `Option`: no biosample row for this guid. Inner `Option`: the row exists but
+    // `atproto->>'repo_did'` is NULL. Both collapse to `None` for the caller.
+    let did: Option<Option<String>> =
+        sqlx::query_scalar("SELECT atproto->>'repo_did' FROM core.biosample WHERE sample_guid = $1")
+            .bind(sample_guid)
+            .fetch_optional(pool)
+            .await?;
+    Ok(did.flatten())
+}
+
+/// The D1 exchange `purpose` for introducing the caller to one of its candidates, derived
+/// from the suggestion's dominant signal: a shared-haplogroup match → `IBD_Y`/`IBD_MT` (from
+/// the recorded arm), an autosomal signal (population / shared-match) → `IBD_AUTOSOMAL`.
+/// `None` ⇒ the candidate is not a live (`ACTIVE`/`CONVERTED`) suggestion for `did` — the
+/// caller may not introduce to it (authorization, replacing the bare existence check).
+///
+/// When several of the caller's samples have a live suggestion for the same candidate, the
+/// highest-scoring row (NULL scores last) decides the purpose. A haplogroup suggestion whose
+/// metadata has no recognised `hgDnaType` falls back to `IBD_AUTOSOMAL`.
+pub async fn introduction_purpose(
+    pool: &PgPool,
+    did: &str,
+    suggested_sample_guid: Uuid,
+) -> Result<Option<String>, DbError> {
+    let row: Option<(String, Value)> = sqlx::query_as(
+        "SELECT ms.suggestion_type, ms.metadata FROM ibd.match_suggestion ms \
+         JOIN core.biosample b ON b.sample_guid = ms.target_sample_guid \
+         WHERE b.atproto->>'repo_did' = $1 AND ms.suggested_sample_guid = $2 \
+         AND ms.status IN ('ACTIVE','CONVERTED') \
+         ORDER BY ms.score DESC NULLS LAST LIMIT 1",
+    )
+    .bind(did)
+    .bind(suggested_sample_guid)
+    .fetch_optional(pool)
+    .await?;
+
+    let Some((suggestion_type, meta)) = row else {
+        return Ok(None);
+    };
+    let purpose = match suggestion_type.as_str() {
+        // Only a haplogroup suggestion carries a DNA arm; it selects the uniparental purpose.
+        SIG_HAPLOGROUP => match meta.get("hgDnaType").and_then(Value::as_str) {
+            Some("MT_DNA") => "IBD_MT",
+            Some("Y_DNA") => "IBD_Y",
+            _ => "IBD_AUTOSOMAL",
+        },
+        _ => "IBD_AUTOSOMAL",
+    };
+    Ok(Some(purpose.to_string()))
+}
+
+/// Mark the caller's suggestion for a candidate `CONVERTED` (it became an exchange request) so
+/// it drops out of the active candidate list. Idempotent; only affects the caller's own ACTIVE rows.
+pub async fn mark_converted(pool: &PgPool, did: &str, suggested_sample_guid: Uuid) -> Result<(), DbError> { + sqlx::query( + "UPDATE ibd.match_suggestion ms SET status = 'CONVERTED' \ + FROM core.biosample b \ + WHERE b.sample_guid = ms.target_sample_guid \ + AND b.atproto->>'repo_did' = $1 AND ms.suggested_sample_guid = $2 AND ms.status = 'ACTIVE'", + ) + .bind(did) + .bind(suggested_sample_guid) + .execute(pool) + .await?; + Ok(()) +} + +/// Dismiss the caller's candidate so the engine stops suggesting it (recompute preserves +/// `DISMISSED`). Only the caller's own ACTIVE rows are affected; returns the number dismissed. +pub async fn dismiss_suggestion(pool: &PgPool, did: &str, suggested_sample_guid: Uuid) -> Result { + Ok(sqlx::query( + "UPDATE ibd.match_suggestion ms SET status = 'DISMISSED' \ + FROM core.biosample b \ + WHERE b.sample_guid = ms.target_sample_guid \ + AND b.atproto->>'repo_did' = $1 AND ms.suggested_sample_guid = $2 AND ms.status = 'ACTIVE'", + ) + .bind(did) + .bind(suggested_sample_guid) + .execute(pool) + .await? + .rows_affected()) +} + +// ── attestation ingest (close the loop: a completed exchange → match state) ───── + +/// One Edge's signed report of a completed IBD comparison. PII-free: only pseudonymous +/// sample handles, a region, and coarse totals — never segment coordinates or genotypes. +#[derive(Debug, Clone)] +pub struct Attestation<'a> { + /// The DID that signed this report (verified by the route before it reaches here). + pub attester_did: &'a str, + /// The consented exchange this comparison came out of (the privacy rail). + pub request_uri: &'a str, + /// The attester's own sample (must be owned by `attester_did`). + pub claimed_sample: Uuid, + /// The counterpart's sample (must be owned by the exchange's *other* party). + pub counterpart_sample: Uuid, + /// Match region: `AUTOSOMAL` / `X` / `Y` / `MT`. 
+ pub region_type: &'a str, + pub total_shared_cm: Option, + pub num_segments: Option, + /// `INITIAL_REPORT` / `CONFIRMATION` / `DISPUTE` / `REVOCATION`. + pub attestation_type: &'a str, + pub signature: &'a str, + pub notes: Option<&'a str>, +} + +/// Result of recording an attestation. `Rejected` carries a static reason the route maps +/// to a 4xx — the attestation never touched the match graph. +#[derive(Debug, Clone, PartialEq)] +pub enum AttestationOutcome { + Recorded { + discovery_index_id: i64, + consensus_status: String, + publicly_discoverable: bool, + }, + Rejected(&'static str), +} + +/// True iff `sample` is a federated biosample published by `did` (the repo_did self-publish +/// bridge — the same ownership proof the suggestion engine and `/introduce` rely on). +async fn owns_sample(pool: &PgPool, did: &str, sample: Uuid) -> Result { + Ok(sqlx::query_scalar::<_, i64>( + "SELECT count(*) FROM core.biosample WHERE sample_guid = $1 AND atproto->>'repo_did' = $2", + ) + .bind(sample) + .bind(did) + .fetch_one(pool) + .await? + > 0) +} + +/// Two cM totals are compatible when within `max(10 cM, 20%)` — segment detectors disagree +/// on exact boundaries, so consensus tolerates spread rather than demanding equality. +fn cm_agree(a: f64, b: f64) -> bool { + (a - b).abs() <= (0.20 * a.max(b)).max(10.0) +} + +/// Record a signed IBD attestation and recompute the pair's consensus. +/// +/// Gated, in order: the `request_uri` must be a **CONSENTED** exchange with an **IBD** +/// purpose; the attester must be a **party** to it; the attester must **own** its claimed +/// sample and the *other* party must own the counterpart sample. This ties every match-graph +/// edge to a real dual-consented comparison — there is no way to forge an edge for a pair +/// that never agreed to compare. On the second party's compatible report the pair flips +/// `CONFIRMED` + `is_publicly_discoverable`, which is what the SHARED_MATCH signal reads. 
+pub async fn record_attestation(pool: &PgPool, a: &Attestation<'_>) -> Result { + // 1. The exchange must exist, be consented, and be an IBD exchange the attester is in. + let req: Option<(String, String, String, String)> = sqlx::query_as( + "SELECT initiator_did, partner_did, status, purpose FROM exchange.exchange_request WHERE request_uri = $1", + ) + .bind(a.request_uri) + .fetch_optional(pool) + .await?; + let Some((initiator, partner, status, purpose)) = req else { + return Ok(AttestationOutcome::Rejected("unknown exchange request")); + }; + if status != "CONSENTED" { + return Ok(AttestationOutcome::Rejected("exchange is not consented")); + } + if !purpose.starts_with("IBD") { + return Ok(AttestationOutcome::Rejected("not an IBD exchange")); + } + let counterpart_did = if a.attester_did == initiator { + partner + } else if a.attester_did == partner { + initiator + } else { + return Ok(AttestationOutcome::Rejected("attester is not a party to this exchange")); + }; + + // 2. The reported pair must be the two parties' own samples. + if !owns_sample(pool, a.attester_did, a.claimed_sample).await? { + return Ok(AttestationOutcome::Rejected("claimed sample is not owned by the attester")); + } + if !owns_sample(pool, &counterpart_did, a.counterpart_sample).await? { + return Ok(AttestationOutcome::Rejected("counterpart sample is not owned by the other party")); + } + + // 3. Get-or-create the match-graph edge (order-independent pair × region), record the + // attestation, recompute consensus — all in one transaction. 
+ let (s1, s2) = ordered(a.claimed_sample, a.counterpart_sample); + let mut tx = pool.begin().await?; + sqlx::query( + "INSERT INTO ibd.ibd_discovery_index \ + (sample_guid_1, sample_guid_2, match_region_type, consensus_status, is_publicly_discoverable) \ + VALUES ($1, $2, $3, 'PENDING', false) \ + ON CONFLICT (LEAST(sample_guid_1, sample_guid_2), GREATEST(sample_guid_1, sample_guid_2), match_region_type) \ + DO NOTHING", + ) + .bind(s1) + .bind(s2) + .bind(a.region_type) + .execute(&mut *tx) + .await?; + let idx_id: i64 = sqlx::query_scalar( + "SELECT id FROM ibd.ibd_discovery_index \ + WHERE LEAST(sample_guid_1, sample_guid_2) = LEAST($1, $2) \ + AND GREATEST(sample_guid_1, sample_guid_2) = GREATEST($1, $2) \ + AND match_region_type = $3", + ) + .bind(s1) + .bind(s2) + .bind(a.region_type) + .fetch_one(&mut *tx) + .await?; + + sqlx::query( + "INSERT INTO ibd.ibd_pds_attestation \ + (ibd_discovery_index_id, attesting_did, exchange_request_uri, attestation_signature, \ + attestation_type, reported_total_cm, reported_segments, attestation_notes) \ + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) \ + ON CONFLICT (ibd_discovery_index_id, attesting_did, attestation_type) WHERE attesting_did IS NOT NULL \ + DO UPDATE SET attestation_signature = EXCLUDED.attestation_signature, \ + exchange_request_uri = EXCLUDED.exchange_request_uri, \ + reported_total_cm = EXCLUDED.reported_total_cm, reported_segments = EXCLUDED.reported_segments, \ + attestation_notes = EXCLUDED.attestation_notes, attestation_timestamp = now()", + ) + .bind(idx_id) + .bind(a.attester_did) + .bind(a.request_uri) + .bind(a.signature) + .bind(a.attestation_type) + .bind(a.total_shared_cm) + .bind(a.num_segments) + .bind(a.notes) + .execute(&mut *tx) + .await?; + + // Recompute consensus from all of this edge's attestations. 
+ let atts: Vec<(String, String, Option, Option)> = sqlx::query_as( + "SELECT attesting_did, attestation_type, reported_total_cm, reported_segments \ + FROM ibd.ibd_pds_attestation WHERE ibd_discovery_index_id = $1 AND attesting_did IS NOT NULL", + ) + .bind(idx_id) + .fetch_all(&mut *tx) + .await?; + + let disputed = atts.iter().any(|(_, t, _, _)| t == "DISPUTE" || t == "REVOCATION"); + // The distinct parties that reported a (non-dispute) match, with their figures. + let mut reports: HashMap)> = HashMap::new(); + for (did, t, cm, segs) in &atts { + if t == "DISPUTE" || t == "REVOCATION" { + continue; + } + if let Some(cm) = cm { + reports.entry(did.clone()).or_insert((*cm, *segs)); + } + } + let (consensus_status, discoverable, agreed_cm, agreed_segs) = if disputed { + ("DISPUTED", false, None, None) + } else if reports.len() >= 2 { + let vals: Vec = reports.values().map(|(cm, _)| *cm).collect(); + let lo = vals.iter().cloned().fold(f64::INFINITY, f64::min); + let hi = vals.iter().cloned().fold(f64::NEG_INFINITY, f64::max); + if cm_agree(lo, hi) { + let avg = vals.iter().sum::() / vals.len() as f64; + let seg_avg = { + let segs: Vec = reports.values().filter_map(|(_, s)| *s).collect(); + if segs.is_empty() { None } else { Some(segs.iter().sum::() / segs.len() as i32) } + }; + ("CONFIRMED", true, Some(avg), seg_avg) + } else { + ("DISPUTED", false, None, None) + } + } else { + ("PENDING", false, None, None) + }; + + sqlx::query( + "UPDATE ibd.ibd_discovery_index \ + SET consensus_status = $2, is_publicly_discoverable = $3, \ + total_shared_cm_approx = COALESCE($4, total_shared_cm_approx), \ + num_shared_segments_approx = COALESCE($5, num_shared_segments_approx) \ + WHERE id = $1", + ) + .bind(idx_id) + .bind(consensus_status) + .bind(discoverable) + .bind(agreed_cm) + .bind(agreed_segs) + .execute(&mut *tx) + .await?; + + tx.commit().await?; + Ok(AttestationOutcome::Recorded { + discovery_index_id: idx_id, + consensus_status: consensus_status.to_string(), + 
publicly_discoverable: discoverable, + }) +} + +// ── internal model ─────────────────────────────────────────────────────────── + +struct Profile { + guid: Uuid, + breakdown: HashMap, // population -> fraction (0..1) + super_pop: Option, + pca: Option<(f64, f64)>, +} + +/// One signal's contribution to a candidate pair. +struct Hit { + a: Uuid, // canonical: a < b + b: Uuid, + signal: &'static str, + score: f64, + /// For a HAPLOGROUP hit, the shared haplogroup's DNA arm (`Y_DNA`/`MT_DNA`); else None. + dna: Option<&'static str>, +} + +fn ordered(x: Uuid, y: Uuid) -> (Uuid, Uuid) { + if x <= y { + (x, y) + } else { + (y, x) + } +} + +/// `Σ min(A[pop], B[pop])` over shared populations. +fn overlap(a: &HashMap, b: &HashMap) -> f64 { + a.iter().filter_map(|(p, fa)| b.get(p).map(|fb| fa.min(*fb))).sum() +} + +pub async fn recompute_suggestions(pool: &PgPool, cfg: &IbdConfig) -> Result { + let mut lock = pool.acquire().await?; + let locked: bool = sqlx::query_scalar("SELECT pg_try_advisory_lock($1)") + .bind(IBD_ADVISORY_KEY) + .fetch_one(&mut *lock) + .await?; + if !locked { + return Ok(SuggestionReport::default()); + } + let result = recompute_locked(pool, cfg).await; + let _ = sqlx::query("SELECT pg_advisory_unlock($1)").bind(IBD_ADVISORY_KEY).execute(&mut *lock).await; + result +} + +async fn recompute_locked(pool: &PgPool, cfg: &IbdConfig) -> Result { + let mut rep = SuggestionReport::default(); + + // ── Load federated ancestry profiles (latest breakdown per sample) ── + let rows: Vec<(Uuid, Value, Value, Option)> = sqlx::query_as( + "SELECT DISTINCT ON (b.sample_guid) b.sample_guid, pb.components, pb.super_population_summary, pb.pca_coordinates \ + FROM core.biosample b \ + JOIN fed.population_breakdown pb ON pb.biosample_ref = b.atproto->>'uri' \ + WHERE b.atproto IS NOT NULL AND b.deleted = false \ + ORDER BY b.sample_guid, pb.time_us DESC", + ) + .fetch_all(pool) + .await?; + + let profiles: Vec = rows + .into_iter() + .map(|(guid, components, super_pop, 
pca)| { + let breakdown = components + .as_array() + .map(|a| { + a.iter() + .filter_map(|c| { + let pop = c.get("population").and_then(Value::as_str)?; + let pct = c.get("percentage").and_then(Value::as_f64)?; + Some((pop.to_string(), pct / 100.0)) + }) + .collect() + }) + .unwrap_or_default(); + let super_pop = super_pop.as_array().and_then(|a| { + a.iter() + .filter_map(|s| { + let name = s.get("superPopulation").and_then(Value::as_str)?; + let pct = s.get("percentage").and_then(Value::as_f64)?; + Some((name.to_string(), pct)) + }) + .max_by(|x, y| x.1.total_cmp(&y.1)) + .map(|(name, _)| name) + }); + let pca = pca.as_ref().and_then(|v| { + let arr = v.as_array()?; + Some((arr.first()?.as_f64()?, arr.get(1)?.as_f64()?)) + }); + Profile { guid, breakdown, super_pop, pca } + }) + .collect(); + rep.samples = profiles.len() as u64; + + // ── z-score PCA across the cohort so the grid is scale-free ── + let (mut m1, mut m2, mut n) = (0.0f64, 0.0f64, 0.0f64); + for p in &profiles { + if let Some((a, b)) = p.pca { + m1 += a; + m2 += b; + n += 1.0; + } + } + let (mean1, mean2) = if n > 0.0 { (m1 / n, m2 / n) } else { (0.0, 0.0) }; + let (mut v1, mut v2) = (0.0f64, 0.0f64); + for p in &profiles { + if let Some((a, b)) = p.pca { + v1 += (a - mean1).powi(2); + v2 += (b - mean2).powi(2); + } + } + let sd1 = if n > 1.0 { (v1 / n).sqrt() } else { 1.0 }.max(1e-9); + let sd2 = if n > 1.0 { (v2 / n).sqrt() } else { 1.0 }.max(1e-9); + let cell = cfg.pca_cell_sigma.max(1e-6); + let block_key = |p: &Profile| -> String { + let sp = p.super_pop.clone().unwrap_or_else(|| "?".into()); + match p.pca { + Some((a, b)) => { + let c1 = ((a - mean1) / sd1 / cell).round() as i64; + let c2 = ((b - mean2) / sd2 / cell).round() as i64; + format!("{sp}:{c1}:{c2}") + } + None => format!("{sp}:nopca"), + } + }; + + // ── Signal 1: population overlap, only within ancestry blocks ── + let mut hits: Vec = Vec::new(); + let mut overlap_pairs: Vec<(Uuid, Uuid, f64)> = Vec::new(); + let mut blocks: 
HashMap> = HashMap::new(); + for (i, p) in profiles.iter().enumerate() { + if !p.breakdown.is_empty() { + blocks.entry(block_key(p)).or_default().push(i); + } + } + rep.blocks = blocks.len() as u64; + for members in blocks.values() { + for (xi, &i) in members.iter().enumerate() { + for &j in &members[xi + 1..] { + let s = overlap(&profiles[i].breakdown, &profiles[j].breakdown); + if s >= cfg.min_overlap { + let (a, b) = ordered(profiles[i].guid, profiles[j].guid); + hits.push(Hit { a, b, signal: SIG_POPULATION, score: s, dna: None }); + overlap_pairs.push((a, b, s)); + rep.population_pairs += 1; + } + } + } + } + + // ── Signal 2: shared terminal Y/mt consensus haplogroup (rarer = higher) ── + let hg_rows: Vec<(Uuid, String, String)> = sqlx::query_as( + "SELECT DISTINCT ON (b.sample_guid, r.dna_type) b.sample_guid, r.dna_type, r.consensus_haplogroup \ + FROM core.biosample b \ + JOIN fed.haplogroup_reconciliation r ON r.did = b.atproto->>'repo_did' \ + WHERE b.atproto IS NOT NULL AND b.deleted = false \ + AND r.consensus_haplogroup IS NOT NULL AND r.dna_type IS NOT NULL \ + ORDER BY b.sample_guid, r.dna_type, r.run_count DESC NULLS LAST, r.time_us DESC", + ) + .fetch_all(pool) + .await?; + let total = profiles.len().max(1) as f64; + + // Tree depth of every current clade, keyed (haplogroup_type, name), via one downward + // walk from the roots. Lets the haplogroup signal weight a shared *deep* clade above a + // shared shallow macro-clade (the `depth_score` refinement, enabled by the de-novo tree). 
+ let depth_rows: Vec<(String, String, i32)> = sqlx::query_as( + "WITH RECURSIVE walk AS ( \ + SELECT h.id, h.name, h.haplogroup_type::text AS dna, 0 AS depth \ + FROM tree.haplogroup h \ + WHERE h.valid_until IS NULL \ + AND NOT EXISTS (SELECT 1 FROM tree.haplogroup_relationship r \ + WHERE r.child_haplogroup_id = h.id AND r.valid_until IS NULL) \ + UNION ALL \ + SELECT c.id, c.name, c.haplogroup_type::text, walk.depth + 1 \ + FROM walk \ + JOIN tree.haplogroup_relationship r ON r.parent_haplogroup_id = walk.id AND r.valid_until IS NULL \ + JOIN tree.haplogroup c ON c.id = r.child_haplogroup_id AND c.valid_until IS NULL \ + WHERE walk.depth < 200) \ + SELECT dna, name, depth FROM walk", + ) + .fetch_all(pool) + .await?; + let mut depth_of: HashMap<(String, String), i32> = HashMap::new(); + for (dna, name, depth) in depth_rows { + // Keep the shallowest occurrence if a name recurs (defensive; tree is normally acyclic). + depth_of.entry((dna, name)).and_modify(|d| *d = (*d).min(depth)).or_insert(depth); + } + let half = cfg.depth_half_life.max(1.0); + + let mut by_hg: HashMap<(String, String), Vec> = HashMap::new(); + for (guid, dna, hg) in hg_rows { + by_hg.entry((dna, hg)).or_default().push(guid); + } + for ((dna, hg), members) in &by_hg { + if members.len() < 2 { + continue; + } + // The DNA arm this shared terminal belongs to (drives the IBD_Y/IBD_MT exchange purpose). + let dna_arm: &'static str = if dna == "MT_DNA" { "MT_DNA" } else { "Y_DNA" }; + // Rarer shared terminal ⇒ more informative, scaled by tree depth: a deeper shared + // clade is a much tighter relationship signal. Unknown clade ⇒ treated as shallow (3). + let rarity = (1.0 - members.len() as f64 / total).max(0.01); + let d = depth_of.get(&(dna.clone(), hg.clone())).copied().unwrap_or(3) as f64; + let depth_factor = d / (d + half); + let score = (rarity * depth_factor).max(0.01); + for (xi, &a) in members.iter().enumerate() { + for &b in &members[xi + 1..] 
{ + let (a, b) = ordered(a, b); + hits.push(Hit { a, b, signal: SIG_HAPLOGROUP, score, dna: Some(dna_arm) }); + rep.haplogroup_pairs += 1; + } + } + } + + // ── Signal 3: shared-match — 2-hop over the *confirmed* match graph. Only + // consensus-confirmed, publicly-discoverable edges count (attestation ingest gates + // this — see `record_attestation`); a one-sided or disputed report never propagates. ── + let sm_rows: Vec<(Uuid, Uuid, i64)> = sqlx::query_as( + "WITH edges AS ( \ + SELECT sample_guid_1 AS a, sample_guid_2 AS b FROM ibd.ibd_discovery_index WHERE is_publicly_discoverable \ + UNION ALL SELECT sample_guid_2, sample_guid_1 FROM ibd.ibd_discovery_index WHERE is_publicly_discoverable) \ + SELECT e1.a, e2.a, count(*) AS shared \ + FROM edges e1 JOIN edges e2 ON e1.b = e2.b AND e1.a < e2.a \ + GROUP BY e1.a, e2.a HAVING count(*) >= $1", + ) + .bind(cfg.min_shared) + .fetch_all(pool) + .await?; + for (a, b, shared) in sm_rows { + let (a, b) = ordered(a, b); + hits.push(Hit { a, b, signal: SIG_SHARED_MATCH, score: shared as f64, dna: None }); + rep.shared_match_pairs += 1; + } + + // ── Combine per pair, rank per target, cap top-K ── + struct Combined { + score: f64, + primary: &'static str, + signals: Vec<&'static str>, + /// The shared haplogroup's DNA arm, if a HAPLOGROUP signal contributed. + hg_dna: Option<&'static str>, + } + let weight = |sig: &str| match sig { + SIG_POPULATION => cfg.w_population, + SIG_HAPLOGROUP => cfg.w_haplogroup, + _ => cfg.w_shared_match, + }; + let mut combined: HashMap<(Uuid, Uuid), Combined> = HashMap::new(); + for h in hits { + let contrib = weight(h.signal) * h.score; + let e = combined.entry((h.a, h.b)).or_insert(Combined { score: 0.0, primary: h.signal, signals: vec![], hg_dna: None }); + if !e.signals.contains(&h.signal) { + e.signals.push(h.signal); + } + if h.dna.is_some() { + e.hg_dna = h.dna; + } + // Primary = the signal with the largest single weighted contribution. 
+ if contrib >= weight(e.primary) { + e.primary = h.signal; + } + e.score += contrib; + } + + // Directional candidate rows, grouped by target. + let mut per_target: HashMap> = HashMap::new(); + for ((a, b), c) in combined { + let mut meta = json!({ "signals": c.signals }); + if let Some(dna) = c.hg_dna { + meta.as_object_mut().unwrap().insert("hgDnaType".into(), json!(dna)); + } + per_target.entry(a).or_default().push((b, c.score, c.primary, meta.clone())); + per_target.entry(b).or_default().push((a, c.score, c.primary, meta)); + } + + // ── Declarative write: preserve curator/user decisions, refresh ACTIVE ── + let dismissed: HashSet<(Uuid, Uuid)> = sqlx::query_as::<_, (Uuid, Uuid)>( + "SELECT target_sample_guid, suggested_sample_guid FROM ibd.match_suggestion \ + WHERE status IN ('DISMISSED','CONVERTED')", + ) + .fetch_all(pool) + .await? + .into_iter() + .collect(); + + let mut tx = pool.begin().await?; + sqlx::query("DELETE FROM ibd.match_suggestion WHERE status IN ('ACTIVE','EXPIRED')") + .execute(&mut *tx) + .await?; + + for (target, mut cands) in per_target { + cands.sort_by(|x, y| y.1.total_cmp(&x.1)); + for (suggested, score, primary, meta) in cands.into_iter().take(cfg.top_k) { + if dismissed.contains(&(target, suggested)) { + continue; + } + sqlx::query( + "INSERT INTO ibd.match_suggestion \ + (target_sample_guid, suggested_sample_guid, suggestion_type, score, metadata, status, expires_at) \ + VALUES ($1, $2, $3, $4, $5, 'ACTIVE', now() + make_interval(days => $6))", + ) + .bind(target) + .bind(suggested) + .bind(primary) + .bind(score) + .bind(&meta) + .bind(cfg.ttl_days) + .execute(&mut *tx) + .await?; + rep.suggestions_written += 1; + } + } + + // Refresh the within-block overlap cache (order-independent pairs). 
+ if !overlap_pairs.is_empty() { + let s1: Vec = overlap_pairs.iter().map(|(a, _, _)| *a).collect(); + let s2: Vec = overlap_pairs.iter().map(|(_, b, _)| *b).collect(); + let sc: Vec = overlap_pairs.iter().map(|(_, _, s)| *s).collect(); + sqlx::query( + "INSERT INTO ibd.population_overlap_score (sample_guid_1, sample_guid_2, score) \ + SELECT a, b, s FROM unnest($1::uuid[], $2::uuid[], $3::float8[]) AS t(a, b, s) \ + ON CONFLICT (LEAST(sample_guid_1, sample_guid_2), GREATEST(sample_guid_1, sample_guid_2)) \ + DO UPDATE SET score = EXCLUDED.score, computed_at = now()", + ) + .bind(&s1) + .bind(&s2) + .bind(&sc) + .execute(&mut *tx) + .await?; + } + + tx.commit().await?; + Ok(rep) +} diff --git a/rust/crates/du-db/src/lib.rs b/rust/crates/du-db/src/lib.rs new file mode 100644 index 00000000..feabbd60 --- /dev/null +++ b/rust/crates/du-db/src/lib.rs @@ -0,0 +1,96 @@ +//! Data-access layer. Owns the `PgPool` and exposes per-aggregate query modules. +//! +//! Status: scaffold. The pool + error type are wired; query modules +//! (`biosample`, `variant`, `haplogroup`, …) land as each subsystem is ported. + +/// Re-exported so downstream crates can hold a pool without depending on sqlx. 
+pub use sqlx::postgres::PgPool;
+use sqlx::postgres::PgPoolOptions;
+use std::time::Duration;
+use thiserror::Error;
+
+pub mod age;
+pub mod audit;
+pub mod auth;
+pub mod biosample;
+pub mod change_set;
+pub mod consent;
+pub mod coverage;
+pub mod denovo;
+pub mod discovery;
+pub mod exchange;
+pub mod fed;
+pub mod genome_region;
+pub mod haplogroup;
+pub mod ibd;
+pub mod merge;
+pub mod naming;
+pub mod pagination;
+pub mod pdf;
+pub mod proposal;
+pub mod publication;
+pub mod research;
+pub mod test_type;
+pub mod sequencer;
+pub mod study;
+pub mod support;
+pub mod testing;
+pub mod tree_revision;
+pub mod tree_sample;
+pub mod variant;
+pub mod ybrowse;
+pub mod ystr;
+
+pub use pagination::Page;
+
+/// The single error type returned by every query module in this crate.
+#[derive(Debug, Error)]
+pub enum DbError {
+    #[error("database error: {0}")]
+    Sqlx(#[from] sqlx::Error),
+    #[error("migration error: {0}")]
+    Migrate(#[from] sqlx::migrate::MigrateError),
+    /// A row's text/JSONB column failed to decode into a domain type.
+    #[error("decode error: {0}")]
+    Decode(String),
+    /// A precondition/uniqueness conflict surfaced to the caller (e.g. promoting
+    /// a proposal whose name is already in the catalog).
+    #[error("conflict: {0}")]
+    Conflict(String),
+}
+
+/// Decode a Postgres enum label (fetched as `::text`) into a domain enum that
+/// derives `Deserialize` with matching SCREAMING_SNAKE_CASE variants. Keeps
+/// du-domain free of any sqlx dependency.
+///
+/// `what` names the column/field for the error message; an unknown label yields
+/// `DbError::Decode`.
+pub(crate) fn parse_pg_enum<T: serde::de::DeserializeOwned>(
+    label: &str,
+    what: &str,
+) -> Result<T, DbError> {
+    // Round-trip through a JSON string so serde's own variant matching does the work.
+    serde_json::from_value(serde_json::Value::String(label.to_string()))
+        .map_err(|e| DbError::Decode(format!("{what} = {label:?}: {e}")))
+}
+
+/// Inverse of `parse_pg_enum`: a domain enum's SCREAMING_SNAKE_CASE label for
+/// binding against a `::text`-cast enum column.
+///
+/// Fails with `DbError::Decode` when the value does not serialize to a plain string
+/// (for example an enum variant that carries data).
+pub(crate) fn pg_enum_label<T: serde::Serialize>(value: &T) -> Result<String, DbError> {
+    match serde_json::to_value(value).map_err(|e| DbError::Decode(e.to_string()))? {
+        serde_json::Value::String(s) => Ok(s),
+        other => Err(DbError::Decode(format!("expected enum string, got {other}"))),
+    }
+}
+
+/// Connect and return a pool. `database_url` is the standard `postgres://` DSN
+/// (driven by `DATABASE_URL`; see `scripts/test-db.sh` and plan §9).
+/// Waiting for a free connection gives up after 10 seconds.
+pub async fn connect(database_url: &str, max_connections: u32) -> Result<PgPool, DbError> {
+    let pool = PgPoolOptions::new()
+        .max_connections(max_connections)
+        .acquire_timeout(Duration::from_secs(10))
+        .connect(database_url)
+        .await?;
+    Ok(pool)
+}
+
+/// Apply the workspace migrations (the redesigned schema) to the given pool.
+pub async fn run_migrations(pool: &PgPool) -> Result<(), DbError> {
+    sqlx::migrate!("../../migrations").run(pool).await?;
+    Ok(())
+}
diff --git a/rust/crates/du-db/src/merge.rs b/rust/crates/du-db/src/merge.rs
new file mode 100644
index 00000000..91ad8a18
--- /dev/null
+++ b/rust/crates/du-db/src/merge.rs
@@ -0,0 +1,150 @@
+//! Materialize a `du_domain::merge::MergePlan` into a reviewable change set.
+//!
+//! Each [`MergeOp`] becomes a `tree.tree_change` row. New-node placement uses
+//! the placeholder mechanism the apply engine understands: a CREATE carries its
+//! negative `placeholder`, and ops attaching to it carry `*_placeholder` refs
+//! (resolved to real ids during `change_set::apply`). Variant *names* from the
+//! plan are resolved to `core.variant` ids here (get-or-create as UNNAMED).
+//!
+//! The resulting set lands in READY_FOR_REVIEW: a curator reviews/approves the
+//! changes (and the algorithm's ambiguity flags) before applying.
+ +use crate::DbError; +use du_domain::merge::{MergeOp, MergePlan, NodeRef}; +use serde_json::{json, Map, Value}; +use sqlx::{PgPool, Postgres, Transaction}; +use std::collections::HashMap; + +pub struct Materialized { + pub change_set_id: i64, + pub change_count: i64, +} + +pub async fn materialize( + pool: &PgPool, + plan: &MergePlan, + source: &str, + dna: &str, + created_by: &str, +) -> Result { + let mut tx = pool.begin().await?; + + let cs_id: i64 = sqlx::query_scalar( + "INSERT INTO tree.change_set (source, haplogroup_type, status, description, created_by) \ + VALUES ($1, $2::core.dna_type, 'READY_FOR_REVIEW', $3, $4) RETURNING id", + ) + .bind(source) + .bind(dna) + .bind(format!("Merge from {source}: {} ambiguities", plan.ambiguities.len())) + .bind(created_by) + .fetch_one(&mut *tx) + .await?; + + let mut cache: HashMap = HashMap::new(); + let mut count: i64 = 0; + + for op in &plan.ops { + match op { + MergeOp::CreateNode { placeholder, name, parent, variants } => { + let mut ids = Vec::with_capacity(variants.len()); + for v in variants { + ids.push(get_or_create_variant(&mut tx, &mut cache, v).await?); + } + let mut nv = Map::new(); + nv.insert("name".into(), json!(name)); + nv.insert("haplogroup_type".into(), json!(dna)); + nv.insert("placeholder".into(), json!(placeholder)); + nv.insert("variant_ids".into(), json!(ids)); + nv.insert("source".into(), json!(source)); + insert_parent_ref(&mut nv, parent, "parent_haplogroup_id", "parent_placeholder"); + insert_change(&mut tx, cs_id, "CREATE", None, Value::Object(nv)).await?; + count += 1; + } + MergeOp::Reparent { node, new_parent } => { + let mut nv = Map::new(); + insert_parent_ref(&mut nv, new_parent, "new_parent_haplogroup_id", "new_parent_placeholder"); + insert_change(&mut tx, cs_id, "REPARENT", Some(*node), Value::Object(nv)).await?; + count += 1; + } + MergeOp::EditVariants { node, add, remove } => { + let mut add_ids = Vec::new(); + for v in add { + add_ids.push(get_or_create_variant(&mut tx, 
&mut cache, v).await?); + } + let mut remove_ids = Vec::new(); + for v in remove { + if let Some(id) = lookup_variant(&mut tx, v).await? { + remove_ids.push(id); + } + } + let nv = json!({ "add": add_ids, "remove": remove_ids }); + insert_change(&mut tx, cs_id, "VARIANT_EDIT", Some(*node), nv).await?; + count += 1; + } + // Informational: the source matched an existing node 1:1. No tree + // mutation; left out of the change set (visible in the merge stats). + MergeOp::MatchMetadata { .. } => {} + } + } + + sqlx::query("UPDATE tree.change_set SET change_count = $2 WHERE id = $1") + .bind(cs_id) + .bind(count) + .execute(&mut *tx) + .await?; + + tx.commit().await?; + Ok(Materialized { change_set_id: cs_id, change_count: count }) +} + +fn insert_parent_ref(nv: &mut Map, r: &NodeRef, id_key: &str, ph_key: &str) { + match r { + NodeRef::Existing(id) => { + nv.insert(id_key.into(), json!(id)); + } + NodeRef::New(ph) => { + nv.insert(ph_key.into(), json!(ph)); + } + NodeRef::Root => {} + } +} + +async fn insert_change( + tx: &mut Transaction<'_, Postgres>, + cs_id: i64, + change_type: &str, + haplogroup_id: Option, + new_values: Value, +) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO tree.tree_change (change_set_id, change_type, haplogroup_id, new_values) \ + VALUES ($1, $2::tree.tree_change_type, $3, $4)", + ) + .bind(cs_id) + .bind(change_type) + .bind(haplogroup_id) + .bind(new_values) + .execute(&mut **tx) + .await?; + Ok(()) +} + +async fn get_or_create_variant( + tx: &mut Transaction<'_, Postgres>, + cache: &mut HashMap, + name: &str, +) -> Result { + if let Some(&id) = cache.get(name) { + return Ok(id); + } + let id = crate::variant::ensure_base_variant_id(tx, name).await?; + cache.insert(name.to_string(), id); + Ok(id) +} + +async fn lookup_variant(tx: &mut Transaction<'_, Postgres>, name: &str) -> Result, DbError> { + Ok(sqlx::query_scalar("SELECT id FROM core.variant WHERE canonical_name = $1") + .bind(name) + .fetch_optional(&mut **tx) + .await?) 
+} diff --git a/rust/crates/du-db/src/naming.rs b/rust/crates/du-db/src/naming.rs new file mode 100644 index 00000000..f0699467 --- /dev/null +++ b/rust/crates/du-db/src/naming.rs @@ -0,0 +1,159 @@ +//! Variant **Naming Authority** (planning/variant-naming-authority.md). DecodingUs +//! owns the `DU` Y-variant name prefix. Variants may exist before they have an +//! official name (discovered by coordinates → `naming_status = UNNAMED`, +//! `canonical_name = NULL`). A curator works the naming queue: reuse an +//! established name where one exists, else **mint** a `DUxxxxx` identifier from +//! `core.du_variant_name_seq` and publish (`NAMED`). +//! +//! Lifecycle: `UNNAMED` → (`PENDING_REVIEW`) → `NAMED`. Minting is the only path +//! that sets a `DU` canonical name; the old working name (if any) is preserved as +//! an alias. + +use crate::{DbError, Page}; +use serde_json::Value; +use sqlx::PgPool; + +/// A variant in the naming queue (named or awaiting a name). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct NamingItem { + pub id: i64, + /// `None` for a truly-unnamed (coordinate-only) variant. + pub canonical_name: Option, + pub naming_status: String, + pub mutation_type: String, + pub coordinates: Value, + pub aliases: Value, + /// A haplogroup this variant currently defines (context), if any. + pub defining: Option, +} + +const ITEM_COLS: &str = "v.id, v.canonical_name, v.naming_status::text AS naming_status, \ + v.mutation_type::text AS mutation_type, v.coordinates, v.aliases, \ + (SELECT h.name FROM tree.haplogroup_variant hv \ + JOIN tree.haplogroup h ON h.id = hv.haplogroup_id AND h.valid_until IS NULL \ + WHERE hv.variant_id = v.id AND hv.valid_until IS NULL ORDER BY h.name LIMIT 1) AS defining"; + +/// The SQL predicate for a queue `mode`. 
The default **needs_name** queue is the +/// actionable set: variants with no name yet (discovery output) or explicitly +/// flagged for review — NOT the whole `UNNAMED` backlog (most imported variants +/// default to UNNAMED but already carry an established name, which the authority +/// reuses rather than re-minting). Other modes browse by raw status / all. +fn mode_predicate(mode: &str) -> &'static str { + match mode { + "NAMED" => "v.naming_status = 'NAMED'", + "PENDING_REVIEW" => "v.naming_status = 'PENDING_REVIEW'", + // The imported backlog: has a name but DU hasn't ratified it. + "UNNAMED" => "v.naming_status = 'UNNAMED' AND v.canonical_name IS NOT NULL", + "all" => "TRUE", + // needs_name (default) + _ => "(v.canonical_name IS NULL OR v.naming_status = 'PENDING_REVIEW')", + } +} + +/// Paginated naming queue. `mode` ∈ {needs_name (default), PENDING_REVIEW, NAMED, +/// UNNAMED (named-but-unratified backlog), all}. Unnamed first, then by name. +pub async fn queue( + pool: &PgPool, + mode: &str, + page: i64, + page_size: i64, +) -> Result, DbError> { + let offset = Page::<()>::offset(page, page_size); + let limit = page_size.clamp(1, 200); + let pred = mode_predicate(mode); + + let total: i64 = sqlx::query_scalar(&format!("SELECT count(*) FROM core.variant v WHERE {pred}")) + .fetch_one(pool) + .await?; + let items: Vec = sqlx::query_as(&format!( + "SELECT {ITEM_COLS} FROM core.variant v WHERE {pred} \ + ORDER BY v.canonical_name NULLS FIRST, v.id LIMIT $1 OFFSET $2" + )) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + Ok(Page { items, total, page: page.max(1), page_size: limit }) +} + +pub async fn get(pool: &PgPool, id: i64) -> Result, DbError> { + Ok(sqlx::query_as(&format!("SELECT {ITEM_COLS} FROM core.variant v WHERE v.id = $1")) + .bind(id) + .fetch_optional(pool) + .await?) +} + +/// Set a variant's naming status (e.g. flag for review or send back to unnamed). +/// Does not touch the name. Returns whether a row changed. 
+pub async fn set_status(pool: &PgPool, id: i64, status: &str) -> Result { + let n = sqlx::query( + "UPDATE core.variant SET naming_status = $2::core.naming_status, updated_at = now() WHERE id = $1", + ) + .bind(id) + .bind(status) + .execute(pool) + .await? + .rows_affected(); + Ok(n > 0) +} + +/// **Mint a DU name** for a variant: take the next `DUxxxxx` from the authority +/// sequence, set it as `canonical_name`, mark `NAMED`. Any prior working name is +/// preserved in `aliases.common_names`. Refuses a variant already `NAMED`. +/// Returns the minted name. +pub async fn assign_du_name(pool: &PgPool, id: i64) -> Result { + let mut tx = pool.begin().await?; + let row: Option<(Option, String)> = + sqlx::query_as("SELECT canonical_name, naming_status::text FROM core.variant WHERE id = $1 FOR UPDATE") + .bind(id) + .fetch_optional(&mut *tx) + .await?; + let (old_name, status) = row.ok_or_else(|| DbError::Conflict(format!("variant {id} not found")))?; + if status == "NAMED" { + return Err(DbError::Conflict("variant is already NAMED".into())); + } + let du: String = sqlx::query_scalar("SELECT core.next_du_name()").fetch_one(&mut *tx).await?; + + // Preserve any prior working name as a common-name alias (union, deduped). 
+ if let Some(prev) = old_name.filter(|n| !n.trim().is_empty() && *n != du) { + sqlx::query( + "UPDATE core.variant SET aliases = jsonb_set( \ + COALESCE(aliases, '{}'::jsonb), '{common_names}', \ + (SELECT COALESCE(jsonb_agg(DISTINCT a), '[]'::jsonb) FROM ( \ + SELECT jsonb_array_elements_text(COALESCE(aliases->'common_names', '[]'::jsonb)) AS a \ + UNION SELECT $2) u), true) \ + WHERE id = $1", + ) + .bind(id) + .bind(&prev) + .execute(&mut *tx) + .await?; + } + sqlx::query( + "UPDATE core.variant SET canonical_name = $2, naming_status = 'NAMED', updated_at = now() WHERE id = $1", + ) + .bind(id) + .bind(&du) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(du) +} + +/// **Dedup check** before minting: other *named* variants sharing this variant's +/// GRCh38 coordinate (contig + position). A non-empty result means an +/// established name likely already exists — reuse it instead of minting. +pub async fn dedup_by_coordinates(pool: &PgPool, id: i64) -> Result, DbError> { + Ok(sqlx::query_as( + "WITH me AS (SELECT coordinates->'GRCh38'->>'contig' AS c, coordinates->'GRCh38'->>'position' AS p \ + FROM core.variant WHERE id = $1) \ + SELECT v.id, v.canonical_name FROM core.variant v, me \ + WHERE v.id <> $1 AND v.canonical_name IS NOT NULL AND me.c IS NOT NULL AND me.p IS NOT NULL \ + AND v.coordinates->'GRCh38'->>'contig' = me.c \ + AND v.coordinates->'GRCh38'->>'position' = me.p \ + ORDER BY v.canonical_name LIMIT 10", + ) + .bind(id) + .fetch_all(pool) + .await?) +} diff --git a/rust/crates/du-db/src/pagination.rs b/rust/crates/du-db/src/pagination.rs new file mode 100644 index 00000000..d24d8890 --- /dev/null +++ b/rust/crates/du-db/src/pagination.rs @@ -0,0 +1,27 @@ +//! Pagination helper shared by list/search queries. + +use serde::Serialize; + +/// A page of results plus the totals the UI needs to render pagination controls. 
+#[derive(Debug, Clone, Serialize)] +pub struct Page { + pub items: Vec, + pub total: i64, + pub page: i64, + pub page_size: i64, +} + +impl Page { + pub fn total_pages(&self) -> i64 { + if self.page_size <= 0 { + 0 + } else { + (self.total + self.page_size - 1) / self.page_size + } + } + + /// Clamp page/page_size to sane bounds and return the SQL OFFSET. + pub fn offset(page: i64, page_size: i64) -> i64 { + (page.max(1) - 1) * page_size.clamp(1, 200) + } +} diff --git a/rust/crates/du-db/src/pdf.rs b/rust/crates/du-db/src/pdf.rs new file mode 100644 index 00000000..a8cd9d63 --- /dev/null +++ b/rust/crates/du-db/src/pdf.rs @@ -0,0 +1,332 @@ +//! Age probability-distribution machinery for the McDonald (2021) branch-age +//! model — the foundation the SNP, STR, and historical terms all build on. +//! +//! An age estimate is a PDF over time-before-present, `P(t|e)`. The paper combines +//! independent evidence by multiplying PDFs (Eq 1: `P(t|e)=k·∏P(t|eᵢ)`), derives a +//! parent clade's age by convolving a child's age with the parent→child branch +//! time (Eq 7), and reverses that to push a constraint down a branch (Eq 9). We +//! represent each PDF as probability **mass per fixed-width time bin** so those +//! operations are exact discrete arithmetic. +//! +//! This module is pure (no DB, no I/O) and is the replacement substrate for the +//! inverse-variance Gaussian shortcut in [`crate::age::combine`]. + +/// Grid resolution — years per bin (design default `pdf-resolution`). +pub const RESOLUTION_YEARS: f64 = 10.0; +/// Grid extent — oldest age modelled (design default `pdf-max-age`). +pub const MAX_AGE_YEARS: f64 = 100_000.0; + +/// A discrete probability distribution over age (years before present). `mass[i]` +/// is the probability the age lies in bin `i`, i.e. around `i * res` years; the +/// masses are kept normalized to sum 1 (a degenerate all-zero PDF is allowed and +/// reports zero for every statistic). 
+#[derive(Debug, Clone)]
+pub struct Pdf {
+    /// Years per bin.
+    res: f64,
+    /// Probability mass per bin; bin `i` sits at `i * res` years.
+    mass: Vec<f64>,
+}
+
+impl Pdf {
+    /// An all-zero (degenerate) PDF on a grid of `res`-year bins covering
+    /// `0..=max_age` years, i.e. `round(max_age / res) + 1` bins.
+    fn zeros(res: f64, max_age: f64) -> Pdf {
+        let bins = (max_age / res).round() as usize + 1;
+        Pdf { res, mass: vec![0.0; bins] }
+    }
+
+    /// Years at the centre of bin `i`.
+    #[inline]
+    fn age(&self, i: usize) -> f64 {
+        i as f64 * self.res
+    }
+
+    /// Index of the bin nearest `years`, clamped to the last bin. Negative input
+    /// lands in bin 0 because the float → `usize` cast saturates.
+    fn bin_of(&self, years: f64) -> usize {
+        ((years / self.res).round() as usize).min(self.mass.len() - 1)
+    }
+
+    /// `P(t|m)` for `m` SNPs over `b` callable bp at rate `mu` (Eq 3:
+    /// `Poisson(m, t·b·µ)` read as a function of `t`). As a density in `t` this is
+    /// `∝ (t·b·µ)^m · exp(−t·b·µ)`, i.e. a Gamma with mode `m/(b·µ)` and mean
+    /// `(m+1)/(b·µ)` — so `1/(b·µ)` is the per-SNP temporal resolution (~83 yr for
+    /// a 15 Mbp test). Uses the default grid.
+    pub fn poisson(m: i64, b: f64, mu: f64) -> Pdf {
+        Pdf::poisson_on(m, b, mu, RESOLUTION_YEARS, MAX_AGE_YEARS)
+    }
+
+    /// [`Pdf::poisson`] on an explicit grid (`res` years per bin, up to `max_age`).
+    /// A negative `m` is treated as 0.
+    ///
+    /// Weights are built in log space and rescaled by their peak before being
+    /// exponentiated. Exponentiating `m·ln(λ) − λ` directly overflows `f64` once
+    /// `m` reaches roughly 170 (the peak value `m·ln(m) − m` passes ~709), which
+    /// turned the whole PDF into NaN after normalization. The rescale is a
+    /// constant factor, so it drops out in normalization just like `1/m!`.
+    pub fn poisson_on(m: i64, b: f64, mu: f64, res: f64, max_age: f64) -> Pdf {
+        let mut pdf = Pdf::zeros(res, max_age);
+        let m = m.max(0) as f64;
+
+        // Pass 1: store the unnormalized log-weight per bin and track the peak.
+        let mut peak = f64::NEG_INFINITY;
+        for i in 0..pdf.mass.len() {
+            let lambda = pdf.age(i) * b * mu; // t·b·µ
+            let log_w = if lambda <= 0.0 {
+                // λ^m at λ = 0: weight 1 when m = 0, weight 0 otherwise.
+                if m == 0.0 {
+                    0.0
+                } else {
+                    f64::NEG_INFINITY
+                }
+            } else {
+                // log-space: m·ln(λ) − λ (the 1/m! constant drops out in normalization).
+                m * lambda.ln() - lambda
+            };
+            pdf.mass[i] = log_w;
+            peak = peak.max(log_w);
+        }
+
+        // Pass 2: exponentiate relative to the peak so the largest weight is 1.
+        if peak.is_finite() {
+            for w in &mut pdf.mass {
+                *w = (*w - peak).exp();
+            }
+        } else {
+            // No bin carries weight (e.g. b·µ ≤ 0 with m > 0): degenerate PDF.
+            for w in &mut pdf.mass {
+                *w = 0.0;
+            }
+        }
+        pdf.normalize();
+        pdf
+    }
+
+    /// A Gaussian age PDF — for genealogical/aDNA anchors with a date ± sigma, or
+    /// for applying mutation-rate uncertainty as a broadening. Uses the default grid.
+    pub fn gaussian(mean_years: f64, sigma_years: f64) -> Pdf {
+        Pdf::gaussian_on(mean_years, sigma_years, RESOLUTION_YEARS, MAX_AGE_YEARS)
+    }
+
+    /// [`Pdf::gaussian`] on an explicit grid (`res` years per bin, up to `max_age`).
+    pub fn gaussian_on(mean_years: f64, sigma_years: f64, res: f64, max_age: f64) -> Pdf {
+        let mut grid = Pdf::zeros(res, max_age);
+        // Sigma is floored at half a bin width.
+        let sigma = sigma_years.max(grid.res / 2.0);
+        let step = grid.res;
+        for (bin, slot) in grid.mass.iter_mut().enumerate() {
+            let dev = (bin as f64 * step - mean_years) / sigma;
+            *slot = (-0.5 * dev * dev).exp();
+        }
+        grid.normalize();
+        grid
+    }
+
+    /// A near-delta PDF at a precisely known age (e.g. a proven MRCA birth year):
+    /// all mass in the single default-grid bin nearest `years` (floored at 0).
+    pub fn point(years: f64) -> Pdf {
+        let mut spike = Pdf::zeros(RESOLUTION_YEARS, MAX_AGE_YEARS);
+        let clamped = years.max(0.0);
+        let bin = spike.bin_of(clamped);
+        spike.mass[bin] = 1.0;
+        spike
+    }
+
+    /// Weighted mixture `Σ wᵢ·pdfᵢ`, renormalized — used for the STR marker age
+    /// `P(t|g) = Σ_m P(g|m)·P(t|m)` (McDonald Eq 14, inner sum over the hidden
+    /// mutation count `m`). Components must share a grid; non-positive weights are
+    /// skipped. `None` if nothing contributes.
+    pub fn mixture(components: &[(f64, Pdf)]) -> Option<Pdf> {
+        // The grid resolution is taken from the first positively-weighted component.
+        let res = components
+            .iter()
+            .find_map(|(w, p)| if *w > 0.0 { Some(p.res) } else { None })?;
+        let bins = components.iter().map(|(_, p)| p.mass.len()).max().unwrap_or(0);
+        let mut blend = Pdf { res, mass: vec![0.0; bins] };
+        for (weight, part) in components {
+            if *weight <= 0.0 {
+                continue;
+            }
+            debug_assert_eq!(part.res, res, "mixture PDFs must share a grid resolution");
+            // `blend` is at least as long as every component, so zip covers all of `part`.
+            for (acc, &m) in blend.mass.iter_mut().zip(&part.mass) {
+                *acc += weight * m;
+            }
+        }
+        if blend.total() > 0.0 {
+            blend.normalize();
+            Some(blend)
+        } else {
+            None
+        }
+    }
+
+    /// Combine independent evidence (Eq 1): pointwise product, renormalized.
+    pub fn multiply(&self, other: &Pdf) -> Pdf {
+        debug_assert_eq!(self.res, other.res, "PDFs must share a grid resolution");
+        let mut product = Pdf { res: self.res, mass: vec![0.0; self.mass.len()] };
+        // zip stops at the shorter grid; bins past it keep their zero mass.
+        for ((slot, &a), &b) in product.mass.iter_mut().zip(&self.mass).zip(&other.mass) {
+            *slot = a * b;
+        }
+        product.normalize();
+        product
+    }
+
+    /// Distribution of the sum of two ages (Eq 7: parent age = child age ⊛
+    /// parent→child branch time). Mass that would land beyond the grid is dropped
+    /// and the remainder renormalized.
+    pub fn convolve(&self, other: &Pdf) -> Pdf {
+        debug_assert_eq!(self.res, other.res, "PDFs must share a grid resolution");
+        let mut sum = Pdf { res: self.res, mass: vec![0.0; self.mass.len()] };
+        let last = sum.mass.len() - 1;
+        let (a_lo, a_hi) = self.support();
+        let (b_lo, b_hi) = other.support();
+        for i in a_lo..=a_hi {
+            let a = self.mass[i];
+            if a != 0.0 {
+                // Targets i + j past the last bin overflow the grid, so cap j there.
+                let j_hi = b_hi.min(last - i);
+                for j in b_lo..=j_hi {
+                    sum.mass[i + j] += a * other.mass[j];
+                }
+            }
+        }
+        sum.normalize();
+        sum
+    }
+
+    /// Distribution of the difference of two ages, `self − other`, with negative
+    /// outcomes dropped (Eq 9/10: derive a child's age from its parent's, given the
+    /// branch time — the parent must be older, so `P(t<0)=0`).
+    pub fn convolve_sub(&self, other: &Pdf) -> Pdf {
+        debug_assert_eq!(self.res, other.res, "PDFs must share a grid resolution");
+        let mut diff = Pdf { res: self.res, mass: vec![0.0; self.mass.len()] };
+        let (a_lo, a_hi) = self.support();
+        let (b_lo, b_hi) = other.support();
+        for i in a_lo..=a_hi {
+            let a = self.mass[i];
+            if a != 0.0 {
+                // j > i would give a negative age, so cap j at i.
+                let j_hi = b_hi.min(i);
+                for j in b_lo..=j_hi {
+                    diff.mass[i - j] += a * other.mass[j];
+                }
+            }
+        }
+        diff.normalize();
+        diff
+    }
+
+    /// First and last bin carrying non-negligible mass (keeps convolution near the
+    /// support rather than O(n²) over the whole grid).
+ fn support(&self) -> (usize, usize) { + let lo = self.mass.iter().position(|&m| m > 0.0).unwrap_or(0); + let hi = self.mass.iter().rposition(|&m| m > 0.0).unwrap_or(0); + (lo, hi) + } + + fn normalize(&mut self) { + let total: f64 = self.mass.iter().sum(); + if total > 0.0 { + for m in &mut self.mass { + *m /= total; + } + } + } + + /// Total mass — 1.0 for a valid PDF, 0.0 for a degenerate (empty) one. + pub fn total(&self) -> f64 { + self.mass.iter().sum() + } + + /// Expected age (years). + pub fn mean(&self) -> f64 { + self.mass.iter().enumerate().map(|(i, &m)| self.age(i) * m).sum() + } + + /// Age (years) at the most probable bin. + pub fn mode(&self) -> f64 { + let mut best = (0usize, 0.0f64); + for (i, &m) in self.mass.iter().enumerate() { + if m > best.1 { + best = (i, m); + } + } + self.age(best.0) + } + + /// The `p`-quantile (0–1) by linear interpolation across the crossing bin. + pub fn percentile(&self, p: f64) -> f64 { + if self.total() <= 0.0 { + return 0.0; + } + let target = p.clamp(0.0, 1.0); + let mut cum = 0.0; + for (i, &m) in self.mass.iter().enumerate() { + let next = cum + m; + if next >= target { + // Interpolate within bin i for a smoother quantile. + let frac = if m > 0.0 { (target - cum) / m } else { 0.0 }; + return (i as f64 - 0.5 + frac).max(0.0) * self.res; + } + cum = next; + } + self.age(self.mass.len() - 1) + } + + pub fn median(&self) -> f64 { + self.percentile(0.5) + } + + /// `(median, 2.5th percentile, 97.5th percentile)` — the central estimate and + /// its 95% credible interval. + pub fn ci95(&self) -> (f64, f64, f64) { + (self.median(), self.percentile(0.025), self.percentile(0.975)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn approx(a: f64, b: f64, tol: f64) -> bool { + (a - b).abs() <= tol + } + + /// P(t|m) is Gamma(shape m+1, rate b·µ) in t: mode m/(b·µ), mean (m+1)/(b·µ). + /// Pick b·µ = 0.01 (1/bµ = 100 yr/SNP) for round numbers. 
+ #[test] + fn poisson_mode_and_mean_match_gamma() { + let (b, mu) = (1.25e7, 8e-10); // b·µ = 0.01 + let pdf = Pdf::poisson(3, b, mu); + assert!(approx(pdf.total(), 1.0, 1e-9)); + assert!(approx(pdf.mode(), 300.0, RESOLUTION_YEARS), "mode {}", pdf.mode()); + assert!(approx(pdf.mean(), 400.0, 2.0), "mean {}", pdf.mean()); + } + + /// 1/(b·µ) ≈ 83 yr/SNP for a ~15 Mbp test (paper's quoted resolution): m=1 mode. + #[test] + fn per_snp_resolution_is_about_83_years() { + let pdf = Pdf::poisson(1, 1.5e7, 8.33e-10); // 1/bµ ≈ 80 + assert!(approx(pdf.mode(), 80.0, RESOLUTION_YEARS), "mode {}", pdf.mode()); + } + + #[test] + fn gaussian_median_and_ci() { + let pdf = Pdf::gaussian(3000.0, 300.0); + let (med, lo, hi) = pdf.ci95(); + assert!(approx(med, 3000.0, RESOLUTION_YEARS)); + assert!(approx(lo, 3000.0 - 1.96 * 300.0, 15.0), "lo {lo}"); + assert!(approx(hi, 3000.0 + 1.96 * 300.0, 15.0), "hi {hi}"); + } + + /// Multiplying two Gaussians = inverse-variance combine (the old behaviour), + /// now as a proper PDF: mean pulled between, CI tighter than either input. + #[test] + fn multiply_is_inverse_variance_combine() { + let a = Pdf::gaussian(3000.0, 300.0); + let b = Pdf::gaussian(3300.0, 300.0); + let m = a.multiply(&b); + assert!(approx(m.mean(), 3150.0, 5.0), "mean {}", m.mean()); + let (_, lo, hi) = m.ci95(); + assert!(hi - lo < 2.0 * 1.96 * 300.0, "combined CI should tighten"); + // A much tighter estimate dominates. + let tight = Pdf::gaussian(3000.0, 50.0).multiply(&Pdf::gaussian(5000.0, 1000.0)); + assert!(tight.mean() < 3100.0, "tight estimate dominates, got {}", tight.mean()); + } + + #[test] + fn multiply_disjoint_is_empty() { + // Non-overlapping terms produce zero mass — the signal the combined-age step + // (du_db::age) uses to fall back from the PDF product to a Gaussian combine. 
+ let disjoint = Pdf::gaussian(1000.0, 50.0).multiply(&Pdf::gaussian(50_000.0, 50.0)); + assert_eq!(disjoint.total(), 0.0); + } + + #[test] + fn convolve_adds_ages() { + // Two point ages add. + let s = Pdf::point(1200.0).convolve(&Pdf::point(800.0)); + assert!(approx(s.mean(), 2000.0, RESOLUTION_YEARS)); + // Two Gaussians (means kept clear of the 0 floor so neither tail is + // truncated): means add, variances add (sigma = √(σ₁²+σ₂²)). + let g = Pdf::gaussian(2000.0, 300.0).convolve(&Pdf::gaussian(1500.0, 400.0)); + assert!(approx(g.mean(), 3500.0, 5.0), "mean {}", g.mean()); + let (_, lo, hi) = g.ci95(); + let sigma = (hi - lo) / (2.0 * 1.96); + assert!(approx(sigma, 500.0, 20.0), "sigma {sigma}"); // √(300²+400²)=500 + } + + #[test] + fn convolve_sub_subtracts_and_floors_at_zero() { + // Parent 2000 minus branch 800 → child 1200. + let c = Pdf::point(2000.0).convolve_sub(&Pdf::point(800.0)); + assert!(approx(c.mean(), 1200.0, RESOLUTION_YEARS), "mean {}", c.mean()); + // A larger subtrahend than minuend → all mass floored away (P(t<0)=0). + let z = Pdf::point(500.0).convolve_sub(&Pdf::point(900.0)); + assert!(approx(z.mean(), 0.0, RESOLUTION_YEARS), "mean {}", z.mean()); + } +} diff --git a/rust/crates/du-db/src/proposal.rs b/rust/crates/du-db/src/proposal.rs new file mode 100644 index 00000000..4b52c412 --- /dev/null +++ b/rust/crates/du-db/src/proposal.rs @@ -0,0 +1,380 @@ +//! Curation proposals: Navigator submits variant/branch proposals; the AppView +//! pools them by (name, parent) across submitters, and curators review/name/ +//! promote. Backed by `tree.proposed_branch` (+ evidence) and `tree.curator_action`. +//! +//! AppView-only (not in the shared `du-domain`): proposals are a server concern. + +use crate::{DbError, Page}; +use serde_json::Value; +use sqlx::PgPool; +use uuid::Uuid; + +/// A proposal as submitted by Navigator. 
Defining-variant detail rides in +/// `evidence` (JSONB) — intake does NOT mutate the catalog; that happens at +/// promotion when a curator names/creates the variants. +pub struct SubmitProposal { + pub proposed_name: String, + /// Parent haplogroup name (resolved to an id if it exists). + pub parent_haplogroup: Option, + pub dna_type: du_domain::enums::DnaType, + /// The submitting sample's GUID (for distinct-submitter consensus), if any. + pub sample_guid: Option, + pub proposed_by: Option, + /// Free-form evidence (candidate variants, positions, scores, …). + pub evidence: Value, +} + +#[derive(sqlx::FromRow)] +pub struct ProposalSummary { + pub id: i64, + pub proposed_name: Option, + pub parent_name: Option, + pub dna_type: Option, + pub evidence_count: i32, + pub submitter_count: i32, + pub confidence: Option, + pub status: String, +} + +/// A proposed branch's defining variant (with cross-submitter support count). +#[derive(sqlx::FromRow)] +pub struct DefiningVariantView { + pub name: Option, + pub supporting_sample_count: i32, +} + +pub struct ProposalDetail { + pub summary: ProposalSummary, + pub variants: Vec, + pub evidence: Vec, +} + +/// Filter for [`list`]. All fields optional (None = unfiltered). +#[derive(Default)] +pub struct ProposalFilter<'a> { + pub status: Option<&'a str>, + pub dna_type: Option<&'a str>, + pub parent: Option<&'a str>, + pub min_consensus: Option, +} + +/// Submit a proposal, pooling into an existing open proposal with the same +/// (proposed_name, parent). Returns (proposal_id, created_new). +pub async fn submit(pool: &PgPool, p: &SubmitProposal) -> Result<(i64, bool), DbError> { + let mut tx = pool.begin().await?; + + // Resolve parent haplogroup id (by name + lineage), if a parent was given. 
+ let parent_id: Option = match &p.parent_haplogroup { + Some(name) => { + sqlx::query_scalar( + "SELECT id FROM tree.haplogroup WHERE name = $1 AND haplogroup_type::text = $2", + ) + .bind(name) + .bind(crate::pg_enum_label(&p.dna_type)?) + .fetch_optional(&mut *tx) + .await? + } + None => None, + }; + + // Find an open proposal to pool into. + let existing: Option = sqlx::query_scalar( + "SELECT id FROM tree.proposed_branch \ + WHERE proposed_name = $1 AND parent_haplogroup_id IS NOT DISTINCT FROM $2 \ + AND status IN ('PROPOSED','UNDER_REVIEW') LIMIT 1", + ) + .bind(&p.proposed_name) + .bind(parent_id) + .fetch_optional(&mut *tx) + .await?; + + let (id, created) = match existing { + Some(id) => { + sqlx::query( + "UPDATE tree.proposed_branch SET evidence_count = evidence_count + 1, \ + confidence = LEAST(0.99, (evidence_count + 1) * 0.1), \ + discovery_sample_guids = CASE \ + WHEN $2::uuid IS NULL OR $2 = ANY(discovery_sample_guids) THEN discovery_sample_guids \ + ELSE array_append(discovery_sample_guids, $2) END \ + WHERE id = $1", + ) + .bind(id) + .bind(p.sample_guid) + .execute(&mut *tx) + .await?; + (id, false) + } + None => { + let guids: Vec = p.sample_guid.into_iter().collect(); + let id: i64 = sqlx::query_scalar( + "INSERT INTO tree.proposed_branch \ + (proposed_name, parent_haplogroup_id, discovery_sample_guids, evidence_count, confidence, proposed_by, status) \ + VALUES ($1, $2, $3, 1, 0.1, $4, 'PROPOSED') RETURNING id", + ) + .bind(&p.proposed_name) + .bind(parent_id) + .bind(&guids) + .bind(&p.proposed_by) + .fetch_one(&mut *tx) + .await?; + (id, true) + } + }; + + sqlx::query( + "INSERT INTO tree.proposed_branch_evidence (proposed_branch_id, evidence_type, evidence_detail) \ + VALUES ($1, 'PRIVATE_VARIANT', $2)", + ) + .bind(id) + .bind(&p.evidence) + .execute(&mut *tx) + .await?; + + tx.commit().await?; + Ok((id, created)) +} + +const SUMMARY_SELECT: &str = "SELECT pb.id, pb.proposed_name, h.name AS parent_name, \ + pb.haplogroup_type::text AS 
dna_type, \ + pb.evidence_count, cardinality(pb.discovery_sample_guids) AS submitter_count, \ + pb.confidence::float8 AS confidence, pb.status \ + FROM tree.proposed_branch pb LEFT JOIN tree.haplogroup h ON h.id = pb.parent_haplogroup_id"; + +/// Predicates shared by the list count + page queries (binds $1..$4). +const LIST_FILTER: &str = "WHERE ($1::text IS NULL OR pb.status = $1) \ + AND ($2::text IS NULL OR pb.haplogroup_type::text = $2) \ + AND ($3::text IS NULL OR h.name = $3) \ + AND ($4::bigint IS NULL OR cardinality(pb.discovery_sample_guids) >= $4)"; + +/// List proposals matching `filter`, newest first. +pub async fn list( + pool: &PgPool, + filter: &ProposalFilter<'_>, + page: i64, + page_size: i64, +) -> Result, DbError> { + let offset = Page::<()>::offset(page, page_size); + let limit = page_size.clamp(1, 200); + + let total: i64 = sqlx::query_scalar(&format!( + "SELECT count(*) FROM tree.proposed_branch pb \ + LEFT JOIN tree.haplogroup h ON h.id = pb.parent_haplogroup_id {LIST_FILTER}" + )) + .bind(filter.status) + .bind(filter.dna_type) + .bind(filter.parent) + .bind(filter.min_consensus) + .fetch_one(pool) + .await?; + let items: Vec = sqlx::query_as(&format!( + "{SUMMARY_SELECT} {LIST_FILTER} ORDER BY pb.id DESC LIMIT $5 OFFSET $6" + )) + .bind(filter.status) + .bind(filter.dna_type) + .bind(filter.parent) + .bind(filter.min_consensus) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + Ok(Page { items, total, page: page.max(1), page_size: limit }) +} + +pub async fn get(pool: &PgPool, id: i64) -> Result, DbError> { + let summary: Option = + sqlx::query_as(&format!("{SUMMARY_SELECT} WHERE pb.id = $1")) + .bind(id) + .fetch_optional(pool) + .await?; + let Some(summary) = summary else { return Ok(None) }; + let variants: Vec = sqlx::query_as( + "SELECT v.canonical_name AS name, pbv.supporting_sample_count \ + FROM tree.proposed_branch_variant pbv JOIN core.variant v ON v.id = pbv.variant_id \ + WHERE pbv.proposed_branch_id = $1 ORDER BY 
pbv.supporting_sample_count DESC, v.canonical_name", + ) + .bind(id) + .fetch_all(pool) + .await?; + let evidence: Vec = sqlx::query_scalar( + "SELECT evidence_detail FROM tree.proposed_branch_evidence WHERE proposed_branch_id = $1 ORDER BY id", + ) + .bind(id) + .fetch_all(pool) + .await?; + Ok(Some(ProposalDetail { summary, variants, evidence })) +} + +/// A defining variant extracted from proposal evidence. +struct DefiningVariant { + name: String, + position: Option, + reference: Option, + alternate: Option, +} + +/// Pull distinct defining variants from the proposal's evidence JSONB. Each +/// evidence object may carry `variant` (name) plus optional `pos`/`ref`/`alt`. +fn defining_variants(evidence: &[Value]) -> Vec { + let mut seen = std::collections::BTreeMap::new(); + for e in evidence { + if let Some(name) = e.get("variant").and_then(Value::as_str) { + seen.entry(name.to_string()).or_insert_with(|| DefiningVariant { + name: name.to_string(), + position: e.get("pos").and_then(Value::as_i64), + reference: e.get("ref").and_then(Value::as_str).map(str::to_string), + alternate: e.get("alt").and_then(Value::as_str).map(str::to_string), + }); + } + } + seen.into_values().collect() +} + +/// Promote an ACCEPTED proposal into the named catalog: create the +/// `tree.haplogroup` branch under its parent, a current relationship edge, and +/// `core.variant` links from the evidence's defining variants. Sets the +/// proposal status to `PROMOTED` and records a `PROMOTE` curator action. +/// Returns the new haplogroup id. All in one transaction. +pub async fn promote(pool: &PgPool, id: i64, action_by: &str) -> Result { + // Load proposal + evidence first (read-only). 
+ let detail = get(pool, id).await?.ok_or_else(|| DbError::Conflict(format!("proposal {id} not found")))?; + if detail.summary.status != "ACCEPTED" { + return Err(DbError::Conflict(format!("proposal must be ACCEPTED to promote (is {})", detail.summary.status))); + } + let name = detail.summary.proposed_name.clone().filter(|n| !n.is_empty()) + .ok_or_else(|| DbError::Conflict("proposal has no proposed_name".into()))?; + + let mut tx = pool.begin().await?; + + // Parent (and its lineage) is required to place the branch. + let parent: Option<(i64, String)> = sqlx::query_as( + "SELECT h.id, h.haplogroup_type::text FROM tree.proposed_branch pb \ + JOIN tree.haplogroup h ON h.id = pb.parent_haplogroup_id WHERE pb.id = $1", + ) + .bind(id) + .fetch_optional(&mut *tx) + .await?; + let (parent_id, dna) = parent.ok_or_else(|| DbError::Conflict("proposal has no parent haplogroup to attach under".into()))?; + + // Name must not already exist for this lineage. + let exists: Option = sqlx::query_scalar( + "SELECT id FROM tree.haplogroup WHERE name = $1 AND haplogroup_type::text = $2", + ) + .bind(&name) + .bind(&dna) + .fetch_optional(&mut *tx) + .await?; + if exists.is_some() { + return Err(DbError::Conflict(format!("'{name}' is already in the {dna} catalog"))); + } + + // Create the branch. + let new_id: i64 = sqlx::query_scalar( + "INSERT INTO tree.haplogroup (name, haplogroup_type, source, confidence_level) \ + VALUES ($1, $2::core.dna_type, 'discovery', 'proposed') RETURNING id", + ) + .bind(&name) + .bind(&dna) + .fetch_one(&mut *tx) + .await?; + + // Edge under the parent. + sqlx::query( + "INSERT INTO tree.haplogroup_relationship (child_haplogroup_id, parent_haplogroup_id, source) \ + VALUES ($1, $2, 'discovery')", + ) + .bind(new_id) + .bind(parent_id) + .execute(&mut *tx) + .await?; + + // Defining variants: get-or-create by name (promote UNNAMED -> NAMED), link. 
+ for dv in defining_variants(&detail.evidence) { + let coords = match dv.position { + Some(pos) => serde_json::json!({ "GRCh38": { + "contig": "chrY", "position": pos, + "ancestral": dv.reference, "derived": dv.alternate + }}), + None => serde_json::json!({}), + }; + let variant_id: i64 = sqlx::query_scalar( + "INSERT INTO core.variant (canonical_name, mutation_type, naming_status, coordinates) \ + VALUES ($1, 'SNP'::core.mutation_type, 'NAMED'::core.naming_status, $2) \ + ON CONFLICT (canonical_name, COALESCE(defining_haplogroup_id, -1)) WHERE canonical_name IS NOT NULL DO UPDATE SET naming_status = \ + CASE WHEN core.variant.naming_status = 'UNNAMED' THEN 'NAMED'::core.naming_status \ + ELSE core.variant.naming_status END \ + RETURNING id", + ) + .bind(&dv.name) + .bind(coords) + .fetch_one(&mut *tx) + .await?; + sqlx::query("INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES ($1, $2)") + .bind(new_id) + .bind(variant_id) + .execute(&mut *tx) + .await?; + } + + sqlx::query("UPDATE tree.proposed_branch SET status = 'PROMOTED' WHERE id = $1") + .bind(id) + .execute(&mut *tx) + .await?; + + // Reassign the contributing samples to the new terminal (and freeze their + // private variants) atomically with the branch creation. + let reassigned = crate::discovery::reassign_after_promote(&mut tx, id, new_id).await?; + + sqlx::query( + "INSERT INTO tree.curator_action (proposed_branch_id, action, notes, action_by) \ + VALUES ($1, 'PROMOTE', $2, $3)", + ) + .bind(id) + .bind(format!("promoted to haplogroup #{new_id} ({reassigned} samples reassigned)")) + .bind(action_by) + .execute(&mut *tx) + .await?; + + tx.commit().await?; + Ok(new_id) +} + +/// Curator decision. `action` is APPROVE / REJECT / DEFER; sets the proposal +/// status (ACCEPTED / REJECTED / UNDER_REVIEW) and records a curator_action. +/// (Catalog promotion — creating the named branch/variants — is a separate step.) 
+pub async fn review( + pool: &PgPool, + id: i64, + action: &str, + action_by: &str, + notes: Option<&str>, +) -> Result { + let status = match action { + "APPROVE" => "ACCEPTED", + "REJECT" => "REJECTED", + "DEFER" => "UNDER_REVIEW", + other => return Err(DbError::Decode(format!("unknown review action: {other}"))), + }; + let mut tx = pool.begin().await?; + let affected = sqlx::query("UPDATE tree.proposed_branch SET status = $2 WHERE id = $1") + .bind(id) + .bind(status) + .execute(&mut *tx) + .await? + .rows_affected(); + if affected == 0 { + tx.rollback().await?; + return Ok(false); + } + sqlx::query( + "INSERT INTO tree.curator_action (proposed_branch_id, action, notes, action_by) \ + VALUES ($1, $2, $3, $4)", + ) + .bind(id) + .bind(action) + .bind(notes) + .bind(action_by) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(true) +} diff --git a/rust/crates/du-db/src/publication.rs b/rust/crates/du-db/src/publication.rs new file mode 100644 index 00000000..f40185fb --- /dev/null +++ b/rust/crates/du-db/src/publication.rs @@ -0,0 +1,379 @@ +//! Queries for `pubs.publication` (the references listing) + publication jobs +//! (OpenAlex enrichment, discovery candidates). + +use crate::{DbError, Page}; +use chrono::NaiveDate; +use du_domain::ids::PublicationId; +use du_domain::publication::Publication; +use sqlx::types::chrono::{DateTime, Utc}; +use sqlx::PgPool; +use uuid::Uuid; + +/// Fields the OpenAlex enrichment job updates (COALESCE'd — nulls don't wipe). +#[derive(Debug, Default, Clone)] +pub struct OpenAlexUpdate { + pub openalex_id: Option, + pub journal: Option, + pub publication_date: Option, + pub cited_by_count: Option, + pub open_access_status: Option, + pub abstract_summary: Option, +} + +/// All publications that have a DOI (the enrichment job's work-list). 
+pub async fn dois(pool: &PgPool) -> Result, DbError> { + let rows: Vec<(i64, String)> = + sqlx::query_as("SELECT id, doi FROM pubs.publication WHERE doi IS NOT NULL ORDER BY id") + .fetch_all(pool) + .await?; + Ok(rows.into_iter().map(|(id, doi)| (PublicationId(id), doi)).collect()) +} + +/// Apply OpenAlex enrichment (only overwrites a column when the new value is set). +pub async fn update_openalex(pool: &PgPool, id: PublicationId, u: &OpenAlexUpdate) -> Result { + let affected = sqlx::query( + "UPDATE pubs.publication SET \ + open_alex_id = COALESCE($2, open_alex_id), \ + journal = COALESCE($3, journal), \ + publication_date = COALESCE($4, publication_date), \ + cited_by_count = COALESCE($5, cited_by_count), \ + open_access_status = COALESCE($6, open_access_status), \ + abstract_summary = COALESCE($7, abstract_summary), \ + updated_at = now() \ + WHERE id = $1", + ) + .bind(id.0) + .bind(&u.openalex_id) + .bind(&u.journal) + .bind(u.publication_date) + .bind(u.cited_by_count) + .bind(&u.open_access_status) + .bind(&u.abstract_summary) + .execute(pool) + .await? + .rows_affected(); + Ok(affected > 0) +} + +/// Fields the PubMed (NCBI) enrichment job fills. Gap-fill semantics: only +/// populates an empty column (never overwrites curated/OpenAlex values). +#[derive(Debug, Default, Clone)] +pub struct PubMedUpdate { + pub journal: Option, + pub publication_date: Option, + pub authors: Option, + pub doi: Option, +} + +/// Publications that have a PMID but still lack journal/authors/date/doi — +/// the PubMed enrichment job's work-list, oldest first, capped at `limit`. 
+pub async fn pmids_needing_enrichment( + pool: &PgPool, + limit: i64, +) -> Result, DbError> { + let rows: Vec<(i64, String)> = sqlx::query_as( + "SELECT id, pubmed_id FROM pubs.publication \ + WHERE pubmed_id IS NOT NULL \ + AND (journal IS NULL OR authors IS NULL OR publication_date IS NULL OR doi IS NULL) \ + ORDER BY id LIMIT $1", + ) + .bind(limit.clamp(1, 500)) + .fetch_all(pool) + .await?; + Ok(rows.into_iter().map(|(id, p)| (PublicationId(id), p)).collect()) +} + +/// Apply PubMed enrichment, filling only empty columns. The DOI is set only when +/// the row has none AND the value isn't already taken (DOI is UNIQUE — this avoids +/// a constraint violation aborting the batch). +pub async fn update_pubmed(pool: &PgPool, id: PublicationId, u: &PubMedUpdate) -> Result { + let affected = sqlx::query( + "UPDATE pubs.publication SET \ + journal = COALESCE(journal, $2), \ + publication_date = COALESCE(publication_date, $3), \ + authors = COALESCE(authors, $4), \ + doi = CASE WHEN doi IS NULL AND $5 IS NOT NULL \ + AND NOT EXISTS (SELECT 1 FROM pubs.publication p2 WHERE p2.doi = $5 AND p2.id <> $1) \ + THEN $5 ELSE doi END, \ + updated_at = now() \ + WHERE id = $1", + ) + .bind(id.0) + .bind(&u.journal) + .bind(u.publication_date) + .bind(&u.authors) + .bind(&u.doi) + .execute(pool) + .await? + .rows_affected(); + Ok(affected > 0) +} + +/// Enabled discovery search queries. +pub async fn enabled_search_configs(pool: &PgPool) -> Result, DbError> { + let rows: Vec = sqlx::query_scalar( + "SELECT search_query FROM pubs.publication_search_config WHERE enabled = true AND search_query IS NOT NULL", + ) + .fetch_all(pool) + .await?; + Ok(rows) +} + +/// Upsert a discovery candidate by OpenAlex id (preserves curator status/review). 
+#[allow(clippy::too_many_arguments)] +pub async fn upsert_candidate( + pool: &PgPool, + openalex_id: &str, + doi: Option<&str>, + title: Option<&str>, + abstract_summary: Option<&str>, + publication_date: Option, + journal_name: Option<&str>, +) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO pubs.publication_candidate \ + (openalex_id, doi, title, abstract, publication_date, journal_name, status) \ + VALUES ($1, $2, $3, $4, $5, $6, 'pending') \ + ON CONFLICT (openalex_id) DO UPDATE SET doi = EXCLUDED.doi, title = EXCLUDED.title, \ + abstract = EXCLUDED.abstract, publication_date = EXCLUDED.publication_date, \ + journal_name = EXCLUDED.journal_name", + ) + .bind(openalex_id) + .bind(doi) + .bind(title) + .bind(abstract_summary) + .bind(publication_date) + .bind(journal_name) + .execute(pool) + .await?; + Ok(()) +} + +/// Whether a publication with this DOI already exists in the catalog. +pub async fn exists_by_doi(pool: &PgPool, doi: &str) -> Result { + let n: i64 = sqlx::query_scalar("SELECT count(*) FROM pubs.publication WHERE doi = $1") + .bind(doi) + .fetch_one(pool) + .await?; + Ok(n > 0) +} + +// ── discovery candidate review queue ──────────────────────────────────────── + +/// A discovery candidate awaiting editorial review. +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct Candidate { + pub id: i64, + pub openalex_id: String, + pub doi: Option, + pub title: Option, + pub abstract_text: Option, + pub publication_date: Option, + pub journal_name: Option, + pub relevance_score: Option, + pub status: String, + pub created_at: DateTime, +} + +const CAND_COLS: &str = "id, openalex_id, doi, title, abstract AS abstract_text, publication_date, \ + journal_name, relevance_score::float8 AS relevance_score, status, created_at"; + +/// Paginated candidate queue, optionally filtered by status, newest first. 
+pub async fn list_candidates( + pool: &PgPool, + status: Option<&str>, + page: i64, + page_size: i64, +) -> Result, DbError> { + let offset = Page::<()>::offset(page, page_size); + let limit = page_size.clamp(1, 200); + let status = status.filter(|s| !s.is_empty()); + let where_sql = "WHERE ($1::text IS NULL OR status = $1)"; + let total: i64 = + sqlx::query_scalar(&format!("SELECT count(*) FROM pubs.publication_candidate {where_sql}")) + .bind(status) + .fetch_one(pool) + .await?; + let items: Vec = sqlx::query_as(&format!( + "SELECT {CAND_COLS} FROM pubs.publication_candidate {where_sql} \ + ORDER BY created_at DESC, id DESC LIMIT $2 OFFSET $3" + )) + .bind(status) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + Ok(Page { items, total, page: page.max(1), page_size: limit }) +} + +pub async fn get_candidate(pool: &PgPool, id: i64) -> Result, DbError> { + Ok(sqlx::query_as(&format!("SELECT {CAND_COLS} FROM pubs.publication_candidate WHERE id = $1")) + .bind(id) + .fetch_optional(pool) + .await?) +} + +/// Set a candidate's review status (`accepted`/`rejected`/`deferred`) and the +/// reviewing curator. Returns whether a row changed. +pub async fn review_candidate( + pool: &PgPool, + id: i64, + status: &str, + reviewed_by: Uuid, +) -> Result { + let n = sqlx::query("UPDATE pubs.publication_candidate SET status = $2, reviewed_by = $3 WHERE id = $1") + .bind(id) + .bind(status) + .bind(reviewed_by) + .execute(pool) + .await? + .rows_affected(); + Ok(n > 0) +} + +/// **Promote** a candidate to a real `pubs.publication`: reuse an existing +/// publication matching the candidate's OpenAlex id or DOI, else create one from +/// the candidate's metadata; then mark the candidate `accepted`. Returns the +/// publication id. Errors if the candidate has no title (publications require one). 
+pub async fn promote_candidate(pool: &PgPool, id: i64, by: Uuid) -> Result { + let mut tx = pool.begin().await?; + let c: Candidate = sqlx::query_as(&format!( + "SELECT {CAND_COLS} FROM pubs.publication_candidate WHERE id = $1 FOR UPDATE" + )) + .bind(id) + .fetch_optional(&mut *tx) + .await? + .ok_or_else(|| DbError::Conflict(format!("candidate {id} not found")))?; + let title = c + .title + .as_deref() + .map(str::trim) + .filter(|t| !t.is_empty()) + .ok_or_else(|| DbError::Conflict("candidate has no title — cannot promote".into()))?; + + // Reuse an existing publication by OpenAlex id or DOI; else insert. + let existing: Option = sqlx::query_scalar( + "SELECT id FROM pubs.publication WHERE open_alex_id = $1 OR ($2::text IS NOT NULL AND doi = $2) LIMIT 1", + ) + .bind(&c.openalex_id) + .bind(c.doi.as_deref()) + .fetch_optional(&mut *tx) + .await?; + let pub_id: i64 = match existing { + Some(pid) => pid, + None => { + sqlx::query_scalar( + "INSERT INTO pubs.publication (open_alex_id, doi, title, journal, publication_date, abstract_summary) \ + VALUES ($1, $2, $3, $4, $5, $6) RETURNING id", + ) + .bind(&c.openalex_id) + .bind(c.doi.as_deref()) + .bind(title) + .bind(c.journal_name.as_deref()) + .bind(c.publication_date) + .bind(c.abstract_text.as_deref()) + .fetch_one(&mut *tx) + .await? 
+ } + }; + + sqlx::query("UPDATE pubs.publication_candidate SET status = 'accepted', reviewed_by = $2 WHERE id = $1") + .bind(id) + .bind(by) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(PublicationId(pub_id)) +} + +#[derive(sqlx::FromRow)] +struct PublicationRow { + id: i64, + title: String, + doi: Option, + pubmed_id: Option, + journal: Option, + publication_date: Option, + authors: Option, + abstract_summary: Option, + url: Option, + cited_by_count: Option, + open_access_status: Option, +} + +impl From for Publication { + fn from(r: PublicationRow) -> Self { + Publication { + id: PublicationId(r.id), + title: r.title, + doi: r.doi, + pubmed_id: r.pubmed_id, + journal: r.journal, + publication_date: r.publication_date, + authors: r.authors, + abstract_summary: r.abstract_summary, + url: r.url, + cited_by_count: r.cited_by_count, + open_access_status: r.open_access_status, + } + } +} + +const SELECT: &str = "SELECT id, title, doi, pubmed_id, journal, publication_date, authors, \ + abstract_summary, url, cited_by_count, open_access_status FROM pubs.publication"; + +pub async fn get_by_id(pool: &PgPool, id: PublicationId) -> Result, DbError> { + let row: Option = sqlx::query_as(&format!("{SELECT} WHERE id = $1")) + .bind(id.0) + .fetch_optional(pool) + .await?; + Ok(row.map(Into::into)) +} + +/// Paginated list, optionally filtered by title/journal/DOI substring, newest first. 
+pub async fn search(
+    pool: &PgPool,
+    query: Option<&str>,
+    page: i64,
+    page_size: i64,
+) -> Result<Page<Publication>, DbError> {
+    let offset = Page::<()>::offset(page, page_size);
+    let limit = page_size.clamp(1, 200);
+    // Blank or whitespace-only input means "no filter".
+    let term = query.map(str::trim).filter(|q| !q.is_empty());
+
+    const FILTER: &str = "WHERE title ILIKE $1 OR journal ILIKE $1 OR doi ILIKE $1";
+    const ORDER: &str = "ORDER BY publication_date DESC NULLS LAST, id DESC";
+
+    let (total, rows): (i64, Vec<PublicationRow>) = if let Some(t) = term {
+        // Escape LIKE metacharacters so the term is matched as a literal substring;
+        // otherwise a `%` or `_` typed by the user acts as a wildcard.
+        let like = format!("%{}%", escape_like(t));
+        let total: i64 =
+            sqlx::query_scalar(&format!("SELECT count(*) FROM pubs.publication {FILTER}"))
+                .bind(&like)
+                .fetch_one(pool)
+                .await?;
+        let rows = sqlx::query_as(&format!("{SELECT} {FILTER} {ORDER} LIMIT $2 OFFSET $3"))
+            .bind(&like)
+            .bind(limit)
+            .bind(offset)
+            .fetch_all(pool)
+            .await?;
+        (total, rows)
+    } else {
+        let total: i64 = sqlx::query_scalar("SELECT count(*) FROM pubs.publication")
+            .fetch_one(pool)
+            .await?;
+        let rows = sqlx::query_as(&format!("{SELECT} {ORDER} LIMIT $1 OFFSET $2"))
+            .bind(limit)
+            .bind(offset)
+            .fetch_all(pool)
+            .await?;
+        (total, rows)
+    };
+
+    Ok(Page {
+        items: rows.into_iter().map(Into::into).collect(),
+        total,
+        page: page.max(1),
+        page_size: limit,
+    })
+}
+
+/// Backslash-escape the LIKE/ILIKE metacharacters (`\`, `%`, `_`) in `term`.
+/// Relies on backslash being PostgreSQL's default LIKE escape character.
+fn escape_like(term: &str) -> String {
+    let mut escaped = String::with_capacity(term.len());
+    for ch in term.chars() {
+        if matches!(ch, '\\' | '%' | '_') {
+            escaped.push('\\');
+        }
+        escaped.push(ch);
+    }
+    escaped
+}
diff --git a/rust/crates/du-db/src/research.rs b/rust/crates/du-db/src/research.rs
new file mode 100644
index 00000000..c9f6ada2
--- /dev/null
+++ b/rust/crates/du-db/src/research.rs
@@ -0,0 +1,705 @@
+//! D2 ResearchSubject registry (AppView side) — **PII-free**. A pseudonymous
+//! "person" node co-admins attach project memberships + merge-links to. Identity
+//! resolution (which kit/sample is which person) happens Edge-to-Edge over D1 /
+//! genetically over D3; the AppView learns no name, kit number, or hash of one.
+//! This module is pure storage + the authorization readers the handler gates on.
+//!
+//! **Canonical signed messages** ([`messages`]) are a cross-repo contract with the
+//! Navigator Edge — keep them byte-stable.
+ +use crate::DbError; +use serde_json::{json, Value}; +use sqlx::PgPool; +use uuid::Uuid; + +/// The exact bytes each request's Ed25519 signature is computed over (cross-repo). +pub mod messages { + pub fn register(steward_did: &str, project_id: i64, subject_id: Option<&str>) -> String { + format!("research-register\n{steward_did}\n{project_id}\n{}", subject_id.unwrap_or("")) + } + pub fn merge(asserted_by_did: &str, keep: &str, retire: &str, method: &str) -> String { + format!("research-merge\n{asserted_by_did}\n{keep}\n{retire}\n{method}") + } + pub fn custody(steward_did: &str, subject_id: &str, new_custody_did: &str) -> String { + format!("research-custody\n{steward_did}\n{subject_id}\n{new_custody_did}") + } + /// Replay-guarded read poll: caller proves it is `did` at `ts` (unix seconds). + pub fn poll(did: &str, ts: i64) -> String { + format!("research-poll\n{did}\n{ts}") + } + pub fn add_member(actor_did: &str, project_id: i64, member_did: &str, role: &str) -> String { + format!("research-add-member\n{actor_did}\n{project_id}\n{member_did}\n{role}") + } + pub fn revoke_member(actor_did: &str, project_id: i64, member_did: &str) -> String { + format!("research-revoke-member\n{actor_did}\n{project_id}\n{member_did}") + } + /// Record an assertion (D4). The value/evidence aren't signed (like `merge` doesn't + /// sign confidence) — the author binds subject + predicate + final scope. + pub fn assert(author_did: &str, subject_id: &str, predicate: &str, scope: &str) -> String { + format!("research-assert\n{author_did}\n{subject_id}\n{predicate}\n{scope}") + } + pub fn retract(actor_did: &str, assertion_id: i64) -> String { + format!("research-retract\n{actor_did}\n{assertion_id}") + } + /// Accept a `SAME_PERSON_AS` claim → drives the D2 merge (dispute-resolution authority). 
+ pub fn resolve(actor_did: &str, assertion_id: i64) -> String { + format!("research-resolve\n{actor_did}\n{assertion_id}") + } +} + +// ── collaborator-team ACL (D5) ──────────────────────────────────────────────── + +/// A project collaborator role. `social.group_project.owner_did` is the founding ADMIN. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Role { + Admin, + CoAdmin, + Moderator, + Curator, +} + +impl Role { + pub fn parse(s: &str) -> Option { + Some(match s { + "ADMIN" => Role::Admin, + "CO_ADMIN" => Role::CoAdmin, + "MODERATOR" => Role::Moderator, + "CURATOR" => Role::Curator, + _ => return None, + }) + } + pub fn as_str(self) -> &'static str { + match self { + Role::Admin => "ADMIN", + Role::CoAdmin => "CO_ADMIN", + Role::Moderator => "MODERATOR", + Role::Curator => "CURATOR", + } + } + /// Whether this role is granted `cap` (the D5 §4 capability→role map). Capabilities + /// beyond what's enforced today (assertions/disputes/catalog) are defined for the + /// cross-repo contract; they gate D4 once it lands. + pub fn allows(self, cap: Capability) -> bool { + use Capability::*; + use Role::*; + match cap { + ReadProject => true, // any live team member + ManageRoles => self == Admin, + ManageSubjects | WriteAssertions => matches!(self, Admin | CoAdmin), + ResolveDispute => matches!(self, Admin | Curator), + PromoteToCatalog => self == Curator, + } + } +} + +/// A capability the ACL gates (D5 §4). +#[derive(Debug, Clone, Copy)] +pub enum Capability { + ManageRoles, + ManageSubjects, + ReadProject, + WriteAssertions, // D4 (forward) + ResolveDispute, // D4 (forward) + PromoteToCatalog, // catalog bridge (forward) +} + +/// The caller's role in a project: `owner_did` ⇒ ADMIN, else the live `project_member`. 
+pub async fn role_of(pool: &PgPool, project_id: i64, did: &str) -> Result, DbError> { + if project_owner(pool, project_id).await?.as_deref() == Some(did) { + return Ok(Some(Role::Admin)); + } + let r: Option = sqlx::query_scalar( + "SELECT role FROM research.project_member WHERE project_id = $1 AND member_did = $2 AND left_at IS NULL", + ) + .bind(project_id) + .bind(did) + .fetch_optional(pool) + .await?; + Ok(r.as_deref().and_then(Role::parse)) +} + +/// Whether `did` is a live team member of the project. +pub async fn is_team_member(pool: &PgPool, project_id: i64, did: &str) -> Result { + Ok(role_of(pool, project_id, did).await?.is_some()) +} + +/// Whether `did` is granted `cap` in the project. +pub async fn can(pool: &PgPool, project_id: i64, did: &str, cap: Capability) -> Result { + Ok(role_of(pool, project_id, did).await?.map(|r| r.allows(cap)).unwrap_or(false)) +} + +/// Add or update a team member (re-activates a revoked one). +pub async fn add_member( + pool: &PgPool, + project_id: i64, + member_did: &str, + role: Role, + permissions: &[String], + appointed_by: &str, +) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO research.project_member (project_id, member_did, role, permissions, appointed_by, left_at) \ + VALUES ($1, $2, $3, $4, $5, NULL) \ + ON CONFLICT (project_id, member_did) DO UPDATE SET \ + role = EXCLUDED.role, permissions = EXCLUDED.permissions, appointed_by = EXCLUDED.appointed_by, left_at = NULL", + ) + .bind(project_id) + .bind(member_did) + .bind(role.as_str()) + .bind(permissions) + .bind(appointed_by) + .execute(pool) + .await?; + Ok(()) +} + +/// Revoke a member (set `left_at`). Returns whether a live row was revoked. +pub async fn revoke_member(pool: &PgPool, project_id: i64, member_did: &str) -> Result { + let affected = + sqlx::query("UPDATE research.project_member SET left_at = now() WHERE project_id = $1 AND member_did = $2 AND left_at IS NULL") + .bind(project_id) + .bind(member_did) + .execute(pool) + .await? 
+ .rows_affected(); + Ok(affected > 0) +} + +/// A live team member (the owner appears as an implicit ADMIN). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct MemberRow { + pub member_did: String, + pub role: String, +} + +pub async fn members_of(pool: &PgPool, project_id: i64) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT member_did, role FROM ( \ + SELECT owner_did AS member_did, 'ADMIN' AS role, created_at AS joined_at \ + FROM social.group_project WHERE id = $1 AND owner_did IS NOT NULL \ + UNION ALL \ + SELECT member_did, role, joined_at FROM research.project_member \ + WHERE project_id = $1 AND left_at IS NULL \ + ) t ORDER BY joined_at", + ) + .bind(project_id) + .fetch_all(pool) + .await?) +} + +// ── registry ops ────────────────────────────────────────────────────────────── + +/// Register a subject in a project: mint a fresh pseudonymous id when `subject_id` is +/// `None` (the new-member case), else attach the given id (the id-exchange-linked +/// case). The membership is idempotent. Returns the subject id. +pub async fn register_in_project( + pool: &PgPool, + subject_id: Option, + project_id: i64, + steward_did: &str, +) -> Result { + let mut tx = pool.begin().await?; + let sid = match subject_id { + Some(id) => { + // Ensure the row exists (idempotent for an externally-agreed id). + sqlx::query("INSERT INTO research.research_subject (research_subject_id) VALUES ($1) ON CONFLICT DO NOTHING") + .bind(id) + .execute(&mut *tx) + .await?; + id + } + None => { + sqlx::query_scalar("INSERT INTO research.research_subject DEFAULT VALUES RETURNING research_subject_id") + .fetch_one(&mut *tx) + .await? 
+ } + }; + sqlx::query( + "INSERT INTO research.subject_membership (research_subject_id, project_id, steward_did) \ + VALUES ($1, $2, $3) ON CONFLICT (research_subject_id, project_id) DO NOTHING", + ) + .bind(sid) + .bind(project_id) + .bind(steward_did) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(sid) +} + +/// Merge two subjects (audited): keep `keep`, **tombstone** `retire` (set its +/// `retired_into = keep`) and repoint its memberships + biosample links to `keep`. +/// The pseudonymous id survives so a local holder can still resolve it. +pub async fn merge_subjects( + pool: &PgPool, + keep: Uuid, + retire: Uuid, + method: &str, + asserted_by_did: &str, + confidence: Option, +) -> Result<(), DbError> { + if keep == retire { + return Err(DbError::Conflict("cannot merge a subject into itself".into())); + } + let mut tx = pool.begin().await?; + sqlx::query( + "INSERT INTO research.subject_link (subject_a, subject_b, method, asserted_by_did, confidence) \ + VALUES ($1, $2, $3, $4, $5)", + ) + .bind(keep) + .bind(retire) + .bind(method) + .bind(asserted_by_did) + .bind(confidence) + .execute(&mut *tx) + .await?; + // Repoint memberships (skip a project where the kept id is already a member). 
+ sqlx::query( + "UPDATE research.subject_membership m SET research_subject_id = $1 \ + WHERE m.research_subject_id = $2 \ + AND NOT EXISTS (SELECT 1 FROM research.subject_membership k \ + WHERE k.research_subject_id = $1 AND k.project_id = m.project_id)", + ) + .bind(keep) + .bind(retire) + .execute(&mut *tx) + .await?; + sqlx::query("DELETE FROM research.subject_membership WHERE research_subject_id = $1") + .bind(retire) + .execute(&mut *tx) + .await?; + sqlx::query( + "UPDATE research.subject_biosample b SET research_subject_id = $1 \ + WHERE b.research_subject_id = $2 \ + AND NOT EXISTS (SELECT 1 FROM research.subject_biosample k \ + WHERE k.research_subject_id = $1 AND k.sample_guid = b.sample_guid)", + ) + .bind(keep) + .bind(retire) + .execute(&mut *tx) + .await?; + sqlx::query("DELETE FROM research.subject_biosample WHERE research_subject_id = $1") + .bind(retire) + .execute(&mut *tx) + .await?; + sqlx::query("UPDATE research.research_subject SET retired_into = $1 WHERE research_subject_id = $2") + .bind(keep) + .bind(retire) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(()) +} + +/// Flip custody to a member DID (the member-claim pointer flip, §6). Returns whether +/// the subject existed. +pub async fn set_custody(pool: &PgPool, subject_id: Uuid, custody_did: &str) -> Result { + let affected = sqlx::query("UPDATE research.research_subject SET custody_did = $2 WHERE research_subject_id = $1") + .bind(subject_id) + .bind(custody_did) + .execute(pool) + .await? + .rows_affected(); + Ok(affected > 0) +} + +/// Link a subject to a federated sample (sparse; only when the person federated data). 
+pub async fn link_biosample(pool: &PgPool, subject_id: Uuid, sample_guid: Uuid) -> Result<(), DbError> { + sqlx::query( + "INSERT INTO research.subject_biosample (research_subject_id, sample_guid) VALUES ($1, $2) \ + ON CONFLICT DO NOTHING", + ) + .bind(subject_id) + .bind(sample_guid) + .execute(pool) + .await?; + Ok(()) +} + +// ── authorization readers (gate the handler) ────────────────────────────────── + +/// The project's owner DID (the v1 register-ACL; extends to project-admins under D5). +pub async fn project_owner(pool: &PgPool, project_id: i64) -> Result, DbError> { + Ok(sqlx::query_scalar("SELECT owner_did FROM social.group_project WHERE id = $1 AND deleted = false") + .bind(project_id) + .fetch_optional(pool) + .await? + .flatten()) +} + +/// Whether `did` participates in a project — its owner or a steward of a member +/// (gates the member-list read; extends to project-admins under D5). +pub async fn is_project_participant(pool: &PgPool, did: &str, project_id: i64) -> Result { + Ok(sqlx::query_scalar::<_, i64>( + "SELECT count(*) FROM ( \ + SELECT 1 FROM social.group_project WHERE id = $2 AND owner_did = $1 \ + UNION ALL \ + SELECT 1 FROM research.subject_membership WHERE project_id = $2 AND steward_did = $1 \ + ) p", + ) + .bind(did) + .bind(project_id) + .fetch_one(pool) + .await? + > 0) +} + +/// Whether `did` stewards `subject_id` in any project (gates merge/custody). +pub async fn is_steward_of(pool: &PgPool, did: &str, subject_id: Uuid) -> Result { + Ok(sqlx::query_scalar::<_, i64>( + "SELECT count(*) FROM research.subject_membership WHERE research_subject_id = $1 AND steward_did = $2", + ) + .bind(subject_id) + .bind(did) + .fetch_one(pool) + .await? + > 0) +} + +// ── reads ───────────────────────────────────────────────────────────────────── + +/// A subject's membership (pseudonymous). 
+#[derive(Debug, Clone, sqlx::FromRow)]
+pub struct SubjectRow {
+    pub research_subject_id: Uuid,
+    pub steward_did: String,
+}
+
+/// Every subject membership of a project (subject id + its steward), ordered by `added_at`.
+pub async fn subjects_in_project(pool: &PgPool, project_id: i64) -> Result<Vec<SubjectRow>, DbError> {
+    let members: Vec<SubjectRow> = sqlx::query_as(
+        "SELECT research_subject_id, steward_did FROM research.subject_membership \
+         WHERE project_id = $1 ORDER BY added_at",
+    )
+    .bind(project_id)
+    .fetch_all(pool)
+    .await?;
+    Ok(members)
+}
+
+/// A subject's custody + (if merged away) where it was retired into.
+#[derive(Debug, Clone, sqlx::FromRow)]
+pub struct SubjectView {
+    pub research_subject_id: Uuid,
+    pub custody_did: Option<String>,
+    pub retired_into: Option<Uuid>,
+}
+
+/// Load one subject row by id; `None` when the id is unknown.
+pub async fn subject(pool: &PgPool, id: Uuid) -> Result<Option<SubjectView>, DbError> {
+    let found: Option<SubjectView> = sqlx::query_as(
+        "SELECT research_subject_id, custody_did, retired_into FROM research.research_subject WHERE research_subject_id = $1",
+    )
+    .bind(id)
+    .fetch_optional(pool)
+    .await?;
+    Ok(found)
+}
+
+// ── D4: attributed-claim assertion store (R2 — non-PII, project-scoped) ─────────
+
+/// A claim predicate. Its PII-class picks the rail: `MdkaIs`/`Identity` are R3 (D1 P2P
+/// only) and have **no** AppView table; `Note` is free text → PII by default unless the
+/// author explicitly clears it. The rest are non-PII (a classification or pseudonymous id).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Predicate {
+    SamePersonAs,
+    BelongsToBranch,
+    HaplogroupIs,
+    Note,
+    MdkaIs,
+    Identity,
+}
+
+impl Predicate {
+    /// Parse the upper-snake wire form; unknown strings yield `None`.
+    pub fn parse(s: &str) -> Option<Self> {
+        match s {
+            "SAME_PERSON_AS" => Some(Predicate::SamePersonAs),
+            "BELONGS_TO_BRANCH" => Some(Predicate::BelongsToBranch),
+            "HAPLOGROUP_IS" => Some(Predicate::HaplogroupIs),
+            "NOTE" => Some(Predicate::Note),
+            "MDKA_IS" => Some(Predicate::MdkaIs),
+            "IDENTITY" => Some(Predicate::Identity),
+            _ => None,
+        }
+    }
+    /// Whether this predicate may be stored on the AppView (R1/R2). PII predicates are
+    /// R3-only; `Note` is storable only when the author cleared it of PII.
+    pub fn is_appview_storable(self, pii_cleared: bool) -> bool {
+        match self {
+            Predicate::SamePersonAs | Predicate::BelongsToBranch | Predicate::HaplogroupIs => true,
+            Predicate::Note => pii_cleared,
+            Predicate::MdkaIs | Predicate::Identity => false,
+        }
+    }
+    /// Single-valued predicates fold to SETTLED/DISPUTED; set-valued show all live members.
+    pub fn is_single_valued(self) -> bool {
+        match self {
+            Predicate::BelongsToBranch | Predicate::HaplogroupIs => true,
+            _ => false,
+        }
+    }
+}
+
+/// Defense-in-depth: flag obvious PII in a value (emails / overlong free text) so it
+/// never reaches a server row regardless of predicate (the FTDNA Note-column lesson —
+/// free text can't be auto-cleaned, so it's PII until proven otherwise).
+///
+/// Returns `true` when any string anywhere in the JSON tree contains `@` or is longer
+/// than 200 characters. Only values are inspected — object keys are not.
+fn scan_pii(value: &Value) -> bool {
+    // Explicit work-list instead of recursion; visit order does not matter for "any".
+    let mut pending: Vec<&Value> = vec![value];
+    while let Some(node) = pending.pop() {
+        match node {
+            Value::String(text) => {
+                if text.contains('@') || text.chars().count() > 200 {
+                    return true;
+                }
+            }
+            Value::Array(items) => pending.extend(items.iter()),
+            Value::Object(fields) => pending.extend(fields.values()),
+            _ => {}
+        }
+    }
+    false
+}
+
+/// Record an append-only assertion (R1/R2), then refold the affected views. Rejects
+/// PII predicates (`MdkaIs`/`Identity`), an un-cleared `Note`, and any value the
+/// scrubber flags — those belong on the R3 P2P rail, never an AppView row.
+#[allow(clippy::too_many_arguments)] +pub async fn record_assertion( + pool: &PgPool, + subject_id: Uuid, + predicate: &str, + value: &Value, + author_did: &str, + scope: &str, + evidence: Option<&Value>, + supersedes_id: Option, + pii_cleared: bool, +) -> Result { + let pred = Predicate::parse(predicate).ok_or_else(|| DbError::Conflict(format!("unknown predicate {predicate}")))?; + if !pred.is_appview_storable(pii_cleared) { + return Err(DbError::Conflict(format!("predicate {predicate} carries PII; not storable in the AppView (R3 P2P only)"))); + } + if scan_pii(value) { + return Err(DbError::Conflict("value flagged as PII; not storable in the AppView".into())); + } + let id: i64 = sqlx::query_scalar( + "INSERT INTO research.assertion (subject_id, predicate, value, author_did, scope, evidence, supersedes_id) \ + VALUES ($1, $2, $3, $4, $5, $6, $7) RETURNING id", + ) + .bind(subject_id) + .bind(predicate) + .bind(value) + .bind(author_did) + .bind(scope) + .bind(evidence) + .bind(supersedes_id) + .fetch_one(pool) + .await?; + refold_affected(pool, subject_id, scope).await?; + Ok(id) +} + +/// Retract an assertion (drops out of `current_view`, kept for audit). Returns whether a +/// live row was retracted. +pub async fn retract_assertion(pool: &PgPool, assertion_id: i64) -> Result { + let row: Option<(Uuid, String)> = sqlx::query_as( + "UPDATE research.assertion SET retracted_at = now() \ + WHERE id = $1 AND retracted_at IS NULL RETURNING subject_id, scope", + ) + .bind(assertion_id) + .fetch_optional(pool) + .await?; + match row { + Some((subject_id, scope)) => { + refold_affected(pool, subject_id, &scope).await?; + Ok(true) + } + None => Ok(false), + } +} + +/// Metadata an authorization gate needs about an assertion (author + its scope). 
+#[derive(Debug, Clone, sqlx::FromRow)]
+pub struct AssertionMeta {
+    pub subject_id: Uuid,
+    pub predicate: String,
+    pub author_did: String,
+    pub scope: String,
+}
+
+/// Look up the gate metadata of a live (non-retracted) assertion; `None` when the
+/// id is unknown or the assertion has been retracted.
+pub async fn assertion_meta(pool: &PgPool, assertion_id: i64) -> Result<Option<AssertionMeta>, DbError> {
+    let meta: Option<AssertionMeta> = sqlx::query_as(
+        "SELECT subject_id, predicate, author_did, scope FROM research.assertion WHERE id = $1 AND retracted_at IS NULL",
+    )
+    .bind(assertion_id)
+    .fetch_optional(pool)
+    .await?;
+    Ok(meta)
+}
+
+/// Accept a live `SAME_PERSON_AS` claim and perform the corresponding merge.
+///
+/// The claim's own subject is kept; the subject named by `other_subject_id` in the
+/// claim's JSON value is retired. The merge is recorded with method `ASSERTION`,
+/// attributed to the claim's author, with the value's optional numeric
+/// `confidence`. Returns `(kept, retired)`.
+///
+/// Errors with `DbError::Conflict` when no such live claim exists or its value has
+/// no parseable `other_subject_id`.
+pub async fn accept_same_person(pool: &PgPool, assertion_id: i64) -> Result<(Uuid, Uuid), DbError> {
+    let claim: Option<(Uuid, Value, String)> = sqlx::query_as(
+        "SELECT subject_id, value, author_did FROM research.assertion \
+         WHERE id = $1 AND predicate = 'SAME_PERSON_AS' AND retracted_at IS NULL",
+    )
+    .bind(assertion_id)
+    .fetch_optional(pool)
+    .await?;
+    let (keep, value, author_did) = match claim {
+        Some(found) => found,
+        None => return Err(DbError::Conflict(format!("no live SAME_PERSON_AS assertion {assertion_id}"))),
+    };
+
+    let other_id = value
+        .get("other_subject_id")
+        .and_then(Value::as_str)
+        .and_then(|raw| raw.parse::<Uuid>().ok());
+    let retire: Uuid = match other_id {
+        Some(id) => id,
+        None => return Err(DbError::Conflict("SAME_PERSON_AS value missing other_subject_id".into())),
+    };
+    let confidence = value.get("confidence").and_then(Value::as_f64);
+
+    merge_subjects(pool, keep, retire, "ASSERTION", &author_did, confidence).await?;
+    // Refold the kept subject's views after the merge.
+    // NOTE(review): `merge_subjects` repoints memberships and biosample links only;
+    // confirm whether the retired subject's assertion rows are meant to move too.
+    refold_subject(pool, keep).await?;
+    Ok((keep, retire))
+}
+
+// ── current_view fold (mirrors the fed.* reporting fold; assign-and-prune) ──────
+
+/// A live claim in a folded view.
+#[derive(sqlx::FromRow)] +struct LiveClaim { + id: i64, + predicate: String, + value: Value, + author_did: String, + evidence: Option, + created_at: chrono::DateTime, +} + +/// The viewing scopes a write under `assertion_scope` affects: its own scope, plus — if +/// it's a PUBLIC claim — every PROJECT scope this subject already has claims in (PUBLIC +/// claims surface in every project fold). Per-project isolation otherwise. +async fn affected_scopes(pool: &PgPool, subject_id: Uuid, assertion_scope: &str) -> Result, DbError> { + let mut scopes = vec![assertion_scope.to_string()]; + if assertion_scope == "PUBLIC" { + let projects: Vec = sqlx::query_scalar( + "SELECT DISTINCT scope FROM research.assertion WHERE subject_id = $1 AND scope LIKE 'PROJECT:%'", + ) + .bind(subject_id) + .fetch_all(pool) + .await?; + for p in projects { + if !scopes.contains(&p) { + scopes.push(p); + } + } + } + Ok(scopes) +} + +async fn refold_affected(pool: &PgPool, subject_id: Uuid, assertion_scope: &str) -> Result<(), DbError> { + for scope in affected_scopes(pool, subject_id, assertion_scope).await? { + refold(pool, subject_id, &scope).await?; + } + Ok(()) +} + +/// Refold every scope a subject currently has a materialized view in (used after a merge, +/// which can move claims across subjects). +async fn refold_subject(pool: &PgPool, subject_id: Uuid) -> Result<(), DbError> { + let scopes: Vec = sqlx::query_scalar( + "SELECT DISTINCT scope FROM research.assertion WHERE subject_id = $1 \ + UNION SELECT DISTINCT scope FROM research.subject_current_view WHERE subject_id = $1", + ) + .bind(subject_id) + .fetch_all(pool) + .await?; + for scope in scopes { + refold(pool, subject_id, &scope).await?; + } + Ok(()) +} + +/// Recompute the materialized `current_view` for one (subject, viewing-scope): a PROJECT +/// view folds its own claims together with PUBLIC ones; single-valued predicates settle or +/// dispute, set-valued list all live members. 
Declarative assign-and-prune (mirrors +/// `coverage::recompute_norms_locked`) — never auto-collapses a disagreement. +async fn refold(pool: &PgPool, subject_id: Uuid, view_scope: &str) -> Result<(), DbError> { + // Live heads = non-retracted and not superseded by another row, scoped to PUBLIC + the view. + let live: Vec = sqlx::query_as( + "SELECT a.id, a.predicate, a.value, a.author_did, a.evidence, a.created_at \ + FROM research.assertion a \ + WHERE a.subject_id = $1 AND a.retracted_at IS NULL \ + AND (a.scope = 'PUBLIC' OR a.scope = $2) \ + AND NOT EXISTS (SELECT 1 FROM research.assertion s \ + WHERE s.supersedes_id = a.id AND s.retracted_at IS NULL) \ + ORDER BY a.predicate, a.created_at", + ) + .bind(subject_id) + .bind(view_scope) + .fetch_all(pool) + .await?; + + // Group by predicate, preserving created_at order. + let mut by_pred: std::collections::BTreeMap> = std::collections::BTreeMap::new(); + for c in live { + by_pred.entry(c.predicate.clone()).or_default().push(c); + } + + let mut tx = pool.begin().await?; + let mut kept: Vec = Vec::new(); + for (predicate, claims) in by_pred { + let single = Predicate::parse(&predicate).map(|p| p.is_single_valued()).unwrap_or(false); + let state = if single && claims.len() > 1 { + let first = &claims[0].value; + if claims.iter().all(|c| &c.value == first) { + "SETTLED" + } else { + "DISPUTED" + } + } else { + "SETTLED" + }; + let view: Vec = claims + .iter() + .map(|c| json!({ + "assertion_id": c.id, + "value": c.value, + "author_did": c.author_did, + "evidence": c.evidence, + "created_at": c.created_at, + })) + .collect(); + sqlx::query( + "INSERT INTO research.subject_current_view (subject_id, predicate, scope, state, view, refolded_at) \ + VALUES ($1, $2, $3, $4, $5, now()) \ + ON CONFLICT (subject_id, predicate, scope) DO UPDATE SET \ + state = EXCLUDED.state, view = EXCLUDED.view, refolded_at = now()", + ) + .bind(subject_id) + .bind(&predicate) + .bind(view_scope) + .bind(state) + .bind(Value::Array(view)) + 
.execute(&mut *tx) + .await?; + kept.push(predicate); + } + // Prune predicates that no longer have any live claim in this view. + sqlx::query("DELETE FROM research.subject_current_view WHERE subject_id = $1 AND scope = $2 AND predicate <> ALL($3)") + .bind(subject_id) + .bind(view_scope) + .bind(&kept) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(()) +} + +/// A materialized fold row served to the project team (R2) — pseudonymous. +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct CurrentViewRow { + pub predicate: String, + pub state: String, + pub view: Value, +} + +/// The project-scoped fold for a subject (PROJECT: claims + PUBLIC), as materialized. +pub async fn current_view(pool: &PgPool, subject_id: Uuid, scope: &str) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT predicate, state, view FROM research.subject_current_view \ + WHERE subject_id = $1 AND scope = $2 ORDER BY predicate", + ) + .bind(subject_id) + .bind(scope) + .fetch_all(pool) + .await?) +} diff --git a/rust/crates/du-db/src/sequencer.rs b/rust/crates/du-db/src/sequencer.rs new file mode 100644 index 00000000..20b6ef43 --- /dev/null +++ b/rust/crates/du-db/src/sequencer.rs @@ -0,0 +1,615 @@ +//! Sequencer-lab lookup — resolve a sequencing instrument id (from BAM/CRAM `@RG` +//! headers, e.g. `A00123`) to its sequencing laboratory, for the Edge analyzer. +//! +//! Resolves via the **preseeded** direct association +//! (`genomics.sequencer_instrument.lab_id` → `genomics.sequencing_lab`). The +//! consensus/curation path (`instrument_observation` → `instrument_association_ +//! proposal` → accept) is not live yet; when it is, accepting a proposal sets +//! `lab_id` and this lookup is unchanged. + +use crate::pagination::Page; +use crate::DbError; +use serde_json::json; +use sqlx::PgPool; +use std::collections::{HashMap, HashSet}; +use uuid::Uuid; + +/// A resolved instrument → lab association. 
+#[derive(Debug, Clone, sqlx::FromRow)] +pub struct LabLookup { + pub instrument_id: String, + pub lab_name: String, + pub is_d2c: bool, + pub website_url: Option, + pub manufacturer: Option, + pub model_name: Option, +} + +/// Resolve a single instrument id to its lab. `None` when the instrument is +/// unknown or has no preseeded lab association. +pub async fn lookup_lab(pool: &PgPool, instrument_id: &str) -> Result, DbError> { + Ok(sqlx::query_as::<_, LabLookup>( + "SELECT si.instrument_id, sl.name AS lab_name, sl.is_d2c, sl.website_url, \ + si.manufacturer, si.model_name \ + FROM genomics.sequencer_instrument si \ + JOIN genomics.sequencing_lab sl ON sl.id = si.lab_id \ + WHERE si.instrument_id = $1", + ) + .bind(instrument_id) + .fetch_optional(pool) + .await?) +} + +/// Every preseeded instrument → lab association (the Edge's bulk cache seed), +/// ordered by instrument id. +pub async fn lab_instruments(pool: &PgPool) -> Result, DbError> { + Ok(sqlx::query_as::<_, LabLookup>( + "SELECT si.instrument_id, sl.name AS lab_name, sl.is_d2c, sl.website_url, \ + si.manufacturer, si.model_name \ + FROM genomics.sequencer_instrument si \ + JOIN genomics.sequencing_lab sl ON sl.id = si.lab_id \ + ORDER BY si.instrument_id", + ) + .fetch_all(pool) + .await?) +} + +// ── consensus engine ───────────────────────────────────────────────────────── +// +// Observations are derived from the federation: each `fed.sequencerun` carrying +// an `@RG` instrument id, joined to its `fed.biosample.center_name` (the claimed +// lab), is one citizen observation. They aggregate per instrument into a single +// active `instrument_association_proposal` (dominant lab + confidence + status); +// a curator accept sets `sequencer_instrument.lab_id`, which is what `lookup_lab` +// resolves. The AppView aggregates + proposes; it does not auto-decide unless +// `auto_accept` is explicitly enabled. + +/// Center names that are not real labs — excluded from observations. 
+const GENERIC_CENTERS: &[&str] = + &["", "unknown", "self", "home", "n/a", "na", "none", "not provided", "unspecified", "private"]; + +/// Consensus thresholds + confidence weights (spec defaults). +#[derive(Debug, Clone)] +pub struct ConsensusConfig { + /// Minimum observations before a proposal is created at all. + pub min_observations: i64, + /// Distinct citizens (or observations) for `READY_FOR_REVIEW`. + pub ready_for_review: i64, + /// Observations for auto-acceptance (only if `auto_accept`). + pub auto_accept_threshold: i64, + /// Distinct citizens required for `READY_FOR_REVIEW` / auto-acceptance. + pub min_distinct_citizens: i64, + /// Dominant-lab agreement ratio required (else held as a conflict). + pub agreement_ratio: f64, + /// Auto-accept unanimous, well-supported proposals (default off — curator-gated). + pub auto_accept: bool, + pub w_observation: f64, + pub w_citizen: f64, + pub w_recency: f64, + pub w_level: f64, +} + +impl Default for ConsensusConfig { + fn default() -> Self { + Self { + min_observations: 2, + ready_for_review: 5, + auto_accept_threshold: 10, + min_distinct_citizens: 3, + agreement_ratio: 0.9, + auto_accept: false, + w_observation: 0.4, + w_citizen: 0.3, + w_recency: 0.2, + w_level: 0.1, + } + } +} + +/// Outcome of [`recompute_consensus`]. +#[derive(Debug, Default, Clone)] +pub struct ConsensusReport { + pub instruments: i64, + pub observations_upserted: u64, + pub observations_pruned: u64, + pub proposals_active: u64, + pub proposals_ready: u64, + pub conflicts: u64, + pub auto_accepted: u64, +} + +/// Confidence score in [0,1] from the spec's weighted blend. `conf_level` is the +/// mean per-claim confidence weight (KNOWN=1.0 / INFERRED=0.7 / GUESSED=0.3) and +/// `recency` the freshest observation's recency factor (1.0 ≤30d, linear decay to +/// 0 over a year, 0.5 when undated) — both computed in SQL across the instrument's +/// observations, from explicit `instrumentObservation` records where present. 
+fn confidence(cfg: &ConsensusConfig, obs: i64, citizens: i64, conf_level: f64, recency: f64) -> f64 { + let f_obs = (obs as f64 / cfg.auto_accept_threshold.max(1) as f64).min(1.0); + let f_cit = (citizens as f64 / cfg.min_distinct_citizens.max(1) as f64).min(1.0); + (cfg.w_observation * f_obs + cfg.w_citizen * f_cit + cfg.w_recency * recency + cfg.w_level * conf_level).min(1.0) +} + +/// Get-or-create a sequencing lab by name; returns its id. +async fn get_or_create_lab(conn: &mut sqlx::PgConnection, name: &str, is_d2c: bool) -> Result { + if let Some(id) = sqlx::query_scalar::<_, i64>( + "INSERT INTO genomics.sequencing_lab (name, is_d2c) VALUES ($1, $2) \ + ON CONFLICT (name) DO NOTHING RETURNING id", + ) + .bind(name) + .bind(is_d2c) + .fetch_optional(&mut *conn) + .await? + { + return Ok(id); + } + Ok(sqlx::query_scalar::<_, i64>("SELECT id FROM genomics.sequencing_lab WHERE name = $1") + .bind(name) + .fetch_one(&mut *conn) + .await?) +} + +/// Recompute instrument→lab consensus from the federation: refresh observations, +/// then regenerate the active proposal set (dominant lab, counts, confidence, +/// status). Curator-decided proposals (`ACCEPTED`/`REJECTED`) and already-resolved +/// instruments (`lab_id` set) are preserved/skipped. +/// Advisory-lock key guarding concurrent recomputes (the hourly job vs. a manual +/// `run-once`). A second caller no-ops instead of interleaving the observation +/// refresh and proposal regeneration. +const CONSENSUS_ADVISORY_KEY: i64 = 0x434F_4E53_4E53; // "CONSNS" + +pub async fn recompute_consensus(pool: &PgPool, cfg: &ConsensusConfig) -> Result { + // Hold a dedicated connection for the duration so only one recompute runs at a + // time. Unlock on every path — a leaked session lock would ride a pooled + // connection back into reuse and wedge all future recomputes. 
+ let mut lock_conn = pool.acquire().await?; + let locked: bool = sqlx::query_scalar("SELECT pg_try_advisory_lock($1)") + .bind(CONSENSUS_ADVISORY_KEY) + .fetch_one(&mut *lock_conn) + .await?; + if !locked { + return Ok(ConsensusReport::default()); + } + let result = recompute_locked(pool, cfg).await; + let _ = sqlx::query("SELECT pg_advisory_unlock($1)") + .bind(CONSENSUS_ADVISORY_KEY) + .execute(&mut *lock_conn) + .await; + result +} + +async fn recompute_locked(pool: &PgPool, cfg: &ConsensusConfig) -> Result { + let mut rep = ConsensusReport::default(); + let generic: Vec = GENERIC_CENTERS.iter().map(|s| s.to_string()).collect(); + + // 1) Ensure a sequencer_instrument row for every federated instrument id — + // from both implicit sequenceruns and explicit instrumentObservation records. + sqlx::query( + "INSERT INTO genomics.sequencer_instrument (instrument_id, model_name) \ + SELECT s.instrument_id, (array_agg(s.instrument_model) FILTER (WHERE s.instrument_model IS NOT NULL))[1] \ + FROM fed.sequencerun s \ + WHERE s.instrument_id IS NOT NULL AND btrim(s.instrument_id) <> '' \ + GROUP BY s.instrument_id \ + ON CONFLICT (instrument_id) DO NOTHING", + ) + .execute(pool) + .await?; + sqlx::query( + "INSERT INTO genomics.sequencer_instrument (instrument_id, model_name) \ + SELECT o.instrument_id, (array_agg(o.instrument_model) FILTER (WHERE o.instrument_model IS NOT NULL))[1] \ + FROM fed.instrument_observation o \ + WHERE o.instrument_id IS NOT NULL AND btrim(o.instrument_id) <> '' \ + GROUP BY o.instrument_id \ + ON CONFLICT (instrument_id) DO NOTHING", + ) + .execute(pool) + .await?; + + // 2) Refresh observations from fed.sequencerun ⋈ fed.biosample (upsert by uri). 
+ rep.observations_upserted = sqlx::query( + "INSERT INTO genomics.instrument_observation \ + (instrument_id, lab_name, biosample_ref, platform, instrument_model, confidence, atproto) \ + SELECT si.id, btrim(b.center_name), s.biosample_ref, s.platform_name, s.instrument_model, 'INFERRED', \ + jsonb_build_object('uri', s.at_uri, 'cid', s.cid, 'repo_did', s.did) \ + FROM fed.sequencerun s \ + JOIN fed.biosample b ON b.at_uri = s.biosample_ref \ + JOIN genomics.sequencer_instrument si ON si.instrument_id = s.instrument_id \ + WHERE s.instrument_id IS NOT NULL AND b.center_name IS NOT NULL \ + AND lower(btrim(b.center_name)) <> ALL($1::text[]) \ + ON CONFLICT ((atproto->>'uri')) WHERE atproto IS NOT NULL \ + DO UPDATE SET instrument_id = EXCLUDED.instrument_id, lab_name = EXCLUDED.lab_name, \ + biosample_ref = EXCLUDED.biosample_ref, platform = EXCLUDED.platform, \ + instrument_model = EXCLUDED.instrument_model", + ) + .bind(&generic) + .execute(pool) + .await? + .rows_affected(); + + // 2b) Fold in explicit citizen instrumentObservation records — these carry a + // real confidence level and observation timestamp (the implicit ones above + // are all INFERRED/undated). Keyed by the record's own uri, so they coexist + // with the sequencerun-derived rows rather than replacing them. 
+ rep.observations_upserted += sqlx::query( + "INSERT INTO genomics.instrument_observation \ + (instrument_id, lab_name, biosample_ref, platform, instrument_model, flowcell_id, run_date, confidence, observed_at, atproto) \ + SELECT si.id, btrim(o.lab_name), o.biosample_ref, o.platform, o.instrument_model, o.flowcell_id, o.run_date, \ + upper(coalesce(o.confidence, 'INFERRED')), o.observed_at, \ + jsonb_build_object('uri', o.at_uri, 'cid', o.cid, 'repo_did', o.did) \ + FROM fed.instrument_observation o \ + JOIN genomics.sequencer_instrument si ON si.instrument_id = o.instrument_id \ + WHERE o.instrument_id IS NOT NULL AND o.lab_name IS NOT NULL \ + AND lower(btrim(o.lab_name)) <> ALL($1::text[]) \ + ON CONFLICT ((atproto->>'uri')) WHERE atproto IS NOT NULL \ + DO UPDATE SET instrument_id = EXCLUDED.instrument_id, lab_name = EXCLUDED.lab_name, \ + biosample_ref = EXCLUDED.biosample_ref, platform = EXCLUDED.platform, \ + instrument_model = EXCLUDED.instrument_model, flowcell_id = EXCLUDED.flowcell_id, \ + run_date = EXCLUDED.run_date, confidence = EXCLUDED.confidence, observed_at = EXCLUDED.observed_at", + ) + .bind(&generic) + .execute(pool) + .await? + .rows_affected(); + + // Prune observations no longer backed by a current fed record (either source). + rep.observations_pruned = sqlx::query( + "DELETE FROM genomics.instrument_observation o \ + WHERE o.atproto->>'uri' IS NOT NULL \ + AND o.atproto->>'uri' NOT IN (SELECT at_uri FROM fed.sequencerun WHERE instrument_id IS NOT NULL) \ + AND o.atproto->>'uri' NOT IN (SELECT at_uri FROM fed.instrument_observation WHERE instrument_id IS NOT NULL)", + ) + .execute(pool) + .await? + .rows_affected(); + + // 3) Aggregate. Per-instrument totals (distinct citizens overall) and per-lab claims. 
+ #[derive(sqlx::FromRow)] + struct Total { + si_id: i64, + obs_total: i64, + citizens_total: i64, + conf_level: f64, + recency: f64, + } + let totals: Vec = sqlx::query_as( + "SELECT o.instrument_id AS si_id, count(*) AS obs_total, \ + count(DISTINCT o.atproto->>'repo_did') AS citizens_total, \ + avg(CASE upper(o.confidence) WHEN 'KNOWN' THEN 1.0 WHEN 'GUESSED' THEN 0.3 ELSE 0.7 END)::float8 AS conf_level, \ + COALESCE(max(CASE \ + WHEN o.observed_at IS NULL THEN 0.5 \ + WHEN o.observed_at > now() - interval '30 days' THEN 1.0 \ + ELSE GREATEST(0.0, 1.0 - EXTRACT(EPOCH FROM now() - o.observed_at) / EXTRACT(EPOCH FROM interval '365 days')) \ + END), 0.5)::float8 AS recency \ + FROM genomics.instrument_observation o \ + JOIN genomics.sequencer_instrument si ON si.id = o.instrument_id \ + WHERE si.lab_id IS NULL AND o.lab_name IS NOT NULL \ + GROUP BY o.instrument_id", + ) + .fetch_all(pool) + .await?; + let totals: HashMap = totals + .into_iter() + .map(|t| (t.si_id, (t.obs_total, t.citizens_total, t.conf_level, t.recency))) + .collect(); + + #[derive(sqlx::FromRow)] + struct Claim { + si_id: i64, + lab_name: String, + obs: i64, + citizens: i64, + } + let claims: Vec = sqlx::query_as( + "SELECT o.instrument_id AS si_id, o.lab_name, count(*) AS obs, \ + count(DISTINCT o.atproto->>'repo_did') AS citizens \ + FROM genomics.instrument_observation o \ + JOIN genomics.sequencer_instrument si ON si.id = o.instrument_id \ + WHERE si.lab_id IS NULL AND o.lab_name IS NOT NULL \ + GROUP BY o.instrument_id, o.lab_name", + ) + .fetch_all(pool) + .await?; + let mut by_instr: HashMap> = HashMap::new(); + for c in claims { + by_instr.entry(c.si_id).or_default().push((c.lab_name, c.obs, c.citizens)); + } + + // Curator decisions to honor: accepted instruments (skip) + rejected (instr,lab) pairs. 
+ let terminal: Vec<(i64, Option, String)> = sqlx::query_as( + "SELECT instrument_id, proposed_lab_name, status FROM genomics.instrument_association_proposal \ + WHERE status IN ('ACCEPTED','REJECTED')", + ) + .fetch_all(pool) + .await?; + let mut accepted: HashSet = HashSet::new(); + let mut rejected: HashSet<(i64, String)> = HashSet::new(); + for (si_id, lab, status) in terminal { + if status == "ACCEPTED" { + accepted.insert(si_id); + } else if let Some(lab) = lab { + rejected.insert((si_id, lab)); + } + } + + // Regenerate the active proposal set in one transaction. Each unresolved + // instrument keeps at most one active (PENDING/READY) proposal, UPSERTed in + // place so its id stays stable across recomputes — a curator's open proposal + // survives a background run. Active proposals whose instrument fell out of the + // set (resolved, dropped below threshold, dominant lab rejected) are pruned at + // the end. + let mut tx = pool.begin().await?; + let mut active_ids: Vec = Vec::new(); + + for (si_id, mut labs) in by_instr { + rep.instruments += 1; + if accepted.contains(&si_id) { + continue; + } + let (obs_total, citizens_total, conf_level, recency) = totals.get(&si_id).copied().unwrap_or((0, 0, 0.7, 0.5)); + if obs_total < cfg.min_observations { + continue; + } + // Dominant lab: most distinct citizens, then most observations. 
+ labs.sort_by(|a, b| b.2.cmp(&a.2).then(b.1.cmp(&a.1))); + let (lab_name, _obs, citizens) = labs[0].clone(); + if rejected.contains(&(si_id, lab_name.clone())) { + continue; // curator rejected this exact association + } + let agreement = if citizens_total > 0 { citizens as f64 / citizens_total as f64 } else { 0.0 }; + let conflict = agreement < cfg.agreement_ratio; + if conflict { + rep.conflicts += 1; + } + let score = confidence(cfg, obs_total, citizens_total, conf_level, recency); + let ready = !conflict && obs_total >= cfg.ready_for_review && citizens_total >= cfg.min_distinct_citizens; + + if cfg.auto_accept + && !conflict + && obs_total >= cfg.auto_accept_threshold + && citizens_total >= cfg.min_distinct_citizens + { + let lab_id = get_or_create_lab(&mut tx, &lab_name, false).await?; + sqlx::query("UPDATE genomics.sequencer_instrument SET lab_id = $2 WHERE id = $1") + .bind(si_id) + .bind(lab_id) + .execute(&mut *tx) + .await?; + sqlx::query( + "INSERT INTO genomics.instrument_association_proposal \ + (instrument_id, proposed_lab_name, observation_count, distinct_citizen_count, \ + confidence_score, status, accepted_lab_id, accepted_instrument_id) \ + VALUES ($1, $2, $3, $4, $5::float8::numeric, 'ACCEPTED', $6, $1)", + ) + .bind(si_id) + .bind(&lab_name) + .bind(obs_total as i32) + .bind(citizens_total as i32) + .bind(score) + .bind(lab_id) + .execute(&mut *tx) + .await?; + rep.auto_accepted += 1; + continue; + } + + let status = if ready { "READY_FOR_REVIEW" } else { "PENDING" }; + sqlx::query( + "INSERT INTO genomics.instrument_association_proposal \ + (instrument_id, proposed_lab_name, proposed_model, observation_count, \ + distinct_citizen_count, confidence_score, status) \ + VALUES ($1, $2, (SELECT model_name FROM genomics.sequencer_instrument WHERE id = $1), \ + $3, $4, $5::float8::numeric, $6) \ + ON CONFLICT (instrument_id) WHERE status IN ('PENDING','READY_FOR_REVIEW') \ + DO UPDATE SET proposed_lab_name = EXCLUDED.proposed_lab_name, \ + 
proposed_model = EXCLUDED.proposed_model, \ + observation_count = EXCLUDED.observation_count, \ + distinct_citizen_count = EXCLUDED.distinct_citizen_count, \ + confidence_score = EXCLUDED.confidence_score, status = EXCLUDED.status", + ) + .bind(si_id) + .bind(&lab_name) + .bind(obs_total as i32) + .bind(citizens_total as i32) + .bind(score) + .bind(status) + .execute(&mut *tx) + .await?; + active_ids.push(si_id); + rep.proposals_active += 1; + if ready { + rep.proposals_ready += 1; + } + } + // Drop active proposals for instruments no longer proposing. `<> ALL('{}')` + // is true for every row, so an empty active set clears them all. + sqlx::query( + "DELETE FROM genomics.instrument_association_proposal \ + WHERE status IN ('PENDING','READY_FOR_REVIEW') AND instrument_id <> ALL($1)", + ) + .bind(&active_ids) + .execute(&mut *tx) + .await?; + tx.commit().await?; + Ok(rep) +} + +/// A proposal for the curator queue. +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct ProposalView { + pub id: i64, + pub instrument_id: String, + pub proposed_lab_name: Option, + pub proposed_model: Option, + pub observation_count: i32, + pub distinct_citizen_count: i32, + pub confidence_score: Option, + pub status: String, +} + +/// A supporting observation (proposal detail). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct ObservationView { + pub lab_name: Option, + pub biosample_ref: Option, + pub platform: Option, + pub instrument_model: Option, + pub repo_did: Option, + pub confidence: Option, +} + +/// Paginated proposal list, optionally filtered by status, ranked by confidence. 
+pub async fn list_proposals( + pool: &PgPool, + status: Option<&str>, + page: i64, + page_size: i64, +) -> Result, DbError> { + let offset = Page::<()>::offset(page, page_size); + let limit = page_size.clamp(1, 200); + let items: Vec = sqlx::query_as( + "SELECT p.id, si.instrument_id, p.proposed_lab_name, p.proposed_model, \ + p.observation_count, p.distinct_citizen_count, p.confidence_score::float8 AS confidence_score, p.status \ + FROM genomics.instrument_association_proposal p \ + JOIN genomics.sequencer_instrument si ON si.id = p.instrument_id \ + WHERE ($1::text IS NULL OR p.status = $1) \ + ORDER BY p.confidence_score DESC NULLS LAST, p.id \ + LIMIT $2 OFFSET $3", + ) + .bind(status) + .bind(limit) + .bind(offset) + .fetch_all(pool) + .await?; + let total: i64 = sqlx::query_scalar( + "SELECT count(*) FROM genomics.instrument_association_proposal WHERE ($1::text IS NULL OR status = $1)", + ) + .bind(status) + .fetch_one(pool) + .await?; + Ok(Page { items, total, page: page.max(1), page_size: limit }) +} + +/// A proposal with its supporting observations. +pub async fn proposal_detail(pool: &PgPool, id: i64) -> Result)>, DbError> { + let Some(p): Option = sqlx::query_as( + "SELECT p.id, si.instrument_id, p.proposed_lab_name, p.proposed_model, \ + p.observation_count, p.distinct_citizen_count, p.confidence_score::float8 AS confidence_score, p.status \ + FROM genomics.instrument_association_proposal p \ + JOIN genomics.sequencer_instrument si ON si.id = p.instrument_id \ + WHERE p.id = $1", + ) + .bind(id) + .fetch_optional(pool) + .await? 
+ else { + return Ok(None); + }; + let obs: Vec = sqlx::query_as( + "SELECT o.lab_name, o.biosample_ref, o.platform, o.instrument_model, \ + o.atproto->>'repo_did' AS repo_did, o.confidence \ + FROM genomics.instrument_observation o \ + WHERE o.instrument_id = (SELECT instrument_id FROM genomics.instrument_association_proposal WHERE id = $1) \ + ORDER BY o.lab_name, o.id", + ) + .bind(id) + .fetch_all(pool) + .await?; + Ok(Some((p, obs))) +} + +/// Accept a proposal: get-or-create the lab, set the instrument's `lab_id` (which +/// is what `lookup_lab` resolves), and mark the proposal `ACCEPTED`. The lab name +/// / model may be curator-overridden. Returns the resolved association. +pub async fn accept_proposal( + pool: &PgPool, + proposal_id: i64, + user_id: Uuid, + lab_name: &str, + manufacturer: Option<&str>, + model: Option<&str>, + is_d2c: Option, +) -> Result { + let mut tx = pool.begin().await?; + let si_id: i64 = sqlx::query_scalar( + "SELECT instrument_id FROM genomics.instrument_association_proposal WHERE id = $1 FOR UPDATE", + ) + .bind(proposal_id) + .fetch_optional(&mut *tx) + .await? + .ok_or_else(|| DbError::Conflict(format!("proposal {proposal_id} not found")))?; + + let lab_id = get_or_create_lab(&mut tx, lab_name, is_d2c.unwrap_or(false)).await?; + // Only touch an existing lab's d2c flag when the curator explicitly set it — an + // omitted flag must not silently clear a preseeded lab's is_d2c (which would + // mislabel every other instrument tied to that lab). 
+ if let Some(d2c) = is_d2c { + sqlx::query("UPDATE genomics.sequencing_lab SET is_d2c = $2 WHERE id = $1") + .bind(lab_id) + .bind(d2c) + .execute(&mut *tx) + .await?; + } + sqlx::query( + "UPDATE genomics.sequencer_instrument \ + SET lab_id = $2, manufacturer = COALESCE($3, manufacturer), model_name = COALESCE($4, model_name) \ + WHERE id = $1", + ) + .bind(si_id) + .bind(lab_id) + .bind(manufacturer) + .bind(model) + .execute(&mut *tx) + .await?; + sqlx::query( + "UPDATE genomics.instrument_association_proposal \ + SET status = 'ACCEPTED', proposed_lab_name = $2, accepted_lab_id = $3, accepted_instrument_id = $1 \ + WHERE id = $4", + ) + .bind(si_id) + .bind(lab_name) + .bind(lab_id) + .bind(proposal_id) + .execute(&mut *tx) + .await?; + let hit: LabLookup = sqlx::query_as( + "SELECT si.instrument_id, sl.name AS lab_name, sl.is_d2c, sl.website_url, si.manufacturer, si.model_name \ + FROM genomics.sequencer_instrument si JOIN genomics.sequencing_lab sl ON sl.id = si.lab_id \ + WHERE si.id = $1", + ) + .bind(si_id) + .fetch_one(&mut *tx) + .await?; + // Audit in the same transaction — the decision and its trail commit together. + let new = json!({ "instrument_id": hit.instrument_id, "lab_name": hit.lab_name, "is_d2c": hit.is_d2c }); + crate::audit::log(&mut *tx, user_id, "instrument_proposal", proposal_id, "ACCEPT", None, Some(&new), None).await?; + tx.commit().await?; + Ok(hit) +} + +/// Reject a proposal (the dominant lab won't be re-proposed for this instrument). +/// Returns the (instrument_id, proposed_lab_name) for the audit comment, or `None` +/// if the proposal isn't in a reviewable state. 
+pub async fn reject_proposal( + pool: &PgPool, + proposal_id: i64, + user_id: Uuid, + reason: Option<&str>, +) -> Result)>, DbError> { + let mut tx = pool.begin().await?; + let row: Option<(String, Option)> = sqlx::query_as( + "UPDATE genomics.instrument_association_proposal p \ + SET status = 'REJECTED' \ + FROM genomics.sequencer_instrument si \ + WHERE p.id = $1 AND si.id = p.instrument_id AND p.status IN ('PENDING','READY_FOR_REVIEW') \ + RETURNING si.instrument_id, p.proposed_lab_name", + ) + .bind(proposal_id) + .fetch_optional(&mut *tx) + .await?; + if let Some((ref instrument, ref lab)) = row { + // Audit in the same transaction as the status change. + let new = json!({ "instrument_id": instrument, "rejected_lab": lab }); + crate::audit::log(&mut *tx, user_id, "instrument_proposal", proposal_id, "REJECT", None, Some(&new), reason).await?; + } + tx.commit().await?; + Ok(row) +} diff --git a/rust/crates/du-db/src/study.rs b/rust/crates/du-db/src/study.rs new file mode 100644 index 00000000..af69e05a --- /dev/null +++ b/rust/crates/du-db/src/study.rs @@ -0,0 +1,85 @@ +//! Genomic studies with their linked biosamples. There is no direct study→ +//! sample edge in the redesign; samples reach a study through its publications +//! (`pubs.publication_study` → `pubs.publication_biosample` → `core.biosample`). + +use crate::DbError; +use sqlx::PgPool; + +/// A study plus a brief list of its samples (as a JSONB array of +/// `{sample_guid, accession, source}`). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct StudyWithSamples { + pub id: i64, + pub accession: String, + pub title: Option, + pub center_name: Option, + pub samples: serde_json::Value, +} + +/// An ENA study that still lacks enriched metadata (title/center). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct EnaCandidate { + pub id: i64, + pub accession: String, +} + +/// ENA studies missing a title or center name (oldest first), capped at `limit`. 
+/// The enrichment job fetches these from the ENA portal and fills the gaps. +pub async fn needing_ena_enrichment(pool: &PgPool, limit: i64) -> Result, DbError> { + let rows: Vec = sqlx::query_as( + "SELECT id, accession FROM pubs.genomic_study \ + WHERE source = 'ENA' AND (title IS NULL OR center_name IS NULL) \ + ORDER BY id LIMIT $1", + ) + .bind(limit.clamp(1, 500)) + .fetch_all(pool) + .await?; + Ok(rows) +} + +/// Fill a study's gaps from ENA metadata. `COALESCE` only fills empty columns +/// (never clobbers curated values); `first_public` lands in `details` since it is +/// not the same as `submission_date`. Returns whether a row was updated. +pub async fn apply_ena_metadata( + pool: &PgPool, + id: i64, + title: Option<&str>, + center_name: Option<&str>, + first_public: Option, +) -> Result { + let n = sqlx::query( + "UPDATE pubs.genomic_study SET \ + title = COALESCE(title, $2), \ + center_name = COALESCE(center_name, $3), \ + details = CASE WHEN $4::date IS NOT NULL \ + THEN jsonb_set(details, '{ena_first_public}', to_jsonb($4::date::text)) \ + ELSE details END \ + WHERE id = $1", + ) + .bind(id) + .bind(title) + .bind(center_name) + .bind(first_public) + .execute(pool) + .await? 
+ .rows_affected(); + Ok(n > 0) +} + +pub async fn with_samples(pool: &PgPool) -> Result, DbError> { + let rows: Vec = sqlx::query_as( + "SELECT s.id, s.accession, s.title, s.center_name, \ + COALESCE(jsonb_agg(DISTINCT jsonb_build_object( \ + 'sample_guid', b.sample_guid, 'accession', b.accession, 'source', b.source::text)) \ + FILTER (WHERE b.sample_guid IS NOT NULL), '[]'::jsonb) AS samples \ + FROM pubs.genomic_study s \ + LEFT JOIN pubs.publication_study ps ON ps.study_id = s.id \ + LEFT JOIN pubs.publication_biosample pb ON pb.publication_id = ps.publication_id \ + LEFT JOIN core.biosample b ON b.sample_guid = pb.sample_guid AND b.deleted = false \ + GROUP BY s.id, s.accession, s.title, s.center_name \ + ORDER BY s.accession", + ) + .fetch_all(pool) + .await?; + Ok(rows) +} diff --git a/rust/crates/du-db/src/support.rs b/rust/crates/du-db/src/support.rs new file mode 100644 index 00000000..29b037f6 --- /dev/null +++ b/rust/crates/du-db/src/support.rs @@ -0,0 +1,34 @@ +//! Support / contact messages (`support.contact_message`). The public contact +//! form inserts here; curators/admins triage later (status new→read→replied→closed). + +use crate::DbError; +use sqlx::PgPool; +use uuid::Uuid; + +/// A contact-form submission. `user_id` is set when the sender is signed in. +pub struct NewContactMessage<'a> { + pub user_id: Option, + pub sender_name: Option<&'a str>, + pub sender_email: Option<&'a str>, + pub subject: Option<&'a str>, + pub message: &'a str, + pub ip_address_hash: Option<&'a str>, +} + +/// Store a contact message; returns its id. 
+pub async fn create_message(pool: &PgPool, m: &NewContactMessage<'_>) -> Result { + let id: Uuid = sqlx::query_scalar( + "INSERT INTO support.contact_message \ + (user_id, sender_name, sender_email, subject, message, ip_address_hash) \ + VALUES ($1, $2, $3, $4, $5, $6) RETURNING id", + ) + .bind(m.user_id) + .bind(m.sender_name) + .bind(m.sender_email) + .bind(m.subject) + .bind(m.message) + .bind(m.ip_address_hash) + .fetch_one(pool) + .await?; + Ok(id) +} diff --git a/rust/crates/du-db/src/test_type.rs b/rust/crates/du-db/src/test_type.rs new file mode 100644 index 00000000..55ac8d30 --- /dev/null +++ b/rust/crates/du-db/src/test_type.rs @@ -0,0 +1,49 @@ +//! Test-type taxonomy reads (`genomics.test_type_definition`) joined to the +//! empirical coverage norm (`genomics.test_type_coverage_norm`, see +//! [`crate::coverage`]). Read-only reference data for the Edge/Navigator: the +//! advertised capabilities + the cohort's typical coverage for each test type. + +use crate::DbError; +use sqlx::PgPool; + +/// A test type's definition plus its empirical coverage norm (when computed). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct TestTypeInfo { + pub code: String, + pub display_name: String, + pub category: String, + pub vendor: Option, + pub target_type: Option, + /// Advertised minimum depth (curator/ETL-set). + pub expected_min_depth: Option, + pub supports_haplogroup_y: bool, + pub supports_haplogroup_mt: bool, + pub supports_autosomal_ibd: bool, + pub supports_ancestry: bool, + pub typical_file_formats: Vec, + /// Empirical norm (federated cohort): samples observed + typical depth/coverage. 
+ pub norm_sample_count: Option, + pub norm_median_depth: Option, + pub norm_median_pct_30x: Option, +} + +const SELECT: &str = "SELECT ttd.code, ttd.display_name, ttd.category::text AS category, ttd.vendor, \ + ttd.target_type::text AS target_type, ttd.expected_min_depth, ttd.supports_haplogroup_y, \ + ttd.supports_haplogroup_mt, ttd.supports_autosomal_ibd, ttd.supports_ancestry, ttd.typical_file_formats, \ + n.sample_count AS norm_sample_count, n.median_mean_depth AS norm_median_depth, \ + n.median_pct_30x AS norm_median_pct_30x \ + FROM genomics.test_type_definition ttd \ + LEFT JOIN genomics.test_type_coverage_norm n ON n.test_type = ttd.code"; + +/// All test types, ordered by code. +pub async fn list(pool: &PgPool) -> Result, DbError> { + Ok(sqlx::query_as(&format!("{SELECT} ORDER BY ttd.code")).fetch_all(pool).await?) +} + +/// One test type by its code. +pub async fn get(pool: &PgPool, code: &str) -> Result, DbError> { + Ok(sqlx::query_as(&format!("{SELECT} WHERE ttd.code = $1")) + .bind(code) + .fetch_optional(pool) + .await?) +} diff --git a/rust/crates/du-db/src/testing.rs b/rust/crates/du-db/src/testing.rs new file mode 100644 index 00000000..c9074577 --- /dev/null +++ b/rust/crates/du-db/src/testing.rs @@ -0,0 +1,101 @@ +//! Test-only support: ephemeral, freshly-migrated databases. +//! +//! Destructive integration tests (full-mirror YBrowse reconcile, source-scoped +//! deletes) must never run against the shared `decodingus` dev database. This +//! module creates a uniquely-named database on the SAME server as a given +//! `DATABASE_URL`, migrates it, and drops it (`WITH (FORCE)`) when the guard is +//! dropped — so each such test gets a private, throwaway catalog. +//! +//! Public (rather than `#[cfg(test)]`) so integration tests in sibling crates +//! (e.g. du-jobs) can share it; it pulls in no extra dependencies and is unused +//! in production builds. 
+ +use crate::{connect, run_migrations, DbError, PgPool}; +use sqlx::Connection; + +/// An isolated database that drops itself on `Drop`. +pub struct EphemeralDb { + pool: PgPool, + /// Maintenance-db DSN (`.../postgres`) used to create and later drop us. + admin_url: String, + db_name: String, +} + +impl EphemeralDb { + /// A pool connected to the isolated database. + pub fn pool(&self) -> &PgPool { + &self.pool + } +} + +impl Drop for EphemeralDb { + fn drop(&mut self) { + // Drop can't be async, and our own pool may still hold connections, so + // tear down on a throwaway thread+runtime with `WITH (FORCE)` (PG13+), + // which terminates lingering backends. Runs even on test panic. + let admin_url = self.admin_url.clone(); + let db_name = self.db_name.clone(); + let _ = std::thread::spawn(move || { + let Ok(rt) = tokio::runtime::Builder::new_current_thread().enable_all().build() else { + return; + }; + rt.block_on(async move { + if let Ok(mut conn) = sqlx::postgres::PgConnection::connect(&admin_url).await { + let _ = sqlx::query(&format!("DROP DATABASE IF EXISTS \"{db_name}\" WITH (FORCE)")) + .execute(&mut conn) + .await; + let _ = conn.close().await; + } + }); + }) + .join(); + } +} + +/// Split a `postgres://…/db?query` DSN into (everything up to the db name, db, +/// optional query) so we can swap the database segment. +fn split_dsn(dsn: &str) -> Option<(String, String)> { + let (base, query) = match dsn.split_once('?') { + Some((b, q)) => (b, Some(q)), + None => (dsn, None), + }; + let (host_part, _db) = base.rsplit_once('/')?; + let suffix = query.map(|q| format!("?{q}")).unwrap_or_default(); + Some((host_part.to_string(), suffix)) +} + +/// Create + migrate a private database on the same server as `base_url` +/// (a normal `DATABASE_URL`). The returned guard drops the database when it +/// goes out of scope. 
+pub async fn ephemeral_db(base_url: &str) -> Result { + let (host_part, query_suffix) = split_dsn(base_url) + .ok_or_else(|| DbError::Decode(format!("DATABASE_URL missing db path: {base_url:?}")))?; + + // Unique name without an RNG dep: pid + nanos + a process-wide counter, so + // tests running concurrently in one binary can't collide on a coarse clock. + use std::sync::atomic::{AtomicU64, Ordering}; + static SEQ: AtomicU64 = AtomicU64::new(0); + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let seq = SEQ.fetch_add(1, Ordering::Relaxed); + let db_name = format!("du_test_{}_{}_{}", std::process::id(), nanos, seq); + + let admin_url = format!("{host_part}/postgres{query_suffix}"); + let new_url = format!("{host_part}/{db_name}{query_suffix}"); + + // CREATE DATABASE can't run inside a transaction — use a single connection. + let mut admin = sqlx::postgres::PgConnection::connect(&admin_url).await?; + sqlx::query(&format!("CREATE DATABASE \"{db_name}\"")) + .execute(&mut admin) + .await?; + admin.close().await?; + + // Small pool — these tests issue sequential queries; keeps the total + // connection count low when many test binaries run in parallel. + let pool = connect(&new_url, 2).await?; + run_migrations(&pool).await?; + + Ok(EphemeralDb { pool, admin_url, db_name }) +} diff --git a/rust/crates/du-db/src/tree_revision.rs b/rust/crates/du-db/src/tree_revision.rs new file mode 100644 index 00000000..dd12365e --- /dev/null +++ b/rust/crates/du-db/src/tree_revision.rs @@ -0,0 +1,35 @@ +//! Tree revision marker — the cache-revalidation token (ETag source) for the +//! haplogroup-tree API endpoints. A single persisted, monotonic counter +//! (`tree.tree_revision`, migration 0024) bumped explicitly by tree-mutating +//! operations (change-set apply, coordinate enrichment, YBrowse reconcile, +//! tree-init build) — not by a per-row trigger, to keep the hot per-variant +//! write path free. 
+ +use crate::DbError; +use sqlx::types::chrono::{DateTime, Utc}; +use sqlx::PgPool; + +/// The current tree revision and when it last changed. +pub async fn current(pool: &PgPool) -> Result<(i64, DateTime), DbError> { + let row: (i64, DateTime) = + sqlx::query_as("SELECT revision, updated_at FROM tree.tree_revision WHERE id = 1") + .fetch_one(pool) + .await?; + Ok(row) +} + +/// Bump the tree revision (+1) and return the new value. Generic over the +/// executor so it runs standalone (`&PgPool`) or inside the mutating transaction +/// it marks (`&mut *tx`) — bumping in-txn keeps the marker atomic with the change. +pub async fn bump<'e, E>(executor: E) -> Result +where + E: sqlx::PgExecutor<'e>, +{ + let rev: i64 = sqlx::query_scalar( + "UPDATE tree.tree_revision SET revision = revision + 1, updated_at = now() \ + WHERE id = 1 RETURNING revision", + ) + .fetch_one(executor) + .await?; + Ok(rev) +} diff --git a/rust/crates/du-db/src/tree_sample.rs b/rust/crates/du-db/src/tree_sample.rs new file mode 100644 index 00000000..8796f771 --- /dev/null +++ b/rust/crates/du-db/src/tree_sample.rs @@ -0,0 +1,289 @@ +//! YFull-style sample placement: attach non-D2C biosamples as leaves under the tree node +//! their published haplogroup call resolves to. A biosample's call is the paper's stated +//! Y/mt haplogroup (`core.biosample.original_haplogroups`); resolution to a `tree.haplogroup` +//! node reuses [`crate::haplogroup::resolve_name_or_variant`] (name → alias → defining-variant +//! → normalization). D2C (`source = 'CITIZEN'`) samples are excluded; an unresolvable call is +//! kept `UNPLACED` for curator triage. Mirrors the advisory-locked declarative-recompute +//! discipline of the coverage/IBD engines. + +use crate::{pg_enum_label, DbError}; +use du_domain::enums::DnaType; +use serde_json::Value; +use sqlx::PgPool; +use std::collections::HashMap; +use uuid::Uuid; + +/// Advisory-lock key guarding concurrent placement recomputes. 
+const PLACEMENT_ADVISORY_KEY: i64 = 0x4C45_4146_5359; // "LEAFSY" + +/// Outcome of [`recompute_placements`]. +#[derive(Debug, Default, Clone)] +pub struct PlacementReport { + pub placed: u64, + pub unplaced: u64, +} + +/// The `original_haplogroups` JSONB keys carrying a DNA type's call (primary, fallback). +fn call_keys(dna_type: DnaType) -> (&'static str, &'static str) { + match dna_type { + DnaType::YDna => ("y", "y_result"), + DnaType::MtDna => ("mt", "mt_result"), + } +} + +/// Recompute leaf placements for `dna_type` from the current non-D2C biosample calls and the +/// current tree. Single-flighted by an advisory lock (a second caller no-ops); unlocks on every +/// path. Declarative (assign placements, prune samples that no longer qualify); bumps +/// `tree_revision` since placements feed the cached tree's per-node count. +pub async fn recompute_placements(pool: &PgPool, dna_type: DnaType) -> Result { + let mut lock = pool.acquire().await?; + let locked: bool = sqlx::query_scalar("SELECT pg_try_advisory_lock($1)") + .bind(PLACEMENT_ADVISORY_KEY) + .fetch_one(&mut *lock) + .await?; + if !locked { + return Ok(PlacementReport::default()); + } + let result = recompute_placements_locked(pool, dna_type).await; + let _ = sqlx::query("SELECT pg_advisory_unlock($1)") + .bind(PLACEMENT_ADVISORY_KEY) + .execute(&mut *lock) + .await; + result +} + +async fn recompute_placements_locked(pool: &PgPool, dna_type: DnaType) -> Result { + let (primary, fallback) = call_keys(dna_type); + + // Non-D2C, non-deleted samples + their published calls (the leaf candidates). + let rows: Vec<(Uuid, Value)> = sqlx::query_as( + "SELECT sample_guid, original_haplogroups FROM core.biosample \ + WHERE source <> 'CITIZEN'::core.biosample_source AND deleted = false", + ) + .fetch_all(pool) + .await?; + + // Curator-placed rows (status CURATED) are decisions we never auto-overwrite. 
+ let dna_label = pg_enum_label(&dna_type)?; + let curated: std::collections::HashSet = sqlx::query_scalar::<_, Uuid>( + "SELECT sample_guid FROM tree.haplogroup_sample WHERE dna_type::text = $1 AND status = 'CURATED'", + ) + .bind(&dna_label) + .fetch_all(pool) + .await? + .into_iter() + .collect(); + + // Resolve each sample's call to a node id, caching by call text (calls repeat heavily). + let mut cache: HashMap> = HashMap::new(); + let mut processed: Vec = Vec::new(); + let mut placements: Vec<(Uuid, Option, String)> = Vec::new(); + let mut report = PlacementReport::default(); + for (guid, arr) in rows { + // Preserve curator placements: keep the row (protect from prune), don't re-resolve. + if curated.contains(&guid) { + processed.push(guid); + continue; + } + let Some(call) = crate::biosample::pick_original_call(&arr, primary, fallback) else { + continue; // no published call for this DNA type — not a leaf + }; + let node_id = match cache.get(&call) { + Some(id) => *id, + None => { + let id = match crate::haplogroup::resolve_name_or_variant(pool, &call, dna_type).await? 
{ + Some(name) => crate::haplogroup::get_by_name(pool, &name, dna_type).await?.map(|h| h.id.0), + None => None, + }; + cache.insert(call.clone(), id); + id + } + }; + if node_id.is_some() { + report.placed += 1; + } else { + report.unplaced += 1; + } + processed.push(guid); + placements.push((guid, node_id, call)); + } + + let dna = &dna_label; + let mut tx = pool.begin().await?; + for (guid, node_id, call) in &placements { + let status = if node_id.is_some() { "PLACED" } else { "UNPLACED" }; + sqlx::query( + "INSERT INTO tree.haplogroup_sample (sample_guid, dna_type, haplogroup_id, call_text, status, refreshed_at) \ + VALUES ($1, $2::core.dna_type, $3, $4, $5, now()) \ + ON CONFLICT (sample_guid, dna_type) DO UPDATE SET \ + haplogroup_id = EXCLUDED.haplogroup_id, call_text = EXCLUDED.call_text, \ + status = EXCLUDED.status, refreshed_at = now()", + ) + .bind(guid) + .bind(dna) + .bind(node_id) + .bind(call) + .bind(status) + .execute(&mut *tx) + .await?; + } + // Prune placements whose sample no longer qualifies (deleted / now CITIZEN / lost its call). + sqlx::query("DELETE FROM tree.haplogroup_sample WHERE dna_type::text = $1 AND sample_guid <> ALL($2)") + .bind(dna) + .bind(&processed) + .execute(&mut *tx) + .await?; + crate::tree_revision::bump(&mut *tx).await?; + tx.commit().await?; + Ok(report) +} + +/// Direct PLACED sample counts per haplogroup node id (the serving rollup input — callers +/// sum cumulatively over the subtree). +pub async fn counts_by_node(pool: &PgPool, dna_type: DnaType) -> Result, DbError> { + let rows: Vec<(i64, i64)> = sqlx::query_as( + "SELECT haplogroup_id, count(*)::bigint FROM tree.haplogroup_sample \ + WHERE dna_type::text = $1 AND status IN ('PLACED','CURATED') AND haplogroup_id IS NOT NULL \ + GROUP BY haplogroup_id", + ) + .bind(pg_enum_label(&dna_type)?) 
+ .fetch_all(pool) + .await?; + Ok(rows.into_iter().collect()) +} + +/// Cumulative PLACED counts: node id → number of samples at or below it, rolled up over the +/// **whole** tree (each placed sample climbs to every ancestor). Used by the depth-bounded web +/// cladogram, where a visible node may have placed descendants below the window. +pub async fn cumulative_counts(pool: &PgPool, dna_type: DnaType) -> Result, DbError> { + let rows: Vec<(i64, i64)> = sqlx::query_as( + "WITH RECURSIVE anc(start, id) AS ( \ + SELECT haplogroup_id, haplogroup_id FROM tree.haplogroup_sample \ + WHERE dna_type::text = $1 AND status IN ('PLACED','CURATED') AND haplogroup_id IS NOT NULL \ + UNION ALL \ + SELECT a.start, r.parent_haplogroup_id FROM anc a \ + JOIN tree.haplogroup_relationship r ON r.child_haplogroup_id = a.id AND r.valid_until IS NULL \ + ) \ + SELECT id, count(*)::bigint FROM anc GROUP BY id", + ) + .bind(pg_enum_label(&dna_type)?) + .fetch_all(pool) + .await?; + Ok(rows.into_iter().collect()) +} + +/// A placed leaf sample under a node (pseudonymous-safe: accession/alias + optional citation). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct LeafSample { + pub sample_guid: Uuid, + pub accession: Option, + pub alias: Option, + pub source: String, + pub pub_title: Option, + pub pub_doi: Option, + pub pub_url: Option, +} + +/// Placed-sample leaf labels per node id (samples sitting **directly** at the node), for the +/// cladogram's YFull-style tips. Label = accession, else alias, else the short guid. Bounded +/// to `node_ids` (the rendered window); caller caps per node for display. 
+pub async fn direct_labels(pool: &PgPool, dna_type: DnaType, node_ids: &[i64]) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT hs.haplogroup_id, COALESCE(b.accession, b.alias, left(b.sample_guid::text, 8)) AS label \ + FROM tree.haplogroup_sample hs \ + JOIN core.biosample b ON b.sample_guid = hs.sample_guid \ + WHERE hs.dna_type::text = $1 AND hs.status IN ('PLACED','CURATED') \ + AND hs.haplogroup_id = ANY($2) AND b.deleted = false \ + ORDER BY hs.haplogroup_id, label", + ) + .bind(pg_enum_label(&dna_type)?) + .bind(node_ids) + .fetch_all(pool) + .await?) +} + +/// An unresolved published call awaiting curator triage (no node matched). +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct UnplacedRow { + pub sample_guid: Uuid, + pub call_text: String, + pub accession: Option, + pub alias: Option, +} + +/// Samples whose published call didn't resolve to a node (`status = UNPLACED`) — the curator +/// triage queue, grouped naturally by `call_text`. +pub async fn unplaced(pool: &PgPool, dna_type: DnaType, limit: i64) -> Result, DbError> { + Ok(sqlx::query_as( + "SELECT hs.sample_guid, hs.call_text, b.accession, b.alias \ + FROM tree.haplogroup_sample hs \ + JOIN core.biosample b ON b.sample_guid = hs.sample_guid \ + WHERE hs.dna_type::text = $1 AND hs.status = 'UNPLACED' \ + ORDER BY hs.call_text, b.accession NULLS LAST LIMIT $2", + ) + .bind(pg_enum_label(&dna_type)?) + .bind(limit) + .fetch_all(pool) + .await?) +} + +/// Curator manual placement: resolve `node_name` and pin the sample's placement to it +/// (`status = CURATED` — a decision the recompute preserves, unlike auto `PLACED`). Bumps +/// `tree_revision` (the count changes). Returns `false` if the node or the sample's placement +/// row doesn't exist. +pub async fn place_sample(pool: &PgPool, sample_guid: Uuid, dna_type: DnaType, node_name: &str) -> Result { + let Some(name) = crate::haplogroup::resolve_name_or_variant(pool, node_name, dna_type).await? 
else { + return Ok(false); + }; + let Some(node) = crate::haplogroup::get_by_name(pool, &name, dna_type).await? else { + return Ok(false); + }; + let mut tx = pool.begin().await?; + let affected = sqlx::query( + "UPDATE tree.haplogroup_sample SET haplogroup_id = $1, status = 'CURATED', refreshed_at = now() \ + WHERE sample_guid = $2 AND dna_type::text = $3", + ) + .bind(node.id.0) + .bind(sample_guid) + .bind(pg_enum_label(&dna_type)?) + .execute(&mut *tx) + .await? + .rows_affected(); + if affected > 0 { + crate::tree_revision::bump(&mut *tx).await?; + } + tx.commit().await?; + Ok(affected > 0) +} + +/// The placed samples at-or-below a node (the YFull "open a clade → see its samples" list), +/// with each sample's most recent linked publication when present. +pub async fn samples_under(pool: &PgPool, node_name: &str, dna_type: DnaType) -> Result, DbError> { + Ok(sqlx::query_as( + "WITH RECURSIVE sub AS ( \ + SELECT id FROM tree.haplogroup \ + WHERE name = $1 AND haplogroup_type::text = $2 AND valid_until IS NULL \ + UNION ALL \ + SELECT r.child_haplogroup_id FROM tree.haplogroup_relationship r \ + JOIN sub ON r.parent_haplogroup_id = sub.id \ + WHERE r.valid_until IS NULL \ + ) \ + SELECT b.sample_guid, b.accession, b.alias, b.source::text AS source, \ + p.title AS pub_title, p.doi AS pub_doi, p.url AS pub_url \ + FROM tree.haplogroup_sample hs \ + JOIN sub ON sub.id = hs.haplogroup_id \ + JOIN core.biosample b ON b.sample_guid = hs.sample_guid \ + LEFT JOIN LATERAL ( \ + SELECT pub.title, pub.doi, pub.url FROM pubs.publication_biosample pb \ + JOIN pubs.publication pub ON pub.id = pb.publication_id \ + WHERE pb.sample_guid = b.sample_guid \ + ORDER BY pub.publication_date DESC NULLS LAST LIMIT 1 \ + ) p ON true \ + WHERE hs.dna_type::text = $2 AND hs.status IN ('PLACED','CURATED') AND b.deleted = false \ + ORDER BY b.accession NULLS LAST, b.sample_guid", + ) + .bind(node_name) + .bind(pg_enum_label(&dna_type)?) + .fetch_all(pool) + .await?) 
+} diff --git a/rust/crates/du-db/src/variant.rs b/rust/crates/du-db/src/variant.rs new file mode 100644 index 00000000..e2c4008a --- /dev/null +++ b/rust/crates/du-db/src/variant.rs @@ -0,0 +1,556 @@ +//! Queries for `core.variant`. Demonstrates the du-db mapping pattern: +//! enum columns are fetched as `::text` and parsed via serde; JSONB columns are +//! read through `sqlx::types::Json` into the du-domain payload structs. + +use crate::{parse_pg_enum, pg_enum_label, DbError, Page}; +use du_domain::enums::{DnaType, MutationType, NamingStatus}; +use du_domain::ids::VariantId; +use du_domain::variant::{Aliases, Annotations, Coordinates, NewVariant, Variant}; +use sqlx::types::Json; +use sqlx::PgPool; + +#[derive(sqlx::FromRow)] +struct VariantRow { + id: i64, + canonical_name: String, + mutation_type: String, + naming_status: String, + aliases: Json, + coordinates: Json, + annotations: Json, +} + +impl VariantRow { + fn into_domain(self) -> Result { + Ok(Variant { + id: VariantId(self.id), + canonical_name: self.canonical_name, + mutation_type: parse_pg_enum(&self.mutation_type, "mutation_type")?, + naming_status: parse_pg_enum(&self.naming_status, "naming_status")?, + aliases: self.aliases.0, + coordinates: self.coordinates.0, + annotations: self.annotations.0, + }) + } +} + +const SELECT: &str = "SELECT id, canonical_name, mutation_type::text AS mutation_type, \ + naming_status::text AS naming_status, aliases, coordinates, annotations FROM core.variant"; + +pub async fn get_by_id(pool: &PgPool, id: VariantId) -> Result, DbError> { + let row: Option = sqlx::query_as(&format!("{SELECT} WHERE id = $1")) + .bind(id.0) + .fetch_optional(pool) + .await?; + row.map(VariantRow::into_domain).transpose() +} + +/// Create a variant (scalar fields + aliases; coordinates/annotations default +/// empty and are managed elsewhere). Returns the new id. 
+pub async fn create( + pool: &PgPool, + canonical_name: &str, + mutation_type: MutationType, + naming_status: NamingStatus, + aliases: &Aliases, +) -> Result { + let aliases_json = serde_json::to_value(aliases).map_err(|e| DbError::Decode(e.to_string()))?; + let id: i64 = sqlx::query_scalar( + "INSERT INTO core.variant (canonical_name, mutation_type, naming_status, aliases) \ + VALUES ($1, $2::core.mutation_type, $3::core.naming_status, $4) RETURNING id", + ) + .bind(canonical_name) + .bind(pg_enum_label(&mutation_type)?) + .bind(pg_enum_label(&naming_status)?) + .bind(aliases_json) + .fetch_one(pool) + .await?; + Ok(VariantId(id)) +} + +/// Update a variant's scalar fields + aliases. Coordinates and annotations are +/// left untouched. Returns whether a row was affected. +pub async fn update( + pool: &PgPool, + id: VariantId, + canonical_name: &str, + mutation_type: MutationType, + naming_status: NamingStatus, + aliases: &Aliases, +) -> Result { + let aliases_json = serde_json::to_value(aliases).map_err(|e| DbError::Decode(e.to_string()))?; + let affected = sqlx::query( + "UPDATE core.variant SET canonical_name=$2, mutation_type=$3::core.mutation_type, \ + naming_status=$4::core.naming_status, aliases=$5, updated_at=now() WHERE id=$1", + ) + .bind(id.0) + .bind(canonical_name) + .bind(pg_enum_label(&mutation_type)?) + .bind(pg_enum_label(&naming_status)?) + .bind(aliases_json) + .execute(pool) + .await? + .rows_affected(); + Ok(affected > 0) +} + +/// Region types whose sequence is structurally unreliable for Y-SNP placement +/// (multi-copy / repeat-rich), so a variant landing inside one should not be +/// trusted as branch-defining without scrutiny. AZF intervals are deliberately +/// excluded: that's a functional annotation, and AZFa is largely single-copy +/// X-degenerate sequence. Sourced from `du_jobs::yregions` (T2T-CHM13 Y BEDs). 
+const UNRELIABLE_REGION_TYPES: [&str; 4] = + ["palindromic", "ampliconic", "inverted_repeat", "heterochromatin"]; + +/// Recompute `annotations.region_overlaps` for every variant from the current +/// `core.genome_region` set, comparing hs1 coordinates (1-based inclusive on +/// both sides). Each entry is `":"},initialize:function(t){c(this,t),this._attributions={}},onAdd:function(t){for(var e in(t.attributionControl=this)._container=P("div","leaflet-control-attribution"),Ie(this._container),t._layers)t._layers[e].getAttribution&&this.addAttribution(t._layers[e].getAttribution());return this._update(),t.on("layeradd",this._addAttribution,this),this._container},onRemove:function(t){t.off("layeradd",this._addAttribution,this)},_addAttribution:function(t){t.layer.getAttribution&&(this.addAttribution(t.layer.getAttribution()),t.layer.once("remove",function(){this.removeAttribution(t.layer.getAttribution())},this))},setPrefix:function(t){return this.options.prefix=t,this._update(),this},addAttribution:function(t){return t&&(this._attributions[t]||(this._attributions[t]=0),this._attributions[t]++,this._update()),this},removeAttribution:function(t){return t&&this._attributions[t]&&(this._attributions[t]--,this._update()),this},_update:function(){if(this._map){var t,e=[];for(t in this._attributions)this._attributions[t]&&e.push(t);var i=[];this.options.prefix&&i.push(this.options.prefix),e.length&&i.push(e.join(", ")),this._container.innerHTML=i.join(' ')}}}),n=(A.mergeOptions({attributionControl:!0}),A.addInitHook(function(){this.options.attributionControl&&(new Ke).addTo(this)}),B.Layers=Ve,B.Zoom=qe,B.Scale=Ge,B.Attribution=Ke,Ue.layers=function(t,e,i){return new Ve(t,e,i)},Ue.zoom=function(t){return new qe(t)},Ue.scale=function(t){return new Ge(t)},Ue.attribution=function(t){return new Ke(t)},et.extend({initialize:function(t){this._map=t},enable:function(){return this._enabled||(this._enabled=!0,this.addHooks()),this},disable:function(){return 
this._enabled&&(this._enabled=!1,this.removeHooks()),this},enabled:function(){return!!this._enabled}})),ft=(n.addTo=function(t,e){return t.addHandler(e,this),this},{Events:e}),Ye=b.touch?"touchstart mousedown":"mousedown",Xe=it.extend({options:{clickTolerance:3},initialize:function(t,e,i,n){c(this,n),this._element=t,this._dragStartTarget=e||t,this._preventOutline=i},enable:function(){this._enabled||(S(this._dragStartTarget,Ye,this._onDown,this),this._enabled=!0)},disable:function(){this._enabled&&(Xe._dragging===this&&this.finishDrag(!0),k(this._dragStartTarget,Ye,this._onDown,this),this._enabled=!1,this._moved=!1)},_onDown:function(t){var e,i;this._enabled&&(this._moved=!1,ve(this._element,"leaflet-zoom-anim")||(t.touches&&1!==t.touches.length?Xe._dragging===this&&this.finishDrag():Xe._dragging||t.shiftKey||1!==t.which&&1!==t.button&&!t.touches||((Xe._dragging=this)._preventOutline&&Me(this._element),Le(),re(),this._moving||(this.fire("down"),i=t.touches?t.touches[0]:t,e=Ce(this._element),this._startPoint=new p(i.clientX,i.clientY),this._startPos=Pe(this._element),this._parentScale=Ze(e),i="mousedown"===t.type,S(document,i?"mousemove":"touchmove",this._onMove,this),S(document,i?"mouseup":"touchend touchcancel",this._onUp,this)))))},_onMove:function(t){var e;this._enabled&&(t.touches&&1e&&(i.push(t[n]),o=n);oe.max.x&&(i|=2),t.ye.max.y&&(i|=8),i}function ri(t,e,i,n){var o=e.x,e=e.y,s=i.x-o,r=i.y-e,a=s*s+r*r;return 0this._layersMaxZoom&&this.setZoom(this._layersMaxZoom),void 0===this.options.minZoom&&this._layersMinZoom&&this.getZoom()t.y!=n.y>t.y&&t.x<(n.x-i.x)*(t.y-i.y)/(n.y-i.y)+i.x&&(l=!l);return l||yi.prototype._containsPoint.call(this,t,!0)}});var wi=ci.extend({initialize:function(t,e){c(this,e),this._layers={},t&&this.addData(t)},addData:function(t){var 
e,i,n,o=d(t)?t:t.features;if(o){for(e=0,i=o.length;es.x&&(r=i.x+a-s.x+o.x),i.x-r-n.x<(a=0)&&(r=i.x-n.x),i.y+e+o.y>s.y&&(a=i.y+e-s.y+o.y),i.y-a-n.y<0&&(a=i.y-n.y),(r||a)&&(this.options.keepInView&&(this._autopanning=!0),t.fire("autopanstart").panBy([r,a]))))},_getAnchor:function(){return m(this._source&&this._source._getPopupAnchor?this._source._getPopupAnchor():[0,0])}})),Ii=(A.mergeOptions({closePopupOnClick:!0}),A.include({openPopup:function(t,e,i){return this._initOverlay(Bi,t,e,i).openOn(this),this},closePopup:function(t){return(t=arguments.length?t:this._popup)&&t.close(),this}}),o.include({bindPopup:function(t,e){return this._popup=this._initOverlay(Bi,this._popup,t,e),this._popupHandlersAdded||(this.on({click:this._openPopup,keypress:this._onKeyPress,remove:this.closePopup,move:this._movePopup}),this._popupHandlersAdded=!0),this},unbindPopup:function(){return this._popup&&(this.off({click:this._openPopup,keypress:this._onKeyPress,remove:this.closePopup,move:this._movePopup}),this._popupHandlersAdded=!1,this._popup=null),this},openPopup:function(t){return this._popup&&(this instanceof ci||(this._popup._source=this),this._popup._prepareOpen(t||this._latlng)&&this._popup.openOn(this._map)),this},closePopup:function(){return this._popup&&this._popup.close(),this},togglePopup:function(){return this._popup&&this._popup.toggle(this),this},isPopupOpen:function(){return!!this._popup&&this._popup.isOpen()},setPopupContent:function(t){return this._popup&&this._popup.setContent(t),this},getPopup:function(){return this._popup},_openPopup:function(t){var e;this._popup&&this._map&&(Re(t),e=t.layer||t.target,this._popup._source!==e||e instanceof 
fi?(this._popup._source=e,this.openPopup(t.latlng)):this._map.hasLayer(this._popup)?this.closePopup():this.openPopup(t.latlng))},_movePopup:function(t){this._popup.setLatLng(t.latlng)},_onKeyPress:function(t){13===t.originalEvent.keyCode&&this._openPopup(t)}}),Ai.extend({options:{pane:"tooltipPane",offset:[0,0],direction:"auto",permanent:!1,sticky:!1,opacity:.9},onAdd:function(t){Ai.prototype.onAdd.call(this,t),this.setOpacity(this.options.opacity),t.fire("tooltipopen",{tooltip:this}),this._source&&(this.addEventParent(this._source),this._source.fire("tooltipopen",{tooltip:this},!0))},onRemove:function(t){Ai.prototype.onRemove.call(this,t),t.fire("tooltipclose",{tooltip:this}),this._source&&(this.removeEventParent(this._source),this._source.fire("tooltipclose",{tooltip:this},!0))},getEvents:function(){var t=Ai.prototype.getEvents.call(this);return this.options.permanent||(t.preclick=this.close),t},_initLayout:function(){var t="leaflet-tooltip "+(this.options.className||"")+" leaflet-zoom-"+(this._zoomAnimated?"animated":"hide");this._contentNode=this._container=P("div",t),this._container.setAttribute("role","tooltip"),this._container.setAttribute("id","leaflet-tooltip-"+h(this))},_updateLayout:function(){},_adjustPan:function(){},_setPosition:function(t){var e,i=this._map,n=this._container,o=i.latLngToContainerPoint(i.getCenter()),i=i.layerPointToContainerPoint(t),s=this.options.direction,r=n.offsetWidth,a=n.offsetHeight,h=m(this.options.offset),l=this._getAnchor(),i="top"===s?(e=r/2,a):"bottom"===s?(e=r/2,0):(e="center"===s?r/2:"right"===s?0:"left"===s?r:i.xthis.options.maxZoom||nthis.options.maxZoom||void 0!==this.options.minZoom&&oi.max.x)||!e.wrapLat&&(t.yi.max.y))return!1}return!this.options.bounds||(e=this._tileCoordsToBounds(t),g(this.options.bounds).overlaps(e))},_keyToBounds:function(t){return this._tileCoordsToBounds(this._keyToTileCoords(t))},_tileCoordsToNwSe:function(t){var 
e=this._map,i=this.getTileSize(),n=t.scaleBy(i),i=n.add(i);return[e.unproject(n,t.z),e.unproject(i,t.z)]},_tileCoordsToBounds:function(t){t=this._tileCoordsToNwSe(t),t=new s(t[0],t[1]);return t=this.options.noWrap?t:this._map.wrapLatLngBounds(t)},_tileCoordsToKey:function(t){return t.x+":"+t.y+":"+t.z},_keyToTileCoords:function(t){var t=t.split(":"),e=new p(+t[0],+t[1]);return e.z=+t[2],e},_removeTile:function(t){var e=this._tiles[t];e&&(T(e.el),delete this._tiles[t],this.fire("tileunload",{tile:e.el,coords:this._keyToTileCoords(t)}))},_initTile:function(t){M(t,"leaflet-tile");var e=this.getTileSize();t.style.width=e.x+"px",t.style.height=e.y+"px",t.onselectstart=u,t.onmousemove=u,b.ielt9&&this.options.opacity<1&&C(t,this.options.opacity)},_addTile:function(t,e){var i=this._getTilePos(t),n=this._tileCoordsToKey(t),o=this.createTile(this._wrapCoords(t),a(this._tileReady,this,t));this._initTile(o),this.createTile.length<2&&x(a(this._tileReady,this,t,null,o)),Z(o,i),this._tiles[n]={el:o,coords:t,current:!0},e.appendChild(o),this.fire("tileloadstart",{tile:o,coords:t})},_tileReady:function(t,e,i){e&&this.fire("tileerror",{error:e,tile:i,coords:t});var n=this._tileCoordsToKey(t);(i=this._tiles[n])&&(i.loaded=+new Date,this._map._fadeAnimated?(C(i.el,0),r(this._fadeFrame),this._fadeFrame=x(this._updateOpacity,this)):(i.active=!0,this._pruneTiles()),e||(M(i.el,"leaflet-tile-loaded"),this.fire("tileload",{tile:i.el,coords:t})),this._noTilesToLoad()&&(this._loading=!1,this.fire("load"),b.ielt9||!this._map._fadeAnimated?x(this._pruneTiles,this):setTimeout(a(this._pruneTiles,this),250)))},_getTilePos:function(t){return t.scaleBy(this.getTileSize()).subtract(this._level.origin)},_wrapCoords:function(t){var e=new p(this._wrapX?H(t.x,this._wrapX):t.x,this._wrapY?H(t.y,this._wrapY):t.y);return e.z=t.z,e},_pxBoundsToTileRange:function(t){var e=this.getTileSize();return new f(t.min.unscaleBy(e).floor(),t.max.unscaleBy(e).ceil().subtract([1,1]))},_noTilesToLoad:function(){for(var t 
in this._tiles)if(!this._tiles[t].loaded)return!1;return!0}});var Di=Ni.extend({options:{minZoom:0,maxZoom:18,subdomains:"abc",errorTileUrl:"",zoomOffset:0,tms:!1,zoomReverse:!1,detectRetina:!1,crossOrigin:!1,referrerPolicy:!1},initialize:function(t,e){this._url=t,(e=c(this,e)).detectRetina&&b.retina&&0')}}catch(t){}return function(t){return document.createElement("<"+t+' xmlns="urn:schemas-microsoft.com:vml" class="lvml">')}}(),zt={_initContainer:function(){this._container=P("div","leaflet-vml-container")},_update:function(){this._map._animatingZoom||(Wi.prototype._update.call(this),this.fire("update"))},_initPath:function(t){var e=t._container=Vi("shape");M(e,"leaflet-vml-shape "+(this.options.className||"")),e.coordsize="1 1",t._path=Vi("path"),e.appendChild(t._path),this._updateStyle(t),this._layers[h(t)]=t},_addPath:function(t){var e=t._container;this._container.appendChild(e),t.options.interactive&&t.addInteractiveTarget(e)},_removePath:function(t){var e=t._container;T(e),t.removeInteractiveTarget(e),delete this._layers[h(t)]},_updateStyle:function(t){var e=t._stroke,i=t._fill,n=t.options,o=t._container;o.stroked=!!n.stroke,o.filled=!!n.fill,n.stroke?(e=e||(t._stroke=Vi("stroke")),o.appendChild(e),e.weight=n.weight+"px",e.color=n.color,e.opacity=n.opacity,n.dashArray?e.dashStyle=d(n.dashArray)?n.dashArray.join(" "):n.dashArray.replace(/( *, *)/g," "):e.dashStyle="",e.endcap=n.lineCap.replace("butt","flat"),e.joinstyle=n.lineJoin):e&&(o.removeChild(e),t._stroke=null),n.fill?(i=i||(t._fill=Vi("fill")),o.appendChild(i),i.color=n.fillColor||n.color,i.opacity=n.fillOpacity):i&&(o.removeChild(i),t._fill=null)},_updateCircle:function(t){var e=t._point.round(),i=Math.round(t._radius),n=Math.round(t._radiusY||i);this._setPath(t,t._empty()?"M0 0":"AL "+e.x+","+e.y+" "+i+","+n+" 
0,23592600")},_setPath:function(t,e){t._path.v=e},_bringToFront:function(t){fe(t._container)},_bringToBack:function(t){ge(t._container)}},qi=b.vml?Vi:ct,Gi=Wi.extend({_initContainer:function(){this._container=qi("svg"),this._container.setAttribute("pointer-events","none"),this._rootGroup=qi("g"),this._container.appendChild(this._rootGroup)},_destroyContainer:function(){T(this._container),k(this._container),delete this._container,delete this._rootGroup,delete this._svgSize},_update:function(){var t,e,i;this._map._animatingZoom&&this._bounds||(Wi.prototype._update.call(this),e=(t=this._bounds).getSize(),i=this._container,this._svgSize&&this._svgSize.equals(e)||(this._svgSize=e,i.setAttribute("width",e.x),i.setAttribute("height",e.y)),Z(i,t.min),i.setAttribute("viewBox",[t.min.x,t.min.y,e.x,e.y].join(" ")),this.fire("update"))},_initPath:function(t){var e=t._path=qi("path");t.options.className&&M(e,t.options.className),t.options.interactive&&M(e,"leaflet-interactive"),this._updateStyle(t),this._layers[h(t)]=t},_addPath:function(t){this._rootGroup||this._initContainer(),this._rootGroup.appendChild(t._path),t.addInteractiveTarget(t._path)},_removePath:function(t){T(t._path),t.removeInteractiveTarget(t._path),delete this._layers[h(t)]},_updatePath:function(t){t._project(),t._update()},_updateStyle:function(t){var 
e=t._path,t=t.options;e&&(t.stroke?(e.setAttribute("stroke",t.color),e.setAttribute("stroke-opacity",t.opacity),e.setAttribute("stroke-width",t.weight),e.setAttribute("stroke-linecap",t.lineCap),e.setAttribute("stroke-linejoin",t.lineJoin),t.dashArray?e.setAttribute("stroke-dasharray",t.dashArray):e.removeAttribute("stroke-dasharray"),t.dashOffset?e.setAttribute("stroke-dashoffset",t.dashOffset):e.removeAttribute("stroke-dashoffset")):e.setAttribute("stroke","none"),t.fill?(e.setAttribute("fill",t.fillColor||t.color),e.setAttribute("fill-opacity",t.fillOpacity),e.setAttribute("fill-rule",t.fillRule||"evenodd")):e.setAttribute("fill","none"))},_updatePoly:function(t,e){this._setPath(t,dt(t._parts,e))},_updateCircle:function(t){var e=t._point,i=Math.max(Math.round(t._radius),1),n="a"+i+","+(Math.max(Math.round(t._radiusY),1)||i)+" 0 1,0 ",e=t._empty()?"M0 0":"M"+(e.x-i)+","+e.y+n+2*i+",0 "+n+2*-i+",0 ";this._setPath(t,e)},_setPath:function(t,e){t._path.setAttribute("d",e)},_bringToFront:function(t){fe(t._path)},_bringToBack:function(t){ge(t._path)}});function Ki(t){return b.svg||b.vml?new Gi(t):null}b.vml&&Gi.include(zt),A.include({getRenderer:function(t){t=(t=t.options.renderer||this._getPaneRenderer(t.options.pane)||this.options.renderer||this._renderer)||(this._renderer=this._createRenderer());return this.hasLayer(t)||this.addLayer(t),t},_getPaneRenderer:function(t){var e;return"overlayPane"!==t&&void 0!==t&&(void 0===(e=this._paneRenderers[t])&&(e=this._createRenderer({pane:t}),this._paneRenderers[t]=e),e)},_createRenderer:function(t){return this.options.preferCanvas&&Ui(t)||Ki(t)}});var Yi=xi.extend({initialize:function(t,e){xi.prototype.initialize.call(this,this._boundsToLatLngs(t),e)},setBounds:function(t){return 
this.setLatLngs(this._boundsToLatLngs(t))},_boundsToLatLngs:function(t){return[(t=g(t)).getSouthWest(),t.getNorthWest(),t.getNorthEast(),t.getSouthEast()]}});Gi.create=qi,Gi.pointsToPath=dt,wi.geometryToLayer=bi,wi.coordsToLatLng=Li,wi.coordsToLatLngs=Ti,wi.latLngToCoords=Mi,wi.latLngsToCoords=zi,wi.getFeature=Ci,wi.asFeature=Zi,A.mergeOptions({boxZoom:!0});var _t=n.extend({initialize:function(t){this._map=t,this._container=t._container,this._pane=t._panes.overlayPane,this._resetStateTimeout=0,t.on("unload",this._destroy,this)},addHooks:function(){S(this._container,"mousedown",this._onMouseDown,this)},removeHooks:function(){k(this._container,"mousedown",this._onMouseDown,this)},moved:function(){return this._moved},_destroy:function(){T(this._pane),delete this._pane},_resetState:function(){this._resetStateTimeout=0,this._moved=!1},_clearDeferredResetState:function(){0!==this._resetStateTimeout&&(clearTimeout(this._resetStateTimeout),this._resetStateTimeout=0)},_onMouseDown:function(t){if(!t.shiftKey||1!==t.which&&1!==t.button)return!1;this._clearDeferredResetState(),this._resetState(),re(),Le(),this._startPoint=this._map.mouseEventToContainerPoint(t),S(document,{contextmenu:Re,mousemove:this._onMouseMove,mouseup:this._onMouseUp,keydown:this._onKeyDown},this)},_onMouseMove:function(t){this._moved||(this._moved=!0,this._box=P("div","leaflet-zoom-box",this._container),M(this._container,"leaflet-crosshair"),this._map.fire("boxzoomstart")),this._point=this._map.mouseEventToContainerPoint(t);var t=new 
f(this._point,this._startPoint),e=t.getSize();Z(this._box,t.min),this._box.style.width=e.x+"px",this._box.style.height=e.y+"px"},_finish:function(){this._moved&&(T(this._box),z(this._container,"leaflet-crosshair")),ae(),Te(),k(document,{contextmenu:Re,mousemove:this._onMouseMove,mouseup:this._onMouseUp,keydown:this._onKeyDown},this)},_onMouseUp:function(t){1!==t.which&&1!==t.button||(this._finish(),this._moved&&(this._clearDeferredResetState(),this._resetStateTimeout=setTimeout(a(this._resetState,this),0),t=new s(this._map.containerPointToLatLng(this._startPoint),this._map.containerPointToLatLng(this._point)),this._map.fitBounds(t).fire("boxzoomend",{boxZoomBounds:t})))},_onKeyDown:function(t){27===t.keyCode&&(this._finish(),this._clearDeferredResetState(),this._resetState())}}),Ct=(A.addInitHook("addHandler","boxZoom",_t),A.mergeOptions({doubleClickZoom:!0}),n.extend({addHooks:function(){this._map.on("dblclick",this._onDoubleClick,this)},removeHooks:function(){this._map.off("dblclick",this._onDoubleClick,this)},_onDoubleClick:function(t){var e=this._map,i=e.getZoom(),n=e.options.zoomDelta,i=t.originalEvent.shiftKey?i-n:i+n;"center"===e.options.doubleClickZoom?e.setZoom(i):e.setZoomAround(t.containerPoint,i)}})),Zt=(A.addInitHook("addHandler","doubleClickZoom",Ct),A.mergeOptions({dragging:!0,inertia:!0,inertiaDeceleration:3400,inertiaMaxSpeed:1/0,easeLinearity:.2,worldCopyJump:!1,maxBoundsViscosity:0}),n.extend({addHooks:function(){var t;this._draggable||(t=this._map,this._draggable=new Xe(t._mapPane,t._container),this._draggable.on({dragstart:this._onDragStart,drag:this._onDrag,dragend:this._onDragEnd},this),this._draggable.on("predrag",this._onPreDragLimit,this),t.options.worldCopyJump&&(this._draggable.on("predrag",this._onPreDragWrap,this),t.on("zoomend",this._onZoomEnd,this),t.whenReady(this._onZoomEnd,this))),M(this._map._container,"leaflet-grab 
leaflet-touch-drag"),this._draggable.enable(),this._positions=[],this._times=[]},removeHooks:function(){z(this._map._container,"leaflet-grab"),z(this._map._container,"leaflet-touch-drag"),this._draggable.disable()},moved:function(){return this._draggable&&this._draggable._moved},moving:function(){return this._draggable&&this._draggable._moving},_onDragStart:function(){var t,e=this._map;e._stop(),this._map.options.maxBounds&&this._map.options.maxBoundsViscosity?(t=g(this._map.options.maxBounds),this._offsetLimit=_(this._map.latLngToContainerPoint(t.getNorthWest()).multiplyBy(-1),this._map.latLngToContainerPoint(t.getSouthEast()).multiplyBy(-1).add(this._map.getSize())),this._viscosity=Math.min(1,Math.max(0,this._map.options.maxBoundsViscosity))):this._offsetLimit=null,e.fire("movestart").fire("dragstart"),e.options.inertia&&(this._positions=[],this._times=[])},_onDrag:function(t){var e,i;this._map.options.inertia&&(e=this._lastTime=+new Date,i=this._lastPos=this._draggable._absPos||this._draggable._newPos,this._positions.push(i),this._times.push(e),this._prunePositions(e)),this._map.fire("move",t).fire("drag",t)},_prunePositions:function(t){for(;1e.max.x&&(t.x=this._viscousLimit(t.x,e.max.x)),t.y>e.max.y&&(t.y=this._viscousLimit(t.y,e.max.y)),this._draggable._newPos=this._draggable._startPos.add(t))},_onPreDragWrap:function(){var t=this._worldWidth,e=Math.round(t/2),i=this._initialWorldOffset,n=this._draggable._newPos.x,o=(n-e+i)%t+e-i,n=(n+e+i)%t-e-i,t=Math.abs(o+i)e.getMaxZoom()&&1`, …) referenced from the path/`components` below. 
+#[derive(Serialize, ToSchema)] +pub struct Page { + pub items: Vec, + pub total: i64, + pub page: i64, + pub page_size: i64, + pub total_pages: i64, +} + +impl> From> for Page { + fn from(p: du_db::Page) -> Self { + let total_pages = p.total_pages(); + Page { + items: p.items.into_iter().map(T::from).collect(), + total: p.total, + page: p.page, + page_size: p.page_size, + total_pages, + } + } +} + +#[derive(Serialize, ToSchema, Clone)] +pub struct VariantDto { + pub id: i64, + pub canonical_name: String, + pub mutation_type: String, + pub naming_status: String, + pub common_names: Vec, + pub rs_ids: Vec, + /// Coordinates keyed by reference build: `{ "GRCh38": {contig, position, ...} }`. + pub coordinates: serde_json::Value, +} + +impl From for VariantDto { + fn from(v: du_domain::variant::Variant) -> Self { + let coordinates = serde_json::to_value(&v.coordinates).unwrap_or(serde_json::Value::Null); + VariantDto { + id: v.id.0, + canonical_name: v.canonical_name, + mutation_type: v.mutation_type.label().to_string(), + naming_status: v.naming_status.label().to_string(), + common_names: v.aliases.common_names, + rs_ids: v.aliases.rs_ids, + coordinates, + } + } +} + +/// A nested haplogroup tree node. +#[derive(Serialize, ToSchema)] +pub struct HaplogroupNodeDto { + pub id: i64, + pub name: String, + pub haplogroup_type: String, + pub formed_ybp: Option, + pub tmrca_ybp: Option, + /// Placed non-D2C sample leaves **at or below** this node (the YFull-style cumulative + /// count). Open the node's `/y-tree/node/{name}/samples` to list them. + pub sample_count: i64, + /// Defining variants for this node (with multi-build coordinates). Populated only by the + /// `/full` tree endpoints; omitted (empty) on the plain tree so existing clients are + /// unaffected. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub variants: Vec, + /// Child nodes. 
`no_recursion` stops utoipa's schema walk from recursing + /// infinitely on this self-reference (it emits a `$ref` instead). + #[schema(no_recursion)] + pub children: Vec, +} + +#[derive(Serialize, ToSchema)] +pub struct TreeDto { + /// Top-level node(s): one when a `rootHaplogroup` is given, else every root. + pub roots: Vec, +} + +/// A non-D2C biosample placed as a leaf at or below a haplogroup node (YFull-style). +#[derive(Serialize, ToSchema)] +pub struct LeafSampleDto { + pub sample_guid: String, + pub accession: Option, + pub alias: Option, + /// Origin (`EXTERNAL`, `ANCIENT`, `STANDARD`, `PGP`) — never `CITIZEN` (D2C is excluded). + pub source: String, + /// The most recent linked publication, when the sample is paper-referenced. + pub publication: Option, +} + +#[derive(Serialize, ToSchema)] +pub struct PublicationRefDto { + pub title: String, + pub doi: Option, + pub url: Option, +} + +#[derive(Serialize, ToSchema)] +pub struct LeafSamplesDto { + pub items: Vec, +} + +impl From for LeafSampleDto { + fn from(s: du_db::tree_sample::LeafSample) -> Self { + let publication = s.pub_title.map(|title| PublicationRefDto { title, doi: s.pub_doi, url: s.pub_url }); + LeafSampleDto { + sample_guid: s.sample_guid.to_string(), + accession: s.accession, + alias: s.alias, + source: s.source, + publication, + } + } +} + +/// Cheap cache-revalidation probe: the current tree revision + the full-tree ETag, +/// so a client can detect a newer tree without downloading it. +#[derive(Serialize, ToSchema)] +pub struct TreeVersionDto { + /// Monotonic revision, bumped by every tree-mutating operation. + pub revision: i64, + /// The `ETag` of `GET /…-tree/full` at this revision (use as `If-None-Match`). + pub etag: String, + /// When the tree last changed (RFC 3339). + pub updated_at: String, +} + +/// A sequencing instrument resolved to its laboratory (Edge `@RG` lookup). +#[derive(Serialize, ToSchema)] +pub struct SequencerLabDto { + /// The `@RG` instrument id (e.g. `A00123`). 
+ pub instrument_id: String, + pub lab_name: String, + /// Direct-to-consumer lab (vs. clinical/academic). + pub is_d2c: bool, + pub manufacturer: Option, + pub model_name: Option, + pub website_url: Option, +} + +impl From for SequencerLabDto { + fn from(l: du_db::sequencer::LabLookup) -> Self { + Self { + instrument_id: l.instrument_id, + lab_name: l.lab_name, + is_d2c: l.is_d2c, + manufacturer: l.manufacturer, + model_name: l.model_name, + website_url: l.website_url, + } + } +} + +/// A defining variant of a discovery proposal (with cross-submitter support). +#[derive(Serialize, ToSchema)] +pub struct DiscoveryVariantDto { + pub name: Option, + pub supporting_sample_count: i32, +} + +/// A proposed haplogroup branch from the discovery consensus engine. +#[derive(Serialize, ToSchema)] +pub struct DiscoveryProposalDto { + pub id: i64, + pub proposed_name: Option, + pub parent_haplogroup: Option, + pub dna_type: Option, + pub status: String, + /// Supporting private-variant observations. + pub evidence_count: i32, + /// Distinct contributing samples. + pub submitter_count: i32, + pub confidence: Option, + /// Defining variants (detail only; empty in list responses). + #[serde(skip_serializing_if = "Vec::is_empty")] + pub variants: Vec, +} + +impl From for DiscoveryProposalDto { + fn from(s: du_db::proposal::ProposalSummary) -> Self { + Self { + id: s.id, + proposed_name: s.proposed_name, + parent_haplogroup: s.parent_name, + dna_type: s.dna_type, + status: s.status, + evidence_count: s.evidence_count, + submitter_count: s.submitter_count, + confidence: s.confidence, + variants: Vec::new(), + } + } +} + +impl DiscoveryProposalDto { + pub(crate) fn from_detail(d: du_db::proposal::ProposalDetail) -> Self { + let variants = d + .variants + .into_iter() + .map(|v| DiscoveryVariantDto { name: v.name, supporting_sample_count: v.supporting_sample_count }) + .collect(); + Self { variants, ..Self::from(d.summary) } + } +} + +/// Query for the single-instrument lab lookup. 
+#[derive(Deserialize, IntoParams)] +pub(crate) struct InstrumentParams { + /// The `@RG` instrument id to resolve, e.g. `A00123`. + pub(crate) instrument_id: String, +} + +#[derive(Serialize, ToSchema)] +pub struct CoverageBenchmarkDto { + pub lab: Option, + pub test_type: Option, + pub library_count: i64, + pub avg_mean_depth: Option, + pub avg_cov_10x: Option, + pub expected_min_depth: Option, + /// Whether the lab's average depth meets the advertised spec (when both known). + pub meets_spec: Option, + /// Average depth minus the advertised spec (positive = over, negative = under). + pub depth_delta: Option, +} + +impl From for CoverageBenchmarkDto { + fn from(c: du_domain::coverage::CoverageBenchmark) -> Self { + // Vendor conformance: compare the lab's average against the advertised spec. + let (meets_spec, depth_delta) = match (c.avg_mean_depth, c.expected_min_depth) { + (Some(avg), Some(exp)) => (Some(avg >= exp), Some(avg - exp)), + _ => (None, None), + }; + CoverageBenchmarkDto { + lab: c.lab, + test_type: c.test_type, + library_count: c.library_count, + avg_mean_depth: c.avg_mean_depth, + avg_cov_10x: c.avg_cov_10x, + expected_min_depth: c.expected_min_depth, + meets_spec, + depth_delta, + } + } +} + +/// A test type's definition + its empirical coverage norm. +#[derive(Serialize, ToSchema)] +pub struct TestTypeDto { + pub code: String, + pub display_name: String, + pub category: String, + pub vendor: Option, + pub target_type: Option, + pub expected_min_depth: Option, + pub supports_haplogroup_y: bool, + pub supports_haplogroup_mt: bool, + pub supports_autosomal_ibd: bool, + pub supports_ancestry: bool, + pub typical_file_formats: Vec, + /// Federated-cohort norm: samples observed + typical depth / 30× coverage. 
+ pub norm_sample_count: Option, + pub norm_median_depth: Option, + pub norm_median_pct_30x: Option, +} + +impl From for TestTypeDto { + fn from(t: du_db::test_type::TestTypeInfo) -> Self { + Self { + code: t.code, + display_name: t.display_name, + category: t.category, + vendor: t.vendor, + target_type: t.target_type, + expected_min_depth: t.expected_min_depth, + supports_haplogroup_y: t.supports_haplogroup_y, + supports_haplogroup_mt: t.supports_haplogroup_mt, + supports_autosomal_ibd: t.supports_autosomal_ibd, + supports_ancestry: t.supports_ancestry, + typical_file_formats: t.typical_file_formats, + norm_sample_count: t.norm_sample_count, + norm_median_depth: t.norm_median_depth, + norm_median_pct_30x: t.norm_median_pct_30x, + } + } +} + +#[derive(Serialize, ToSchema)] +pub struct PublicationDto { + pub id: i64, + pub title: String, + pub doi: Option, + pub pubmed_id: Option, + pub journal: Option, + pub publication_date: Option, + pub authors: Option, + pub abstract_summary: Option, + pub url: Option, + pub cited_by_count: Option, + pub open_access_status: Option, +} + +impl From for PublicationDto { + fn from(p: du_domain::publication::Publication) -> Self { + PublicationDto { + id: p.id.0, + title: p.title, + doi: p.doi, + pubmed_id: p.pubmed_id, + journal: p.journal, + publication_date: p.publication_date, + authors: p.authors, + abstract_summary: p.abstract_summary, + url: p.url, + cited_by_count: p.cited_by_count, + open_access_status: p.open_access_status, + } + } +} + +#[derive(Serialize, ToSchema)] +pub struct BiosampleDto { + pub sample_guid: String, + pub source: String, + pub accession: Option, + pub alias: Option, + pub description: Option, + pub center_name: Option, + pub locked: bool, + pub source_attrs: serde_json::Value, + pub atproto: Option, +} + +impl From for BiosampleDto { + fn from(b: du_domain::biosample::Biosample) -> Self { + BiosampleDto { + sample_guid: b.sample_guid.0.to_string(), + source: b.source.label().to_string(), + accession: 
b.accession, + alias: b.alias, + description: b.description, + center_name: b.center_name, + locked: b.locked, + source_attrs: b.source_attrs, + atproto: b.atproto, + } + } +} + +// ── per-sample report DTOs ───────────────────────────────────────────────────── + +#[derive(Serialize, ToSchema)] +pub struct PathwayStepDto { + pub name: String, + pub formed_ybp: Option, + pub tmrca_ybp: Option, + pub defining_snps: Vec, +} + +#[derive(Serialize, ToSchema)] +pub struct HaplogroupPathwayDto { + /// Name as called on the sample. + pub called_name: String, + /// Matched tree node, or null when the call isn't placed in the tree. + pub resolved_name: Option, + pub dna_type: String, + /// `RECONCILED` (cross-technology consensus) / `FED_CONSENSUS` / `ORIGINAL`. + pub origin: String, + /// Consensus confidence ∈ [0,1] (reconciled calls only). + pub confidence: Option, + /// Sequencing runs reconciled into the consensus. + pub run_count: Option, + /// SNP concordance across the reconciled runs ∈ [0,1]. + pub snp_concordance: Option, + /// `COMPATIBLE` / `MINOR_DIVERGENCE` / `INCOMPATIBLE` … + pub compatibility_level: Option, + /// Root → tip clades (empty when unplaced). + pub steps: Vec, +} + +#[derive(Serialize, ToSchema)] +pub struct SequencingRunDto { + pub platform_name: Option, + pub instrument_model: Option, + pub test_type: Option, + pub library_layout: Option, + pub total_reads: Option, + pub read_length: Option, + pub mean_insert_size: Option, +} + +#[derive(Serialize, ToSchema)] +pub struct CoverageSummaryDto { + pub reference_build: Option, + pub aligner: Option, + pub mean_coverage: Option, + pub median_coverage: Option, + pub pct_10x: Option, + pub pct_20x: Option, + pub pct_30x: Option, + pub test_type: Option, + /// Advertised minimum depth for the test type, when known. + pub expected_min_depth: Option, + /// Empirical cohort median depth for the test type. 
+ pub norm_median_depth: Option, + /// `BELOW` / `AT` / `ABOVE` the advertised spec (or cohort norm when no spec). + pub conformance: Option, +} + +#[derive(Serialize, ToSchema)] +pub struct AncestryDto { + pub analysis_method: Option, + pub panel_type: Option, + pub confidence_level: Option, + /// Continental rollup: `[{superPopulation, percentage}]`. + #[schema(value_type = Object)] + pub super_populations: serde_json::Value, + /// Sub-continental percentages (payload shape passed through verbatim). + #[schema(value_type = Object)] + pub components: serde_json::Value, +} + +#[derive(Serialize, ToSchema)] +pub struct SamplePublicationDto { + pub id: i64, + pub title: String, + pub doi: Option, + pub url: Option, + pub publication_date: Option, +} + +/// The public per-sample report (mirrors the `/sample/:slug` page). +#[derive(Serialize, ToSchema)] +pub struct SampleReportDto { + pub sample_guid: String, + pub source: String, + pub accession: Option, + pub alias: Option, + pub description: Option, + pub center_name: Option, + pub sex: Option, + pub latitude: Option, + pub longitude: Option, + pub is_federated: bool, + pub y_haplogroup: Option, + pub mt_haplogroup: Option, + pub sequencing: Vec, + pub coverage: Vec, + pub ancestry: Option, + pub publications: Vec, +} + +#[derive(Serialize, ToSchema)] +pub struct GenomeRegionDto { + pub id: i64, + pub region_type: String, + pub name: String, + pub coordinates: serde_json::Value, + pub properties: serde_json::Value, +} + +impl From for GenomeRegionDto { + fn from(r: du_domain::genome_region::GenomeRegion) -> Self { + GenomeRegionDto { + id: r.id, + region_type: r.region_type, + name: r.name, + coordinates: r.coordinates, + properties: r.properties, + } + } +} + +#[derive(Serialize, ToSchema)] +pub struct StudyDto { + pub id: i64, + pub accession: String, + pub title: Option, + pub center_name: Option, + /// Linked samples: `[{sample_guid, accession, source}]`. 
+ pub samples: serde_json::Value, +} + +impl From for StudyDto { + fn from(s: du_db::study::StudyWithSamples) -> Self { + StudyDto { id: s.id, accession: s.accession, title: s.title, center_name: s.center_name, samples: s.samples } + } +} + +#[derive(Serialize, ToSchema)] +pub struct ExportMetadataDto { + pub variant_count: i64, + pub format: String, + pub generated_at: String, +} + +// ── federation reporting DTOs ────────────────────────────────────────────────── +// Population-level reports aggregated from the federated mirror (`fed.*`) with +// query-time SQL — the AppView aggregates and reports over Navigator-published +// anonymized summaries (no per-PDS fetch at request time). + +/// Coverage aggregated across mirrored alignment summaries, by reference build. +#[derive(Serialize, ToSchema)] +pub struct FedCoverageByBuildDto { + pub reference_build: Option, + pub samples: i64, + pub mean_coverage: Option, + pub mean_pct_30x: Option, +} + +impl From for FedCoverageByBuildDto { + fn from(c: du_db::fed::coverage::BuildCoverage) -> Self { + FedCoverageByBuildDto { + reference_build: c.reference_build, + samples: c.samples, + mean_coverage: c.mean_coverage, + mean_pct_30x: c.mean_pct_30x, + } + } +} + +/// Average ancestry share per continental super-population across mirrored breakdowns. +#[derive(Serialize, ToSchema)] +pub struct AncestryShareDto { + pub super_population: Option, + pub samples: i64, + pub avg_percentage: Option, +} + +impl From for AncestryShareDto { + fn from(s: du_db::fed::analytics::SuperPopulationShare) -> Self { + AncestryShareDto { + super_population: s.super_population, + samples: s.samples, + avg_percentage: s.avg_percentage, + } + } +} + +/// Count of a consensus Y/MT haplogroup across mirrored biosamples. 
+#[derive(Serialize, ToSchema)] +pub struct HaplogroupCountDto { + pub dna_type: String, + pub haplogroup: String, + pub samples: i64, +} + +impl From for HaplogroupCountDto { + fn from(h: du_db::fed::core::HaplogroupCount) -> Self { + HaplogroupCountDto { dna_type: h.dna_type, haplogroup: h.haplogroup, samples: h.samples } + } +} + +/// One marker of a Y haplogroup's aggregated (modal) STR signature. +#[derive(Serialize, ToSchema)] +pub struct StrSignatureMarkerDto { + pub marker: String, + /// Modal repeat count for simple markers (null for multi-copy — see `value_json`). + pub value: Option, + /// Full lexicon `strValue` (carries multi-copy `copies`). + #[schema(value_type = Object)] + pub value_json: Option, + pub confidence: Option, + pub supporting_samples: Option, + /// MODAL (computed) or MANUAL (curator override). + pub method: Option, +} + +impl From for StrSignatureMarkerDto { + fn from(s: du_db::ystr::SignatureMarker) -> Self { + StrSignatureMarkerDto { + marker: s.marker_name, + value: s.ancestral_value, + value_json: s.ancestral_json, + confidence: s.confidence, + supporting_samples: s.supporting_samples, + method: s.method, + } + } +} + +/// A contributing branch-age estimate (method-labeled — STR is one factor in the +/// combined age model; this is NOT the authoritative `tmrca_ybp`). 
+#[derive(Serialize, ToSchema)] +pub struct AgeEstimateDto { + pub method: String, + pub estimate_ybp: Option, + pub ci_low_ybp: Option, + pub ci_high_ybp: Option, + pub sample_count: Option, + pub marker_count: Option, + pub generation_years: Option, +} + +impl From for AgeEstimateDto { + fn from(e: du_db::ystr::AgeEstimate) -> Self { + AgeEstimateDto { + method: e.method, + estimate_ybp: e.estimate_ybp, + ci_low_ybp: e.ci_low_ybp, + ci_high_ybp: e.ci_high_ybp, + sample_count: e.sample_count, + marker_count: e.marker_count, + generation_years: e.generation_years, + } + } +} + +/// STR→branch prediction request: a query profile in the lexicon's +/// `strMarkerValue[]` shape (the same markers Navigator publishes). +#[derive(Deserialize, ToSchema)] +pub struct StrPredictRequest { + #[schema(value_type = Object)] + pub markers: serde_json::Value, + /// Provenance of the query STRs; drives the WGS-upgrade recommendation. + pub source: Option, + pub top_n: Option, +} + +/// One ranked predicted branch. +#[derive(Serialize, ToSchema)] +pub struct StrPredictionDto { + pub haplogroup: String, + pub distance: i32, + pub compared_markers: i64, + pub signature_markers: i64, +} + +#[derive(Serialize, ToSchema)] +pub struct StrPredictResponseDto { + pub query_markers: i64, + pub predictions: Vec, + /// True unless the query STRs are WGS-derived — the STR-panel→WGS nudge. + pub wgs_upgrade_recommended: bool, + pub note: String, +} + +// ── query params ───────────────────────────────────────────────────────────── + +#[derive(Deserialize, IntoParams)] +pub struct SearchParams { + /// Free-text filter. + pub query: Option, + /// 1-based page number. + pub page: Option, + /// Page size (clamped to ≤200). + pub page_size: Option, +} + +#[derive(Deserialize, IntoParams)] +pub struct PageParams { + pub page: Option, + pub page_size: Option, +} + +#[derive(Deserialize, IntoParams)] +pub struct RootParams { + /// Subtree root; omit for the full forest. 
+ #[serde(rename = "rootHaplogroup")] + #[param(rename = "rootHaplogroup")] + pub root_haplogroup: Option, +} + +impl RootParams { + /// The non-empty subtree root, if any. + pub(crate) fn root(&self) -> Option<&str> { + self.root_haplogroup.as_deref().filter(|s| !s.is_empty()) + } +} diff --git a/rust/crates/du-web/src/api/mod.rs b/rust/crates/du-web/src/api/mod.rs new file mode 100644 index 00000000..b4a125e7 --- /dev/null +++ b/rust/crates/du-web/src/api/mod.rs @@ -0,0 +1,844 @@ +//! Public JSON API surface (`/api/v1/*`) — the Tapir replacement. Clean DTOs +//! (decoupled from the internal domain types) are mapped from `du-db` query +//! results and described with `utoipa`; Swagger UI is served at `/api`. +//! +//! Scope: ONLY the read-only public endpoints (tree, coverage, references/ +//! biosamples, variants, genome regions) plus the federated population reports +//! (`/api/v1/reports/*`) aggregated from the `fed.*` mirror. Curator/machine +//! management endpoints are deliberately NOT under `/api/v1` — they live under +//! `/manage/*` (change-sets, haplogroup merge, curation intake) and are not part +//! of this public OpenAPI document. 
+ +use crate::error::AppError; +use crate::state::AppState; +use axum::extract::{Path, Query, State}; +use axum::http::{header, StatusCode}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use serde::Deserialize; +use utoipa::{IntoParams, OpenApi}; +use utoipa_swagger_ui::SwaggerUi; + +mod dto; +mod tree; +pub(crate) use dto::*; +use tree::*; + + +#[utoipa::path(get, path = "/api/v1/coverage/benchmarks", tag = "coverage", + responses((status = 200, description = "Coverage benchmarks by lab and test type", body = [CoverageBenchmarkDto])))] +async fn coverage_benchmarks(State(st): State) -> Result>, AppError> { + let rows = du_db::coverage::benchmarks(&st.pool).await?; + Ok(Json(rows.into_iter().map(CoverageBenchmarkDto::from).collect())) +} + +#[utoipa::path(get, path = "/api/v1/sequencer/lab", params(InstrumentParams), tag = "sequencer", + responses((status = 200, description = "The instrument's sequencing lab", body = SequencerLabDto), + (status = 404, description = "Unknown instrument or no lab association")))] +async fn sequencer_lab(State(st): State, Query(q): Query) -> Result, AppError> { + let id = q.instrument_id.trim(); + du_db::sequencer::lookup_lab(&st.pool, id) + .await? + .map(|l| Json(SequencerLabDto::from(l))) + .ok_or_else(|| AppError::NotFound(format!("instrument {id}"))) +} + +#[utoipa::path(get, path = "/api/v1/sequencer/lab-instruments", tag = "sequencer", + responses((status = 200, description = "All preseeded instrument→lab associations (bulk cache seed)", body = [SequencerLabDto])))] +async fn sequencer_lab_instruments(State(st): State) -> Result>, AppError> { + Ok(Json(du_db::sequencer::lab_instruments(&st.pool).await?.into_iter().map(SequencerLabDto::from).collect())) +} + +#[derive(Deserialize, IntoParams)] +struct DiscoveryQuery { + /// DNA arm: `Y_DNA` or `MT_DNA`. + #[serde(rename = "type")] + dna_type: Option, + /// Proposal status (e.g. `READY_FOR_REVIEW`, `SPLIT_CANDIDATE`). 
+ status: Option, + /// Parent (terminal) haplogroup name. + parent: Option, + /// Minimum distinct contributing samples. + min_consensus: Option, + page: Option, + page_size: Option, +} + +#[utoipa::path(get, path = "/api/v1/discovery/proposals", params(DiscoveryQuery), tag = "discovery", + responses((status = 200, description = "Proposed haplogroup branches (paginated)", body = Page)))] +async fn discovery_proposals(State(st): State, Query(q): Query) -> Result>, AppError> { + let filter = du_db::proposal::ProposalFilter { + status: q.status.as_deref().filter(|s| !s.is_empty()), + dna_type: q.dna_type.as_deref().filter(|s| !s.is_empty()), + parent: q.parent.as_deref().filter(|s| !s.is_empty()), + min_consensus: q.min_consensus, + }; + let page = du_db::proposal::list(&st.pool, &filter, q.page.unwrap_or(1), q.page_size.unwrap_or(50)).await?; + Ok(Json(page.into())) +} + +#[utoipa::path(get, path = "/api/v1/discovery/proposals/{id}", + params(("id" = i64, Path, description = "Proposal id")), tag = "discovery", + responses((status = 200, description = "A proposal with its defining variants", body = DiscoveryProposalDto), + (status = 404, description = "Not found")))] +async fn discovery_proposal(State(st): State, Path(id): Path) -> Result, AppError> { + du_db::proposal::get(&st.pool, id) + .await? + .map(|d| Json(DiscoveryProposalDto::from_detail(d))) + .ok_or_else(|| AppError::NotFound(format!("proposal {id}"))) +} + +#[utoipa::path(get, path = "/api/v1/test-types", tag = "test-types", + responses((status = 200, description = "Test-type taxonomy + empirical coverage norms", body = [TestTypeDto])))] +async fn test_types(State(st): State) -> Result>, AppError> { + Ok(Json(du_db::test_type::list(&st.pool).await?.into_iter().map(TestTypeDto::from).collect())) +} + +#[utoipa::path(get, path = "/api/v1/test-types/{code}", + params(("code" = String, Path, description = "Test-type code (e.g. 
WGS, BIG_Y_700)")), tag = "test-types", + responses((status = 200, description = "A test type + its coverage norm", body = TestTypeDto), + (status = 404, description = "Unknown test type")))] +async fn test_type_by_code(State(st): State, Path(code): Path) -> Result, AppError> { + du_db::test_type::get(&st.pool, &code) + .await? + .map(|t| Json(TestTypeDto::from(t))) + .ok_or_else(|| AppError::NotFound(format!("test type {code}"))) +} + +#[utoipa::path(get, path = "/api/v1/haplogroups/{haplogroupName}/str-signature", tag = "tree", + responses((status = 200, description = "Aggregated modal Y-STR signature for a haplogroup", body = [StrSignatureMarkerDto])))] +async fn haplogroup_str_signature( + State(st): State, + Path(name): Path, +) -> Result>, AppError> { + let rows = du_db::ystr::branch_signature(&st.pool, &name).await?; + Ok(Json(rows.into_iter().map(StrSignatureMarkerDto::from).collect())) +} + +#[utoipa::path(get, path = "/api/v1/haplogroups/{haplogroupName}/age", tag = "tree", + responses((status = 200, description = "Contributing branch-age estimates (e.g. STR_VARIANCE)", body = [AgeEstimateDto])))] +async fn haplogroup_age( + State(st): State, + Path(name): Path, +) -> Result>, AppError> { + let rows = du_db::ystr::branch_age_estimates(&st.pool, &name).await?; + Ok(Json(rows.into_iter().map(AgeEstimateDto::from).collect())) +} + +#[utoipa::path(post, path = "/api/v1/str/predict", tag = "tree", request_body = StrPredictRequest, + responses((status = 200, description = "STR→branch predictions (ranked by genetic distance)", body = StrPredictResponseDto)))] +async fn str_predict( + State(st): State, + Json(req): Json, +) -> Result, AppError> { + let query = du_db::ystr::parse_markers(&req.markers); + if query.is_empty() { + return Err(AppError::BadRequest("no parseable STR markers in request".into())); + } + let top_n = req.top_n.unwrap_or(10).clamp(1, 50); + // Require meaningful marker overlap (up to 8) so a branch can't rank off one marker. 
+ let min_compared = query.len().clamp(1, 8); + let preds = du_db::ystr::predict(&st.pool, &query, top_n, min_compared).await?; + + let wgs_derived = matches!(req.source.as_deref(), Some("WGS_DERIVED") | Some("BIG_Y_DERIVED")); + let note = if wgs_derived { + "Predicted from WGS-derived STRs; SNP calls supersede STR prediction.".to_string() + } else { + "STR-based predictions are probabilistic. Upgrade to WGS / Big Y for SNP-confirmed branch placement.".to_string() + }; + Ok(Json(StrPredictResponseDto { + query_markers: query.len() as i64, + predictions: preds + .into_iter() + .map(|p| StrPredictionDto { + haplogroup: p.haplogroup, + distance: p.distance, + compared_markers: p.compared_markers as i64, + signature_markers: p.signature_markers as i64, + }) + .collect(), + wgs_upgrade_recommended: !wgs_derived, + note, + })) +} + +#[utoipa::path(get, path = "/api/v1/reports/coverage", tag = "reports", + responses((status = 200, description = "Federated coverage aggregated by reference build", body = [FedCoverageByBuildDto])))] +async fn reports_coverage(State(st): State) -> Result>, AppError> { + let rows = du_db::fed::coverage::aggregate_by_build(&st.pool).await?; + Ok(Json(rows.into_iter().map(FedCoverageByBuildDto::from).collect())) +} + +#[utoipa::path(get, path = "/api/v1/reports/ancestry", tag = "reports", + responses((status = 200, description = "Average ancestry share by continental super-population", body = [AncestryShareDto])))] +async fn reports_ancestry(State(st): State) -> Result>, AppError> { + let rows = du_db::fed::analytics::super_population_distribution(&st.pool).await?; + Ok(Json(rows.into_iter().map(AncestryShareDto::from).collect())) +} + +#[utoipa::path(get, path = "/api/v1/reports/haplogroups", tag = "reports", + responses((status = 200, description = "Y/MT haplogroup distribution across mirrored biosamples", body = [HaplogroupCountDto])))] +async fn reports_haplogroups(State(st): State) -> Result>, AppError> { + let rows = 
du_db::fed::core::haplogroup_distribution(&st.pool).await?; + Ok(Json(rows.into_iter().map(HaplogroupCountDto::from).collect())) +} + +#[utoipa::path(get, path = "/api/v1/references/details", params(SearchParams), tag = "references", + responses((status = 200, description = "Publications (paginated)", body = Page)))] +async fn references_details( + State(st): State, + Query(q): Query, +) -> Result>, AppError> { + let page = du_db::publication::search(&st.pool, q.query.as_deref(), q.page.unwrap_or(1), q.page_size.unwrap_or(25)).await?; + Ok(Json(page.into())) +} + +#[utoipa::path(get, path = "/api/v1/references/details/{publicationId}/biosamples", + params(("publicationId" = i64, Path, description = "Publication id"), PageParams), tag = "references", + responses((status = 200, description = "Biosamples linked to a publication", body = Page)))] +async fn biosample_report( + State(st): State, + Path(publication_id): Path, + Query(q): Query, +) -> Result>, AppError> { + let page = du_db::biosample::for_publication( + &st.pool, + du_domain::ids::PublicationId(publication_id), + q.page.unwrap_or(1), + q.page_size.unwrap_or(50), + ) + .await?; + Ok(Json(page.into())) +} + +/// Best display name for a defining variant: canonical name, else first alias. 
+fn snp_name(v: &du_db::haplogroup::VariantInfo) -> Option { + if let Some(n) = v.canonical_name.as_deref().filter(|s| !s.is_empty()) { + return Some(n.to_string()); + } + v.aliases + .get("common_names") + .and_then(serde_json::Value::as_array) + .and_then(|a| a.first()) + .and_then(serde_json::Value::as_str) + .map(str::to_string) +} + +fn pathway_dto(call: &du_db::biosample::HaplogroupCall, p: du_db::haplogroup::Pathway) -> HaplogroupPathwayDto { + use du_db::biosample::HaplogroupCallOrigin; + HaplogroupPathwayDto { + called_name: call.name.clone(), + resolved_name: p.resolved_name, + dna_type: call.dna_type.label().to_string(), + origin: match call.origin { + HaplogroupCallOrigin::Reconciled => "RECONCILED", + HaplogroupCallOrigin::FedConsensus => "FED_CONSENSUS", + HaplogroupCallOrigin::Original => "ORIGINAL", + } + .to_string(), + confidence: call.confidence, + run_count: call.run_count, + snp_concordance: call.snp_concordance, + compatibility_level: call.compatibility_level.clone(), + steps: p + .steps + .into_iter() + .map(|s| PathwayStepDto { + name: s.name, + formed_ybp: s.formed_ybp, + tmrca_ybp: s.tmrca_ybp, + defining_snps: s.defining_snps.iter().filter_map(snp_name).collect(), + }) + .collect(), + } +} + +#[utoipa::path(get, path = "/api/v1/samples/{slug}", + params(("slug" = String, Path, description = "Sample slug, accession, alias, or guid")), tag = "references", + responses((status = 200, description = "Public per-sample report", body = SampleReportDto), + (status = 404, description = "Not found or not public")))] +async fn sample_report(State(st): State, Path(slug): Path) -> Result, AppError> { + // The API never exposes private samples (no curator preview here). + let rep = du_db::biosample::report(&st.pool, &slug) + .await? 
+ .filter(|r| r.identity.is_public) + .ok_or_else(|| AppError::NotFound(format!("sample {slug}")))?; + + let y_haplogroup = match &rep.y { + Some(c) => Some(pathway_dto(c, du_db::haplogroup::pathway(&st.pool, &c.name, c.dna_type).await?)), + None => None, + }; + let mt_haplogroup = match &rep.mt { + Some(c) => Some(pathway_dto(c, du_db::haplogroup::pathway(&st.pool, &c.name, c.dna_type).await?)), + None => None, + }; + + let id = &rep.identity; + let dto = SampleReportDto { + sample_guid: id.sample_guid.0.to_string(), + source: id.source.label().to_string(), + accession: id.accession.clone(), + alias: id.alias.clone(), + description: id.description.clone(), + center_name: id.center_name.clone(), + sex: id.sex.clone(), + latitude: id.origin.map(|o| o.lat), + longitude: id.origin.map(|o| o.lon), + is_federated: id.is_federated, + y_haplogroup, + mt_haplogroup, + sequencing: rep + .sequencing + .iter() + .map(|r| SequencingRunDto { + platform_name: r.platform_name.clone(), + instrument_model: r.instrument_model.clone(), + test_type: r.test_type.clone(), + library_layout: r.library_layout.clone(), + total_reads: r.total_reads, + read_length: r.read_length, + mean_insert_size: r.mean_insert_size, + }) + .collect(), + coverage: rep + .coverage + .iter() + .map(|c| CoverageSummaryDto { + reference_build: c.reference_build.clone(), + aligner: c.aligner.clone(), + mean_coverage: c.mean_coverage, + median_coverage: c.median_coverage, + pct_10x: c.pct_10x, + pct_20x: c.pct_20x, + pct_30x: c.pct_30x, + test_type: c.test_type.clone(), + expected_min_depth: c.expected_min_depth, + norm_median_depth: c.norm_median_depth, + conformance: c.conformance.clone(), + }) + .collect(), + ancestry: rep.ancestry.as_ref().map(|a| AncestryDto { + analysis_method: a.analysis_method.clone(), + panel_type: a.panel_type.clone(), + confidence_level: a.confidence_level, + super_populations: a.super_populations.clone(), + components: a.components.clone(), + }), + publications: rep + .publications + 
.iter() + .map(|p| SamplePublicationDto { + id: p.id.0, + title: p.title.clone(), + doi: p.doi.clone(), + url: p.url.clone(), + publication_date: p.publication_date, + }) + .collect(), + }; + Ok(Json(dto)) +} + +#[utoipa::path(get, path = "/api/v1/biosample/studies", tag = "references", + responses((status = 200, description = "Genomic studies with their linked samples", body = [StudyDto])))] +async fn biosample_studies(State(st): State) -> Result>, AppError> { + let rows = du_db::study::with_samples(&st.pool).await?; + Ok(Json(rows.into_iter().map(StudyDto::from).collect())) +} + +#[utoipa::path(get, path = "/api/v1/variants", params(SearchParams), tag = "variants", + responses((status = 200, description = "Variants (paginated)", body = Page)))] +async fn list_variants( + State(st): State, + Query(q): Query, +) -> Result>, AppError> { + let page = du_db::variant::search(&st.pool, q.query.as_deref(), q.page.unwrap_or(1), q.page_size.unwrap_or(25)).await?; + Ok(Json(page.into())) +} + +#[utoipa::path(get, path = "/api/v1/variants/{variantId}", + params(("variantId" = i64, Path, description = "Variant id")), tag = "variants", + responses((status = 200, description = "A single variant", body = VariantDto), (status = 404, description = "Not found")))] +async fn get_variant(State(st): State, Path(id): Path) -> Result, AppError> { + let v = du_db::variant::get_by_id(&st.pool, du_domain::ids::VariantId(id)) + .await? 
+ .ok_or_else(|| AppError::NotFound(format!("variant {id}")))?; + Ok(Json(v.into())) +} + +#[utoipa::path(get, path = "/api/v1/haplogroups/{haplogroupName}/variants", + params(("haplogroupName" = String, Path, description = "Haplogroup name")), tag = "variants", + responses((status = 200, description = "Variants defining a haplogroup", body = [VariantDto])))] +async fn variants_by_haplogroup( + State(st): State, + Path(name): Path, +) -> Result>, AppError> { + let vs = du_db::variant::for_haplogroup_name(&st.pool, &name).await?; + Ok(Json(vs.into_iter().map(VariantDto::from).collect())) +} + +#[utoipa::path(get, path = "/api/v1/variants/export/metadata", tag = "variants", + responses((status = 200, description = "Export size + freshness", body = ExportMetadataDto)))] +async fn export_metadata(State(st): State) -> Result, AppError> { + let variant_count = du_db::variant::count(&st.pool).await?; + Ok(Json(ExportMetadataDto { + variant_count, + format: "csv".into(), + generated_at: chrono::Utc::now().to_rfc3339(), + })) +} + +#[utoipa::path(get, path = "/api/v1/variants/export", tag = "variants", + responses((status = 200, description = "Variant catalog as CSV", content_type = "text/csv")))] +async fn export_variants(State(st): State) -> Result { + let variants = du_db::variant::export_all(&st.pool).await?; + let mut csv = String::from("id,canonical_name,mutation_type,naming_status,builds,common_names,rs_ids\n"); + for v in &variants { + let mut builds: Vec<&str> = v.coordinates.0.keys().map(String::as_str).collect(); + builds.sort_unstable(); + csv.push_str(&format!( + "{},{},{},{},{},{},{}\n", + v.id.0, + csv_field(&v.canonical_name), + v.mutation_type.label(), + v.naming_status.label(), + csv_field(&builds.join(";")), + csv_field(&v.aliases.common_names.join(";")), + csv_field(&v.aliases.rs_ids.join(";")), + )); + } + Ok(( + StatusCode::OK, + [ + (header::CONTENT_TYPE, "text/csv; charset=utf-8"), + (header::CONTENT_DISPOSITION, "attachment; 
filename=\"variants.csv\""), + ], + csv, + ) + .into_response()) +} + +#[utoipa::path(get, path = "/api/v1/variants/export.gff", tag = "variants", + responses((status = 200, description = "DU-named variants as GFF3 (GRCh38) for propagation", content_type = "text/plain")))] +async fn export_variants_gff(State(st): State) -> Result { + // Propagation feed for the DU naming authority: minted DU names + GRCh38 + // coordinates as GFF3, for YBrowse/external tools to pick up. + let variants = du_db::variant::export_du_named(&st.pool).await?; + let mut gff = String::from("##gff-version 3\n"); + for v in &variants { + let Some(c) = v.coordinates.0.get("GRCh38") else { continue }; + let name = &v.canonical_name; + let mut attrs = format!("ID={name};Name={name}"); + if let Some(anc) = &c.ancestral { + attrs.push_str(&format!(";allele_anc={anc}")); + } + if let Some(der) = &c.derived { + attrs.push_str(&format!(";allele_der={der}")); + } + // GFF3 is 1-based, inclusive; a SNV spans a single position. + gff.push_str(&format!( + "{}\tDecodingUs\tSNV\t{}\t{}\t.\t.\t.\t{}\n", + c.contig, c.position, c.position, attrs + )); + } + Ok(( + StatusCode::OK, + [ + (header::CONTENT_TYPE, "text/plain; charset=utf-8"), + (header::CONTENT_DISPOSITION, "attachment; filename=\"decodingus-variants.gff3\""), + ], + gff, + ) + .into_response()) +} + +#[utoipa::path(get, path = "/api/v1/genome-regions", tag = "genome-regions", + responses((status = 200, description = "Reference builds with region coordinates", body = [String])))] +async fn list_region_builds(State(st): State) -> Result>, AppError> { + Ok(Json(du_db::genome_region::distinct_builds(&st.pool).await?)) +} + +#[utoipa::path(get, path = "/api/v1/genome-regions/{build}", + params(("build" = String, Path, description = "Reference build, e.g. 
GRCh38")), tag = "genome-regions",
+    responses((status = 200, description = "Regions for a build", body = [GenomeRegionDto])))]
+async fn regions_by_build(
+    State(st): State<AppState>,
+    Path(build): Path<String>,
+) -> Result<Json<Vec<GenomeRegionDto>>, AppError> {
+    // Rows for the requested build label, converted to wire DTOs.
+    let rows = du_db::genome_region::for_build(&st.pool, &build).await?;
+    Ok(Json(rows.into_iter().map(GenomeRegionDto::from).collect()))
+}
+
+/// Minimal CSV field escaping (RFC 4180 style): a value containing a comma,
+/// double quote, line feed or carriage return is wrapped in double quotes with
+/// embedded quotes doubled; any other value is returned unchanged.
+fn csv_field(s: &str) -> String {
+    // `\r` matters as much as `\n`: a bare carriage return in an unquoted field
+    // is read as a record break by CRLF-aware parsers and would split the row.
+    if s.contains([',', '"', '\n', '\r']) {
+        format!("\"{}\"", s.replace('"', "\"\""))
+    } else {
+        s.to_string()
+    }
+}
+
+// ── OpenAPI document + router ─────────────────────────────────────────────────
+
+#[derive(OpenApi)]
+#[openapi(
+    info(title = "DecodingUs API", version = "1.0.0", description = "Public read API for the DecodingUs AppView."),
+    paths(
+        y_tree, mt_tree, y_tree_full, mt_tree_full, y_tree_version, mt_tree_version, y_node_samples, mt_node_samples, coverage_benchmarks, sequencer_lab, sequencer_lab_instruments, discovery_proposals, discovery_proposal, test_types, test_type_by_code, references_details, biosample_report, sample_report, biosample_studies,
+        list_variants, get_variant, variants_by_haplogroup, export_metadata, export_variants,
+        export_variants_gff, list_region_builds, regions_by_build,
+        reports_coverage, reports_ancestry, reports_haplogroups,
+        haplogroup_str_signature, haplogroup_age, str_predict,
+    ),
+    components(schemas(
+        VariantDto, HaplogroupNodeDto, TreeDto, TreeVersionDto, LeafSampleDto, PublicationRefDto, LeafSamplesDto, CoverageBenchmarkDto, SequencerLabDto, DiscoveryProposalDto, DiscoveryVariantDto, Page, TestTypeDto, PublicationDto, BiosampleDto,
+        SampleReportDto, HaplogroupPathwayDto, PathwayStepDto, SequencingRunDto, CoverageSummaryDto,
+        AncestryDto, SamplePublicationDto,
+        GenomeRegionDto, StudyDto, ExportMetadataDto, Page, Page, Page,
+        FedCoverageByBuildDto, AncestryShareDto, HaplogroupCountDto, StrSignatureMarkerDto,
+        StrPredictRequest,
StrPredictionDto, StrPredictResponseDto, AgeEstimateDto, + )), + tags( + (name = "tree", description = "Y/MT haplogroup trees"), + (name = "variants", description = "Variant catalog"), + (name = "coverage", description = "Sequencing coverage benchmarks"), + (name = "sequencer", description = "Sequencer instrument → lab lookup"), + (name = "discovery", description = "Proposed haplogroup branches (discovery consensus)"), + (name = "test-types", description = "Test-type taxonomy + empirical coverage norms"), + (name = "references", description = "Publications, biosamples, studies"), + (name = "genome-regions", description = "Multi-build genome regions"), + (name = "reports", description = "Population reports aggregated from the federated mirror"), + ) +)] +pub struct ApiDoc; + +pub fn router() -> Router { + Router::new() + .route("/api/v1/y-tree", get(y_tree)) + .route("/api/v1/mt-tree", get(mt_tree)) + .route("/api/v1/y-tree/full", get(y_tree_full)) + .route("/api/v1/mt-tree/full", get(mt_tree_full)) + .route("/api/v1/y-tree/version", get(y_tree_version)) + .route("/api/v1/mt-tree/version", get(mt_tree_version)) + .route("/api/v1/y-tree/node/:name/samples", get(y_node_samples)) + .route("/api/v1/mt-tree/node/:name/samples", get(mt_node_samples)) + .route("/api/v1/coverage/benchmarks", get(coverage_benchmarks)) + .route("/api/v1/sequencer/lab", get(sequencer_lab)) + .route("/api/v1/sequencer/lab-instruments", get(sequencer_lab_instruments)) + .route("/api/v1/discovery/proposals", get(discovery_proposals)) + .route("/api/v1/discovery/proposals/:id", get(discovery_proposal)) + .route("/api/v1/test-types", get(test_types)) + .route("/api/v1/test-types/:code", get(test_type_by_code)) + .route("/api/v1/reports/coverage", get(reports_coverage)) + .route("/api/v1/reports/ancestry", get(reports_ancestry)) + .route("/api/v1/reports/haplogroups", get(reports_haplogroups)) + .route("/api/v1/references/details", get(references_details)) + 
.route("/api/v1/references/details/:publication_id/biosamples", get(biosample_report)) + .route("/api/v1/samples/:slug", get(sample_report)) + .route("/api/v1/biosample/studies", get(biosample_studies)) + .route("/api/v1/variants", get(list_variants)) + .route("/api/v1/variants/export", get(export_variants)) + .route("/api/v1/variants/export.gff", get(export_variants_gff)) + .route("/api/v1/variants/export/metadata", get(export_metadata)) + .route("/api/v1/variants/:variant_id", get(get_variant)) + .route("/api/v1/haplogroups/:haplogroup_name/variants", get(variants_by_haplogroup)) + .route("/api/v1/haplogroups/:haplogroup_name/str-signature", get(haplogroup_str_signature)) + .route("/api/v1/haplogroups/:haplogroup_name/age", get(haplogroup_age)) + .route("/api/v1/str/predict", post(str_predict)) + .route("/api/v1/genome-regions", get(list_region_builds)) + .route("/api/v1/genome-regions/:build", get(regions_by_build)) + .merge(SwaggerUi::new("/api").url("/api/openapi.json", ApiDoc::openapi())) +} + +#[cfg(test)] +mod tests { + use super::{HaplogroupNodeDto, TreeDto, VariantDto}; + + /// Pins the `/y-tree/full` JSON contract the Navigator's `parse_decodingus_json` consumes: + /// snake_case node fields, a nested `children` array, and per-node `variants[].coordinates` + /// keyed by build label (`hs1`/`GRCh38`). The plain tree omits `variants` entirely. 
+ #[test] + fn full_tree_node_serializes_with_variants_and_coordinates() { + let variant = VariantDto { + id: 5, + canonical_name: "M207".into(), + mutation_type: "SNP".into(), + naming_status: "named".into(), + common_names: vec![], + rs_ids: vec![], + coordinates: serde_json::json!({ + "hs1": {"contig": "chrY", "position": 2_800_000, "ancestral": "A", "derived": "G"} + }), + }; + let node = HaplogroupNodeDto { + id: 10, + name: "R-M207".into(), + haplogroup_type: "Y_DNA".into(), + formed_ybp: None, + tmrca_ybp: None, + sample_count: 0, + variants: vec![variant], + children: vec![], + }; + let v = serde_json::to_value(TreeDto { roots: vec![node] }).unwrap(); + let root = &v["roots"][0]; + assert_eq!(root["id"], 10); + assert_eq!(root["haplogroup_type"], "Y_DNA"); // snake_case, no rename_all + let var = &root["variants"][0]; + assert_eq!(var["canonical_name"], "M207"); + assert_eq!(var["coordinates"]["hs1"]["position"], 2_800_000); + assert_eq!(var["coordinates"]["hs1"]["derived"], "G"); + } + + #[test] + fn plain_tree_omits_empty_variants() { + let node = HaplogroupNodeDto { + id: 1, + name: "A".into(), + haplogroup_type: "Y_DNA".into(), + formed_ybp: None, + tmrca_ybp: None, + sample_count: 0, + variants: vec![], + children: vec![], + }; + let v = serde_json::to_value(TreeDto { roots: vec![node] }).unwrap(); + assert!(v["roots"][0].get("variants").is_none(), "empty variants must be omitted"); + } + + /// End-to-end: a placed non-D2C sample shows as a cumulative `sample_count` on the tree + /// node and in the node's leaf list; a D2C (CITIZEN) sample never appears. 
+ #[tokio::test] + async fn tree_carries_sample_count_and_leaf_list() { + use axum::body::{to_bytes, Body}; + use axum::http::{Request, StatusCode}; + use tower::ServiceExt; + + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping tree-samples endpoint test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + sqlx::query("INSERT INTO tree.haplogroup (name, haplogroup_type) VALUES ('R-M269', 'Y_DNA'::core.dna_type)") + .execute(&pool) + .await + .unwrap(); + // One EXTERNAL (paper) sample + one CITIZEN (D2C) sample, both calling R-M269. + for (src, acc) in [("EXTERNAL", "EX-1"), ("CITIZEN", "CIT-1")] { + sqlx::query( + "INSERT INTO core.biosample (source, accession, original_haplogroups) \ + VALUES ($1::core.biosample_source, $2, '[{\"y\":\"R-M269\"}]'::jsonb)", + ) + .bind(src) + .bind(acc) + .execute(&pool) + .await + .unwrap(); + } + du_db::tree_sample::recompute_placements(&pool, du_domain::enums::DnaType::YDna).await.unwrap(); + let state = crate::state::AppState { pool, key: tower_cookies::Key::generate(), oauth: None }; + + let get = |state: crate::state::AppState, uri: &'static str| async move { + crate::routes::app(state) + .oneshot(Request::builder().uri(uri).body(Body::empty()).unwrap()) + .await + .unwrap() + }; + + // The node carries sample_count = 1 (only the non-D2C sample). + let t = get(state.clone(), "/api/v1/y-tree?rootHaplogroup=R-M269").await; + assert_eq!(t.status(), StatusCode::OK); + let tv: serde_json::Value = serde_json::from_slice(&to_bytes(t.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert_eq!(tv["roots"][0]["name"], "R-M269"); + assert_eq!(tv["roots"][0]["sample_count"], 1); + + // The leaf list has the paper sample and not the D2C one. 
+ let s = get(state, "/api/v1/y-tree/node/R-M269/samples").await; + assert_eq!(s.status(), StatusCode::OK); + let sv: serde_json::Value = serde_json::from_slice(&to_bytes(s.into_body(), usize::MAX).await.unwrap()).unwrap(); + let items = sv["items"].as_array().unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0]["accession"], "EX-1"); + assert_eq!(items[0]["source"], "EXTERNAL"); + } + + #[test] + fn etag_varies_by_shape_dna_root_revision() { + use super::tree_etag; + use du_domain::enums::DnaType; + let base = tree_etag(true, DnaType::YDna, None, 5); + assert_eq!(base, "\"full-y-*-r5\""); + assert_ne!(base, tree_etag(false, DnaType::YDna, None, 5)); // shape + assert_ne!(base, tree_etag(true, DnaType::MtDna, None, 5)); // dna + assert_ne!(base, tree_etag(true, DnaType::YDna, Some("R-M269"), 5)); // root + assert_ne!(base, tree_etag(true, DnaType::YDna, None, 6)); // revision + } + + #[test] + fn if_none_match_handles_list_and_wildcard() { + use super::if_none_match; + use axum::http::{header, HeaderMap, HeaderValue}; + let etag = "\"full-y-*-r5\""; + let mut h = HeaderMap::new(); + assert!(!if_none_match(&h, etag)); // absent + h.insert(header::IF_NONE_MATCH, HeaderValue::from_static("\"full-y-*-r5\"")); + assert!(if_none_match(&h, etag)); + h.insert(header::IF_NONE_MATCH, HeaderValue::from_static("\"other\", \"full-y-*-r5\"")); + assert!(if_none_match(&h, etag), "matches one of a list"); + h.insert(header::IF_NONE_MATCH, HeaderValue::from_static("*")); + assert!(if_none_match(&h, etag), "wildcard matches"); + h.insert(header::IF_NONE_MATCH, HeaderValue::from_static("\"stale-r1\"")); + assert!(!if_none_match(&h, etag), "non-matching validator"); + } + + /// Full conditional-GET cycle against an ephemeral DB: 200 + ETag → 304 on + /// `If-None-Match` → 200 again once the revision marker bumps. 
+ #[tokio::test] + async fn conditional_get_304_until_revision_bumps() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping tree-cache test"); + return; + }; + use axum::body::{to_bytes, Body}; + use axum::http::{header, Request, StatusCode}; + use tower::ServiceExt; + + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + let state = super::AppState { pool: pool.clone(), key: tower_cookies::Key::generate(), oauth: None }; + let app = super::router().with_state(state); + + let plain = || Request::builder().uri("/api/v1/y-tree/full").body(Body::empty()).unwrap(); + let with_inm = |etag: &str| { + Request::builder().uri("/api/v1/y-tree/full").header(header::IF_NONE_MATCH, etag).body(Body::empty()).unwrap() + }; + + // 1) First fetch: 200 + ETag + Last-Modified. + let r1 = app.clone().oneshot(plain()).await.unwrap(); + assert_eq!(r1.status(), StatusCode::OK); + let etag = r1.headers().get(header::ETAG).unwrap().to_str().unwrap().to_string(); + assert!(r1.headers().contains_key(header::LAST_MODIFIED)); + + // 2) Revalidate with the ETag: 304, empty body. + let r2 = app.clone().oneshot(with_inm(&etag)).await.unwrap(); + assert_eq!(r2.status(), StatusCode::NOT_MODIFIED); + assert_eq!(r2.headers().get(header::ETAG).unwrap().to_str().unwrap(), etag); + assert!(to_bytes(r2.into_body(), usize::MAX).await.unwrap().is_empty(), "304 carries no body"); + + // 3) Bump the revision → ETag changes → the old validator no longer matches. + du_db::tree_revision::bump(&pool).await.expect("bump"); + let r3 = app.clone().oneshot(with_inm(&etag)).await.unwrap(); + assert_eq!(r3.status(), StatusCode::OK, "stale validator → full payload"); + let etag3 = r3.headers().get(header::ETAG).unwrap().to_str().unwrap().to_string(); + assert_ne!(etag3, etag, "ETag advanced with the revision"); + + // 4) /version reports the current revision/ETag without a body fetch. 
+ let rv = app.clone().oneshot(Request::builder().uri("/api/v1/y-tree/version").body(Body::empty()).unwrap()).await.unwrap(); + assert_eq!(rv.status(), StatusCode::OK); + let vbody = to_bytes(rv.into_body(), usize::MAX).await.unwrap(); + let v: serde_json::Value = serde_json::from_slice(&vbody).unwrap(); + assert_eq!(v["etag"].as_str().unwrap(), etag3); + assert!(v["revision"].as_i64().unwrap() >= 2); + } + + #[test] + fn sequencer_lab_dto_serializes_snake_case() { + use super::SequencerLabDto; + let dto = SequencerLabDto { + instrument_id: "A00123".into(), + lab_name: "Nebula Genomics".into(), + is_d2c: true, + manufacturer: Some("Illumina".into()), + model_name: Some("NovaSeq 6000".into()), + website_url: Some("https://nebula.org".into()), + }; + let v = serde_json::to_value(dto).unwrap(); + assert_eq!(v["instrument_id"], "A00123"); + assert_eq!(v["lab_name"], "Nebula Genomics"); + assert_eq!(v["is_d2c"], true); + assert_eq!(v["model_name"], "NovaSeq 6000"); + assert_eq!(v["website_url"], "https://nebula.org"); + } + + /// Sequencer endpoints over HTTP (routing + error mapping). Against an empty + /// catalog: an unknown instrument → 404, the bulk list → 200 with `[]`. + /// (The 200-with-data resolution is covered by `du-db/tests/sequencer.rs`.) 
+ #[tokio::test] + async fn sequencer_endpoints_route_and_404() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping sequencer endpoint test"); + return; + }; + use axum::body::{to_bytes, Body}; + use axum::http::{Request, StatusCode}; + use tower::ServiceExt; + + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let state = super::AppState { pool: db.pool().clone(), key: tower_cookies::Key::generate(), oauth: None }; + let app = super::router().with_state(state); + + let r404 = app.clone().oneshot(Request::builder().uri("/api/v1/sequencer/lab?instrument_id=NOPE").body(Body::empty()).unwrap()).await.unwrap(); + assert_eq!(r404.status(), StatusCode::NOT_FOUND); + + let rl = app.clone().oneshot(Request::builder().uri("/api/v1/sequencer/lab-instruments").body(Body::empty()).unwrap()).await.unwrap(); + assert_eq!(rl.status(), StatusCode::OK); + let list: serde_json::Value = serde_json::from_slice(&to_bytes(rl.into_body(), usize::MAX).await.unwrap()).unwrap(); + // The bulk list carries the 0038-seeded YDNA-Warehouse ties (≥ 36). + let items = list.as_array().unwrap(); + assert!(items.len() >= 36); + assert!(items.iter().any(|i| i["instrument_id"] == "A00186" && i["lab_name"] == "Family Tree DNA")); + } + + /// Discovery proposal endpoints over HTTP: an unknown id → 404, the list → 200 + /// with an empty paginated body against an empty catalog. 
+ #[tokio::test] + async fn discovery_endpoints_route_and_404() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping discovery endpoint test"); + return; + }; + use axum::body::{to_bytes, Body}; + use axum::http::{Request, StatusCode}; + use tower::ServiceExt; + + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let state = super::AppState { pool: db.pool().clone(), key: tower_cookies::Key::generate(), oauth: None }; + let app = super::router().with_state(state); + + let r404 = app.clone().oneshot(Request::builder().uri("/api/v1/discovery/proposals/999999").body(Body::empty()).unwrap()).await.unwrap(); + assert_eq!(r404.status(), StatusCode::NOT_FOUND); + + let rl = app.clone().oneshot(Request::builder().uri("/api/v1/discovery/proposals?type=Y_DNA").body(Body::empty()).unwrap()).await.unwrap(); + assert_eq!(rl.status(), StatusCode::OK); + let page: serde_json::Value = serde_json::from_slice(&to_bytes(rl.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert_eq!(page["total"], 0); + assert!(page["items"].as_array().unwrap().is_empty()); + } + + /// Test-type endpoints over HTTP: an unknown code → 404, the list → 200 `[]` + /// against an unseeded catalog. 
+ #[tokio::test] + async fn test_type_endpoints_route_and_404() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping test-type endpoint test"); + return; + }; + use axum::body::{to_bytes, Body}; + use axum::http::{Request, StatusCode}; + use tower::ServiceExt; + + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let state = super::AppState { pool: db.pool().clone(), key: tower_cookies::Key::generate(), oauth: None }; + let app = super::router().with_state(state); + + let r404 = app.clone().oneshot(Request::builder().uri("/api/v1/test-types/NOPE").body(Body::empty()).unwrap()).await.unwrap(); + assert_eq!(r404.status(), StatusCode::NOT_FOUND); + + let rl = app.clone().oneshot(Request::builder().uri("/api/v1/test-types").body(Body::empty()).unwrap()).await.unwrap(); + assert_eq!(rl.status(), StatusCode::OK); + let list: serde_json::Value = serde_json::from_slice(&to_bytes(rl.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert!(list.as_array().unwrap().is_empty()); + } +} diff --git a/rust/crates/du-web/src/api/tree.rs b/rust/crates/du-web/src/api/tree.rs new file mode 100644 index 00000000..929598c7 --- /dev/null +++ b/rust/crates/du-web/src/api/tree.rs @@ -0,0 +1,217 @@ +//! Tree API: haplotree assembly from `du-db` subtree rows, ETag/conditional-GET +//! cache revalidation, and the `/api/v1/{y,mt}-tree[...]` handlers. Wire DTOs live +//! in [`super::dto`]; the router + OpenAPI doc that mount these live in [`super`]. 
+ +use super::dto::{HaplogroupNodeDto, LeafSampleDto, LeafSamplesDto, RootParams, TreeDto, TreeVersionDto, VariantDto}; +use crate::error::AppError; +use crate::state::AppState; +use axum::extract::{Path, Query, State}; +use axum::http::{header, HeaderMap, StatusCode}; +use axum::response::{IntoResponse, Response}; +use axum::Json; +use du_domain::enums::DnaType; +use std::collections::HashMap; + +// ── tree assembly ──────────────────────────────────────────────────────────── + +fn assemble_forest( + nodes: Vec, + variants: &HashMap>, + counts: &HashMap, +) -> Vec { + let mut by_parent: HashMap, Vec> = HashMap::new(); + for n in nodes { + by_parent.entry(n.parent_id).or_default().push(n); + } + build_level(None, &by_parent, variants, counts, 0) +} + +fn build_level( + parent: Option, + by_parent: &HashMap, Vec>, + variants: &HashMap>, + counts: &HashMap, + depth: u16, +) -> Vec { + // Depth guard: tree-merge data can contain cycles; cap recursion defensively. + if depth > 256 { + return Vec::new(); + } + let mut kids = match by_parent.get(&parent) { + Some(k) => k.iter().collect::>(), + None => return Vec::new(), + }; + kids.sort_by(|a, b| a.name.cmp(&b.name)); + kids.into_iter() + .map(|n| { + let children = build_level(Some(n.id), by_parent, variants, counts, depth + 1); + // Cumulative: this node's own placed leaves + everything under its children. 
+ let sample_count = + counts.get(&n.id).copied().unwrap_or(0) + children.iter().map(|c| c.sample_count).sum::(); + HaplogroupNodeDto { + id: n.id, + name: n.name.clone(), + haplogroup_type: n.haplogroup_type.clone(), + formed_ybp: n.formed_ybp, + tmrca_ybp: n.tmrca_ybp, + sample_count, + variants: variants.get(&n.id).cloned().unwrap_or_default(), + children, + } + }) + .collect() +} + +async fn build_tree(st: &AppState, dna: DnaType, root: Option<&str>) -> Result { + let nodes = du_db::haplogroup::subtree(&st.pool, dna, root).await?; + let counts = du_db::tree_sample::counts_by_node(&st.pool, dna).await?; + Ok(TreeDto { roots: assemble_forest(nodes, &HashMap::new(), &counts) }) +} + +/// Like [`build_tree`] but embeds each node's defining variants (with multi-build +/// coordinates) — one payload a client can build a placement tree from without per-node +/// fetches. Variants are loaded for the whole lineage in one query and grouped by node. +async fn build_tree_full(st: &AppState, dna: DnaType, root: Option<&str>) -> Result { + let nodes = du_db::haplogroup::subtree(&st.pool, dna, root).await?; + let mut variants: HashMap> = HashMap::new(); + for (hid, v) in du_db::variant::for_dna_type_grouped(&st.pool, dna).await? { + variants.entry(hid).or_default().push(VariantDto::from(v)); + } + let counts = du_db::tree_sample::counts_by_node(&st.pool, dna).await?; + Ok(TreeDto { roots: assemble_forest(nodes, &variants, &counts) }) +} + +// ── tree cache revalidation (ETag / conditional GET) ───────────────────────── + +/// The cache token for a tree representation. Strong ETag keyed on the persisted +/// tree revision (`du_db::tree_revision`) plus the things that vary the payload: +/// full-vs-plain, dna type, and subtree root. The revision is bumped by every +/// tree-mutating op (topology, variant set, coordinate enrichment, naming), so a +/// matching `If-None-Match` is a safe 304. 
+pub(crate) fn tree_etag(full: bool, dna: DnaType, root: Option<&str>, revision: i64) -> String { + let shape = if full { "full" } else { "plain" }; + let dna = if matches!(dna, DnaType::YDna) { "y" } else { "mt" }; + format!("\"{shape}-{dna}-{}-r{revision}\"", root.unwrap_or("*")) +} + +/// Whether the request's `If-None-Match` matches our current `etag` (a `*` +/// wildcard or a comma-separated list of strong validators). +pub(crate) fn if_none_match(headers: &HeaderMap, etag: &str) -> bool { + let Some(val) = headers.get(header::IF_NONE_MATCH).and_then(|v| v.to_str().ok()) else { + return false; + }; + val.split(',').map(str::trim).any(|t| t == "*" || t == etag) +} + +/// HTTP-date (`Last-Modified`) for a revision timestamp. +fn http_date(ts: chrono::DateTime) -> String { + ts.format("%a, %d %b %Y %H:%M:%S GMT").to_string() +} + +/// Conditional GET for a tree endpoint: read the cheap revision marker, build the +/// ETag, and short-circuit to **304** when `If-None-Match` matches — *before* the +/// expensive tree query/serialization. Otherwise build the payload and attach the +/// `ETag` / `Last-Modified` / `Cache-Control: no-cache` headers. +async fn tree_conditional( + st: &AppState, + headers: &HeaderMap, + dna: DnaType, + root: Option<&str>, + full: bool, +) -> Result { + let (revision, updated_at) = du_db::tree_revision::current(&st.pool).await?; + let etag = tree_etag(full, dna, root, revision); + let last_modified = http_date(updated_at); + let cache_headers = [ + (header::ETAG, etag.clone()), + (header::LAST_MODIFIED, last_modified), + (header::CACHE_CONTROL, "no-cache".to_string()), + ]; + if if_none_match(headers, &etag) { + return Ok((StatusCode::NOT_MODIFIED, cache_headers).into_response()); + } + let dto = if full { build_tree_full(st, dna, root).await? } else { build_tree(st, dna, root).await? 
}; + Ok((StatusCode::OK, cache_headers, Json(dto)).into_response()) +} + +/// The `/…-tree/version` body: revision + the full-tree ETag, so the Edge can +/// check the version (and prime an `If-None-Match`) without fetching the tree. +async fn tree_version(st: &AppState, dna: DnaType) -> Result, AppError> { + let (revision, updated_at) = du_db::tree_revision::current(&st.pool).await?; + Ok(Json(TreeVersionDto { + revision, + etag: tree_etag(true, dna, None, revision), + updated_at: updated_at.to_rfc3339(), + })) +} + +// ── handlers ───────────────────────────────────────────────────────────────── + +#[utoipa::path(get, path = "/api/v1/y-tree", params(RootParams), tag = "tree", + responses((status = 200, description = "Y-chromosome haplogroup tree", body = TreeDto), + (status = 304, description = "Not modified (ETag matched If-None-Match)")))] +pub(crate) async fn y_tree(State(st): State, headers: HeaderMap, Query(q): Query) -> Result { + tree_conditional(&st, &headers, DnaType::YDna, q.root(), false).await +} + +#[utoipa::path(get, path = "/api/v1/mt-tree", params(RootParams), tag = "tree", + responses((status = 200, description = "Mitochondrial haplogroup tree", body = TreeDto), + (status = 304, description = "Not modified (ETag matched If-None-Match)")))] +pub(crate) async fn mt_tree(State(st): State, headers: HeaderMap, Query(q): Query) -> Result { + tree_conditional(&st, &headers, DnaType::MtDna, q.root(), false).await +} + +#[utoipa::path(get, path = "/api/v1/y-tree/full", params(RootParams), tag = "tree", + responses((status = 200, description = "Y-chromosome haplogroup tree with per-node defining variants", body = TreeDto), + (status = 304, description = "Not modified (ETag matched If-None-Match)")))] +pub(crate) async fn y_tree_full(State(st): State, headers: HeaderMap, Query(q): Query) -> Result { + tree_conditional(&st, &headers, DnaType::YDna, q.root(), true).await +} + +#[utoipa::path(get, path = "/api/v1/mt-tree/full", params(RootParams), tag = 
"tree", + responses((status = 200, description = "Mitochondrial haplogroup tree with per-node defining variants", body = TreeDto), + (status = 304, description = "Not modified (ETag matched If-None-Match)")))] +pub(crate) async fn mt_tree_full(State(st): State, headers: HeaderMap, Query(q): Query) -> Result { + tree_conditional(&st, &headers, DnaType::MtDna, q.root(), true).await +} + +#[utoipa::path(get, path = "/api/v1/y-tree/version", tag = "tree", + responses((status = 200, description = "Current Y-tree revision + ETag (cheap cache-revalidation probe)", body = TreeVersionDto)))] +pub(crate) async fn y_tree_version(State(st): State) -> Result, AppError> { + tree_version(&st, DnaType::YDna).await +} + +#[utoipa::path(get, path = "/api/v1/mt-tree/version", tag = "tree", + responses((status = 200, description = "Current mt-tree revision + ETag (cheap cache-revalidation probe)", body = TreeVersionDto)))] +pub(crate) async fn mt_tree_version(State(st): State) -> Result, AppError> { + tree_version(&st, DnaType::MtDna).await +} + +async fn node_samples(st: &AppState, dna: DnaType, name: &str) -> Result, AppError> { + // Resolve the requested name/SNP to a canonical node, then list its at-or-below leaves. + let Some(node) = du_db::haplogroup::resolve_name_or_variant(&st.pool, name, dna).await? else { + return Err(AppError::NotFound(format!("haplogroup {name}"))); + }; + let items = du_db::tree_sample::samples_under(&st.pool, &node, dna) + .await? 
+ .into_iter() + .map(LeafSampleDto::from) + .collect(); + Ok(Json(LeafSamplesDto { items })) +} + +#[utoipa::path(get, path = "/api/v1/y-tree/node/{name}/samples", + params(("name" = String, Path, description = "Haplogroup name or defining SNP")), tag = "tree", + responses((status = 200, description = "Non-D2C sample leaves at or below the Y node", body = LeafSamplesDto), + (status = 404, description = "Unknown haplogroup")))] +pub(crate) async fn y_node_samples(State(st): State, Path(name): Path) -> Result, AppError> { + node_samples(&st, DnaType::YDna, &name).await +} + +#[utoipa::path(get, path = "/api/v1/mt-tree/node/{name}/samples", + params(("name" = String, Path, description = "Haplogroup name or defining variant")), tag = "tree", + responses((status = 200, description = "Non-D2C sample leaves at or below the mt node", body = LeafSamplesDto), + (status = 404, description = "Unknown haplogroup")))] +pub(crate) async fn mt_node_samples(State(st): State, Path(name): Path) -> Result, AppError> { + node_samples(&st, DnaType::MtDna, &name).await +} diff --git a/rust/crates/du-web/src/auth.rs b/rust/crates/du-web/src/auth.rs new file mode 100644 index 00000000..8a7fdfd8 --- /dev/null +++ b/rust/crates/du-web/src/auth.rs @@ -0,0 +1,130 @@ +//! Session auth: password hashing/verification, the signed-cookie session, and +//! request extractors. Federated (AT Protocol) login lands later in du-atproto; +//! this covers local credential login + RBAC. + +use crate::state::AppState; +use argon2::password_hash::{rand_core::OsRng, PasswordHash, PasswordHasher, PasswordVerifier, SaltString}; +use argon2::Argon2; +use axum::extract::FromRequestParts; +use axum::http::request::Parts; +use axum::response::{IntoResponse, Redirect, Response}; +use serde::{Deserialize, Serialize}; +use tower_cookies::Cookies; +use uuid::Uuid; + +pub const SESSION_COOKIE: &str = "session"; + +/// Hash a new password with Argon2id. 
+pub fn hash_password(password: &str) -> Result { + let salt = SaltString::generate(&mut OsRng); + Argon2::default() + .hash_password(password.as_bytes(), &salt) + .map(|h| h.to_string()) + .map_err(|e| e.to_string()) +} + +/// Verify a password against a stored hash. Supports Argon2 (new) and bcrypt +/// (legacy) hashes, dispatching on the hash prefix. +pub fn verify_password(password: &str, hash: &str) -> bool { + if hash.starts_with("$argon2") { + PasswordHash::new(hash) + .map(|parsed| Argon2::default().verify_password(password.as_bytes(), &parsed).is_ok()) + .unwrap_or(false) + } else if hash.starts_with("$2") { + bcrypt::verify(password, hash).unwrap_or(false) + } else { + false + } +} + +/// Authenticated session, stored in a signed cookie. +#[derive(Clone, Serialize, Deserialize)] +pub struct Session { + pub user_id: Uuid, + pub display_name: String, + pub roles: Vec, +} + +impl Session { + pub fn has_role(&self, role: &str) -> bool { + self.roles.iter().any(|r| r == role) + } + #[allow(dead_code)] // part of the Session API; used by admin-only routes to come + pub fn is_admin(&self) -> bool { + self.has_role("Admin") + } + /// May use the curator tools. + pub fn is_curator(&self) -> bool { + self.has_role("Admin") || self.has_role("TreeCurator") || self.has_role("Curator") + } +} + +fn read_session(cookies: &Cookies, state: &AppState) -> Option { + let value = cookies.signed(&state.key).get(SESSION_COOKIE)?; + serde_json::from_str(value.value()).ok() +} + +/// Minimal user info the shared navbar needs. +pub struct NavUser { + pub display_name: String, + pub is_curator: bool, +} + +/// Optional current user — never rejects. +pub struct MaybeUser(pub Option); + +impl MaybeUser { + /// Navbar view of the current user, if signed in. 
+ pub fn nav(&self) -> Option { + self.0.as_ref().map(|s| NavUser { + display_name: s.display_name.clone(), + is_curator: s.is_curator(), + }) + } +} + +#[axum::async_trait] +impl FromRequestParts for MaybeUser { + type Rejection = std::convert::Infallible; + + async fn from_request_parts(parts: &mut Parts, state: &AppState) -> Result { + let cookies = Cookies::from_request_parts(parts, state).await.expect("cookie layer present"); + Ok(MaybeUser(read_session(&cookies, state))) + } +} + +/// A session with curator privileges, or a redirect to /login. Use as a handler +/// argument to gate curator routes. +pub struct Curator(pub Session); + +#[axum::async_trait] +impl FromRequestParts for Curator { + type Rejection = Response; + + async fn from_request_parts(parts: &mut Parts, state: &AppState) -> Result { + let cookies = Cookies::from_request_parts(parts, state).await.expect("cookie layer present"); + match read_session(&cookies, state) { + Some(s) if s.is_curator() => Ok(Curator(s)), + Some(_) => Err(crate::error::AppError::Forbidden.into_response()), + None => Err(Redirect::to("/login").into_response()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn argon2_hash_roundtrips_and_rejects_wrong() { + let h = hash_password("correct horse").unwrap(); + assert!(h.starts_with("$argon2")); + assert!(verify_password("correct horse", &h)); + assert!(!verify_password("Tr0ub4dor", &h)); + } + + #[test] + fn unknown_hash_format_is_rejected() { + assert!(!verify_password("x", "plaintext")); + } +} diff --git a/rust/crates/du-web/src/error.rs b/rust/crates/du-web/src/error.rs new file mode 100644 index 00000000..d3a5aa64 --- /dev/null +++ b/rust/crates/du-web/src/error.rs @@ -0,0 +1,49 @@ +//! Handler error type. Maps data-layer failures to HTTP responses. 
+ +use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; + +pub enum AppError { + Db(du_db::DbError), + NotFound(String), + Forbidden, + /// A user-facing validation/conflict message rendered as 422. + BadRequest(String), + /// An upstream/federation call failed (DID resolution, PDS, OAuth) — 502. + Upstream(String), +} + +impl From for AppError { + fn from(e: du_db::DbError) -> Self { + // A surfaced precondition/uniqueness conflict is a client error (422), + // not a 500. + match e { + du_db::DbError::Conflict(msg) => AppError::BadRequest(msg), + other => AppError::Db(other), + } + } +} + +impl From for AppError { + fn from(e: du_atproto::AtprotoError) -> Self { + AppError::Upstream(e.to_string()) + } +} + +impl IntoResponse for AppError { + fn into_response(self) -> Response { + match self { + AppError::Db(e) => { + tracing::error!(error = %e, "database error"); + (StatusCode::INTERNAL_SERVER_ERROR, "internal server error").into_response() + } + AppError::NotFound(what) => (StatusCode::NOT_FOUND, format!("not found: {what}")).into_response(), + AppError::Forbidden => (StatusCode::FORBIDDEN, "forbidden").into_response(), + AppError::BadRequest(msg) => (StatusCode::UNPROCESSABLE_ENTITY, msg).into_response(), + AppError::Upstream(msg) => { + tracing::warn!(error = %msg, "upstream/federation error"); + (StatusCode::BAD_GATEWAY, "upstream error").into_response() + } + } + } +} diff --git a/rust/crates/du-web/src/htmx.rs b/rust/crates/du-web/src/htmx.rs new file mode 100644 index 00000000..23851259 --- /dev/null +++ b/rust/crates/du-web/src/htmx.rs @@ -0,0 +1,110 @@ +//! HTMX request/response plumbing for server-driven hypermedia (plan §4). + +use axum::extract::FromRequestParts; +use axum::http::header::{HeaderName, HeaderValue}; +use axum::http::request::Parts; +use axum::response::{IntoResponseParts, ResponseParts}; +use std::convert::Infallible; + +/// Whether the request came from HTMX, and whether it is a history-restore. 
+/// +/// `wants_fragment_for(target_id)` is the negotiation rule: serve just the inner fragment for +/// HTMX swaps aimed at `target_id`, but serve the FULL page for normal navigations AND for +/// htmx history restoration (back/forward), which expects the whole document. +pub struct HxRequest { + /// Set when the `hx-request` header is exactly `true`. + pub is_htmx: bool, + /// Set when the `hx-history-restore-request` header is exactly `true`. + pub is_history_restore: bool, + /// The `HX-Target` element id, if the swap names one. + pub target: Option, +} + +impl HxRequest { + /// Serve the inner fragment only when this is an HTMX swap aimed at the + /// given target id — NOT for boosted full-page navigations (which target the + /// body and expect a whole document) nor history restoration. + pub fn wants_fragment_for(&self, target_id: &str) -> bool { + self.is_htmx + && !self.is_history_restore + && self.target.as_deref() == Some(target_id) + } +} + +#[axum::async_trait] +impl FromRequestParts for HxRequest { + type Rejection = Infallible; + + async fn from_request_parts(parts: &mut Parts, _: &S) -> Result { + let has = |name: &str| parts.headers.get(name).is_some_and(|v| v == "true"); + let target = parts + .headers + .get("hx-target") + .and_then(|v| v.to_str().ok()) + .map(str::to_owned); + Ok(HxRequest { + is_htmx: has("hx-request"), + is_history_restore: has("hx-history-restore-request"), + target, + }) + } +} + +/// Builder for HTMX response headers, so state transitions are server-driven +/// (HX-Push-Url / HX-Trigger / HX-Redirect / HX-Location / HX-Reswap). +#[derive(Default)] +pub struct HxHeaders { + push_url: Option, + trigger: Option, + redirect: Option, + location: Option, + reswap: Option, +} + +// Builder surface kept complete for upcoming write flows (curator CRUD, forms); +// only push_url is exercised by the read-only slice so far.
+#[allow(dead_code)] +impl HxHeaders { + pub fn new() -> Self { + Self::default() + } + pub fn push_url(mut self, url: impl Into) -> Self { + self.push_url = Some(url.into()); + self + } + pub fn trigger(mut self, event: impl Into) -> Self { + self.trigger = Some(event.into()); + self + } + pub fn redirect(mut self, url: impl Into) -> Self { + self.redirect = Some(url.into()); + self + } + pub fn location(mut self, url: impl Into) -> Self { + self.location = Some(url.into()); + self + } + pub fn reswap(mut self, spec: impl Into) -> Self { + self.reswap = Some(spec.into()); + self + } +} + +impl IntoResponseParts for HxHeaders { + type Error = Infallible; + + fn into_response_parts(self, mut res: ResponseParts) -> Result { + let headers = res.headers_mut(); + let mut set = |name: &'static str, val: Option| { + if let Some(v) = val { + if let Ok(hv) = HeaderValue::from_str(&v) { + headers.insert(HeaderName::from_static(name), hv); + } + } + }; + set("hx-push-url", self.push_url); + set("hx-trigger", self.trigger); + set("hx-redirect", self.redirect); + set("hx-location", self.location); + set("hx-reswap", self.reswap); + Ok(res) + } +} diff --git a/rust/crates/du-web/src/i18n.rs b/rust/crates/du-web/src/i18n.rs new file mode 100644 index 00000000..5bb33cac --- /dev/null +++ b/rust/crates/du-web/src/i18n.rs @@ -0,0 +1,188 @@ +//! Lightweight i18n: Play-style `key=value` catalogs embedded at compile time, +//! a `Lang` + `T` translator, and a `Locale` extractor that resolves the active +//! language from the `lang` cookie then `Accept-Language` (default English). +//! +//! Replaces Play's `messages`/`Messages`. Keeping it dependency-free (no fluent) +//! matches the catalog format the project already used. 
+ +use axum::extract::FromRequestParts; +use axum::http::header::{ACCEPT_LANGUAGE, COOKIE}; +use axum::http::request::Parts; +use std::collections::HashMap; +use std::convert::Infallible; +use std::sync::OnceLock; + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum Lang { + En, + Es, + Fr, +} + +impl Lang { + pub fn code(self) -> &'static str { + match self { + Lang::En => "en", + Lang::Es => "es", + Lang::Fr => "fr", + } + } + + pub fn parse(s: &str) -> Option { + match s.get(0..2).map(str::to_ascii_lowercase).as_deref() { + Some("en") => Some(Lang::En), + Some("es") => Some(Lang::Es), + Some("fr") => Some(Lang::Fr), + _ => None, + } + } + + /// All languages, for rendering the switcher. + pub fn all() -> [Lang; 3] { + [Lang::En, Lang::Es, Lang::Fr] + } +} + +const EN_SRC: &str = include_str!("../../../locales/en.txt"); +const ES_SRC: &str = include_str!("../../../locales/es.txt"); +const FR_SRC: &str = include_str!("../../../locales/fr.txt"); + +fn parse_catalog(src: &'static str) -> HashMap<&'static str, &'static str> { + src.lines() + .filter_map(|line| { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + return None; + } + line.split_once('=').map(|(k, v)| (k.trim(), v.trim())) + }) + .collect() +} + +fn catalog(lang: Lang) -> &'static HashMap<&'static str, &'static str> { + static EN: OnceLock> = OnceLock::new(); + static ES: OnceLock> = OnceLock::new(); + static FR: OnceLock> = OnceLock::new(); + match lang { + Lang::En => EN.get_or_init(|| parse_catalog(EN_SRC)), + Lang::Es => ES.get_or_init(|| parse_catalog(ES_SRC)), + Lang::Fr => FR.get_or_init(|| parse_catalog(FR_SRC)), + } +} + +/// Translator for one language. Cheap to copy. +#[derive(Clone, Copy)] +pub struct T { + pub lang: Lang, +} + +impl T { + pub fn new(lang: Lang) -> Self { + T { lang } + } + + /// Look up a key in the active language, falling back to English, then to + /// the key itself. 
Returned slices are `'static` (from the embedded catalogs) + /// or the borrowed key, so no allocation. + pub fn get<'a>(&self, key: &'a str) -> &'a str { + catalog(self.lang) + .get(key) + .or_else(|| catalog(Lang::En).get(key)) + .copied() + .unwrap_or(key) + } + + /// True when `lang` is the active language (for highlighting the switcher). + pub fn is(&self, lang: Lang) -> bool { + self.lang == lang + } + + /// Options for the language switcher: (code, localized label, active). + pub fn languages(&self) -> Vec { + Lang::all() + .into_iter() + .map(|l| LangOption { + code: l.code(), + label: self.get(match l { + Lang::En => "lang.en", + Lang::Es => "lang.es", + Lang::Fr => "lang.fr", + }), + active: self.is(l), + }) + .collect() + } +} + +pub struct LangOption { + pub code: &'static str, + pub label: &'static str, + pub active: bool, +} + +/// Per-request locale: the translator plus the current path (percent-encoded) +/// so the language switcher can return the user to the same page. +pub struct Locale { + pub t: T, + /// Current path+query, percent-encoded for use as a `?next=` value. + pub next: String, +} + +fn lang_from_cookie(parts: &Parts) -> Option { + let raw = parts.headers.get(COOKIE)?.to_str().ok()?; + raw.split(';') + .filter_map(|kv| kv.trim().split_once('=')) + .find(|(k, _)| *k == "lang") + .and_then(|(_, v)| Lang::parse(v)) +} + +fn lang_from_accept(parts: &Parts) -> Option { + let raw = parts.headers.get(ACCEPT_LANGUAGE)?.to_str().ok()?; + // First tag wins (ignore q-weights for our small set). 
+ raw.split(',').next().and_then(|tag| Lang::parse(tag.trim())) +} + +#[axum::async_trait] +impl FromRequestParts for Locale { + type Rejection = Infallible; + + async fn from_request_parts(parts: &mut Parts, _: &S) -> Result { + let lang = lang_from_cookie(parts) + .or_else(|| lang_from_accept(parts)) + .unwrap_or(Lang::En); + let path_q = parts + .uri + .path_and_query() + .map(|pq| pq.as_str()) + .unwrap_or("/"); + let next = percent_encoding::utf8_percent_encode( + path_q, + percent_encoding::NON_ALPHANUMERIC, + ) + .to_string(); + Ok(Locale { t: T::new(lang), next }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn fallback_chain_en_then_key() { + let es = T::new(Lang::Es); + assert_eq!(es.get("nav.home"), "Inicio"); + // a key only present implicitly falls back to English, then the key. + assert_eq!(es.get("does.not.exist"), "does.not.exist"); + } + + #[test] + fn catalogs_share_keys_with_english() { + let en = catalog(Lang::En); + for lang in [Lang::Es, Lang::Fr] { + for k in en.keys() { + assert!(catalog(lang).contains_key(k), "{} missing key {k}", lang.code()); + } + } + } +} diff --git a/rust/crates/du-web/src/main.rs b/rust/crates/du-web/src/main.rs new file mode 100644 index 00000000..7f26bbd8 --- /dev/null +++ b/rust/crates/du-web/src/main.rs @@ -0,0 +1,77 @@ +//! DecodingUs web binary (Axum). HTML + (later) JSON API + firehose. +//! +//! Public surface (trees, variants, references, map, coverage) plus session +//! auth and the curator tools. Server-rendered with Askama, driven by HTMX. +//! +//! Dev helper: `decodingus hash-password ` prints an Argon2 hash for +//! seeding `ident.user_login_info.password_hash`. 
+ +use std::net::SocketAddr; +use tower_cookies::Key; + +mod api; +mod auth; +mod error; +mod htmx; +mod i18n; +mod render; +mod oauth; +mod routes; +mod sig; +mod state; +mod tree_layout; + +use state::AppState; + +/// Derive a 64-byte cookie-signing key from APP_SECRET (extending short secrets +/// so dev defaults work; set a long random APP_SECRET in production). +fn cookie_key() -> Key { + let secret = std::env::var("APP_SECRET") + .unwrap_or_else(|_| "dev-insecure-app-secret-change-me-in-production".to_string()); + let mut seed = secret.into_bytes(); + let base = seed.clone(); + while seed.len() < 64 { + seed.extend_from_slice(&base); + } + Key::from(&seed[..64]) +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // Dev helper: hash a password and exit (no DB needed). + let args: Vec = std::env::args().collect(); + if args.get(1).map(String::as_str) == Some("hash-password") { + let pw = args.get(2).cloned().unwrap_or_default(); + println!("{}", auth::hash_password(&pw).map_err(anyhow::Error::msg)?); + return Ok(()); + } + + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "info,du_web=debug".into()), + ) + .init(); + + let app = match std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) { + Some(url) => { + let pool = du_db::connect(&url, 8).await?; + du_db::run_migrations(&pool).await?; + let oauth = oauth::OauthClient::from_env(); + tracing::info!(oauth = oauth.is_some(), "connected to database; migrations applied"); + routes::app(AppState { pool, key: cookie_key(), oauth }) + } + None => { + tracing::warn!("DATABASE_URL not set — serving /health only"); + routes::health_only() + } + }; + + let port: u16 = std::env::var("PORT").ok().and_then(|p| p.parse().ok()).unwrap_or(9000); + let addr = SocketAddr::from(([0, 0, 0, 0], port)); + tracing::info!(%addr, "decodingus web starting"); + + let listener = tokio::net::TcpListener::bind(addr).await?; + 
axum::serve(listener, app).await?; + Ok(()) +} diff --git a/rust/crates/du-web/src/oauth.rs b/rust/crates/du-web/src/oauth.rs new file mode 100644 index 00000000..c6bf5022 --- /dev/null +++ b/rust/crates/du-web/src/oauth.rs @@ -0,0 +1,413 @@ +//! AT Protocol OAuth client wiring. Serves the client-metadata + JWKS documents +//! (concrete artifacts the Edge team registers/reviews) and drives the login +//! handshake: resolve handle -> DID -> PDS -> authorization server, PAR, redirect, +//! then token exchange on callback and session establishment. +//! +//! The interactive flow needs a live PDS/authorization server, so it is verified +//! jointly with the Edge team. See docs/atproto-oauth-findings.md. + +use crate::auth::{Session, SESSION_COOKIE}; +use crate::error::AppError; +use crate::state::AppState; +use axum::extract::{Query, State}; +use axum::response::{IntoResponse, Redirect, Response}; +use axum::routing::get; +use axum::{Json, Router}; +use du_atproto::did::Did; +use du_atproto::oauth::{ + authorize_url, client_assertion, discover_auth_server, dpop_proof, par_form, par_form_public, + token_form, token_form_public, AuthServerMetadata, ClientMetadata, EcKey, Pkce, +}; +use du_atproto::Resolver; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; +use tower_cookies::{Cookie, Cookies}; + +const FLOW_COOKIE: &str = "du_oauth_flow"; + +/// Configured AT Protocol OAuth client (absent when OAuth isn't configured). +pub struct OauthClient { + pub ec_key: EcKey, + pub metadata: ClientMetadata, + pub scope: String, + pub http: reqwest::Client, + pub resolver: Resolver, + /// Dev-only: a fixed PDS to use as the authorization server (`DU_OAUTH_DEV_PDS`), + /// bypassing handle→DID→PDS resolution. Enables the `/login/atproto/dev` + /// public (loopback) client flow against a local PDS. + pub dev_pds: Option, + /// Dev-only: the loopback redirect URI for the public client + /// (`DU_OAUTH_LOOPBACK` + `/oauth/callback`). 
+ pub loopback_redirect: Option, +} + +impl OauthClient { + /// Build from env. Returns None (OAuth disabled) when `OAUTH_BASE_URL` is unset. + pub fn from_env() -> Option> { + let base_url = std::env::var("OAUTH_BASE_URL").ok().filter(|s| !s.is_empty())?; + let scope = std::env::var("OAUTH_SCOPE") + .unwrap_or_else(|_| "atproto transition:generic".to_string()); + let ec_key = match std::env::var("OAUTH_EC_KEY") { + Ok(b64) => match EcKey::from_base64(&b64) { + Ok(k) => k, + Err(e) => { + tracing::error!(error = %e, "OAUTH_EC_KEY invalid; OAuth disabled"); + return None; + } + }, + Err(_) => { + let k = EcKey::generate(); + tracing::warn!( + "OAUTH_EC_KEY unset — generated an ephemeral key (set OAUTH_EC_KEY={} to persist)", + k.to_base64() + ); + k + } + }; + let metadata = ClientMetadata::confidential_web(&base_url, &scope); + Some(Arc::new(OauthClient { + ec_key, + metadata, + scope, + http: build_http_client(), + resolver: Resolver::new(), + dev_pds: std::env::var("DU_OAUTH_DEV_PDS").ok().filter(|s| !s.is_empty()), + loopback_redirect: std::env::var("DU_OAUTH_LOOPBACK") + .ok() + .filter(|s| !s.is_empty()) + .map(|b| format!("{}/oauth/callback", b.trim_end_matches('/'))), + })) + } +} + +/// Build the OAuth HTTP client. In dev, optionally trust a local CA +/// (`DU_OAUTH_DEV_CA`, a PEM path) and pin a host→IP (`DU_OAUTH_DEV_RESOLVE`, +/// `host:ip`) so a TLS-proxied local PDS at its canonical `https://` name is +/// reachable without editing `/etc/hosts`. Plain default client otherwise. 
+fn build_http_client() -> reqwest::Client { + let mut builder = reqwest::Client::builder(); + if let Some(ca_path) = std::env::var("DU_OAUTH_DEV_CA").ok().filter(|s| !s.is_empty()) { + match std::fs::read(&ca_path).map_err(|e| e.to_string()).and_then(|pem| { + reqwest::Certificate::from_pem(&pem).map_err(|e| e.to_string()) + }) { + Ok(cert) => { + builder = builder.add_root_certificate(cert); + tracing::warn!(ca = %ca_path, "OAuth dev: trusting local CA"); + } + Err(e) => tracing::error!(error = %e, "DU_OAUTH_DEV_CA unreadable; ignoring"), + } + } + if let Some(spec) = std::env::var("DU_OAUTH_DEV_RESOLVE").ok().filter(|s| !s.is_empty()) { + if let Some((host, ip)) = spec.rsplit_once(':') { + if let Ok(addr) = format!("{ip}:443").parse::() { + builder = builder.resolve(host, addr); + tracing::warn!(%host, %ip, "OAuth dev: pinned host→IP for resolution"); + } + } + } + builder.build().unwrap_or_default() +} + +/// Minimal percent-encoding for embedding a redirect_uri in a loopback client_id. +fn pct(s: &str) -> String { + let mut out = String::new(); + for b in s.bytes() { + match b { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' 
| b'~' => out.push(b as char), + _ => out.push_str(&format!("%{b:02X}")), + } + } + out +} + +pub fn router() -> Router { + Router::new() + .route("/oauth/client-metadata.json", get(client_metadata)) + .route("/oauth/jwks.json", get(jwks)) + .route("/login/atproto", get(login)) + .route("/login/atproto/dev", get(login_dev)) + .route("/oauth/callback", get(callback)) +} + +fn now() -> i64 { + SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_secs() as i64).unwrap_or(0) +} + +fn require(st: &AppState) -> Result<&Arc, AppError> { + st.oauth.as_ref().ok_or_else(|| AppError::NotFound("OAuth not configured".into())) +} + +async fn client_metadata(State(st): State) -> Result { + let oc = require(&st)?; + Ok(Json(&oc.metadata).into_response()) +} + +async fn jwks(State(st): State) -> Result { + let oc = require(&st)?; + Ok(Json(serde_json::json!({ "keys": [oc.ec_key.public_jwk()] })).into_response()) +} + +#[derive(Deserialize)] +struct LoginQuery { + handle: String, +} + +#[derive(Serialize, Deserialize)] +struct FlowState { + state: String, + verifier: String, + token_endpoint: String, + issuer: String, + /// Public (loopback) client flow — token exchange uses PKCE without a + /// client assertion. Defaults to the confidential path for back-compat. + #[serde(default)] + public: bool, + /// The client_id used at PAR/authorize (loopback client_id for the public flow). + #[serde(default)] + client_id: String, + /// The redirect_uri registered with this flow. + #[serde(default)] + redirect_uri: String, +} + +/// POST a form with a DPoP proof, retrying once if the server demands a nonce. +async fn post_with_dpop( + oc: &OauthClient, + url: &str, + form: &[(String, String)], +) -> Result { + // First attempt: no nonce yet (the server supplies one via a 400 + DPoP-Nonce). 
+ let proof = dpop_proof(&oc.ec_key, "POST", url, now(), None, None); + let resp = oc + .http + .post(url) + .header("DPoP", proof) + .form(form) + .send() + .await + .map_err(|e| AppError::Upstream(e.to_string()))?; + + if resp.status().is_success() { + return resp.json().await.map_err(|e| AppError::Upstream(e.to_string())); + } + + // Retry once with the server-supplied DPoP nonce, if it offered one. + let Some(server_nonce) = resp.headers().get("DPoP-Nonce").and_then(|v| v.to_str().ok()) else { + return Err(AppError::Upstream(format!("oauth endpoint {}: {}", url, resp.status()))); + }; + let proof = dpop_proof(&oc.ec_key, "POST", url, now(), Some(server_nonce), None); + let retry = oc + .http + .post(url) + .header("DPoP", proof) + .form(form) + .send() + .await + .map_err(|e| AppError::Upstream(e.to_string()))?; + if retry.status().is_success() { + return retry.json().await.map_err(|e| AppError::Upstream(e.to_string())); + } + Err(AppError::Upstream(format!("oauth endpoint {}: {}", url, retry.status()))) +} + +async fn resolve_pds_and_authserver( + oc: &OauthClient, + handle_or_did: &str, +) -> Result<(Did, AuthServerMetadata), AppError> { + let did = if handle_or_did.starts_with("did:") { + Did::parse(handle_or_did)? + } else { + oc.resolver.resolve_handle(handle_or_did).await? + }; + let pds = oc.resolver.resolve_pds(&did).await?; + let meta = discover_auth_server(&oc.http, &pds).await?; + Ok((did, meta)) +} + +/// Start the OAuth flow: PAR, then redirect the user to the authorization server. 
+async fn login( + State(st): State, + cookies: Cookies, + Query(q): Query, +) -> Result { + let oc = require(&st)?; + let handle = q.handle.trim(); + let (_did, meta) = resolve_pds_and_authserver(oc, handle).await?; + + let par_endpoint = meta + .pushed_authorization_request_endpoint + .clone() + .ok_or_else(|| AppError::Upstream("authorization server has no PAR endpoint".into()))?; + + let pkce = Pkce::generate(); + let state = du_atproto::oauth::random_token(); + let redirect_uri = oc.metadata.redirect_uris[0].clone(); + let assertion = client_assertion(&oc.ec_key, &oc.metadata.client_id, &meta.issuer, now()); + let form = par_form( + &oc.metadata.client_id, + &redirect_uri, + &oc.scope, + &state, + &pkce.challenge, + Some(handle), + &assertion, + ); + + let par: serde_json::Value = post_with_dpop(oc, &par_endpoint, &form).await?; + let request_uri = par + .get("request_uri") + .and_then(|v| v.as_str()) + .ok_or_else(|| AppError::Upstream("PAR response missing request_uri".into()))?; + + // Stash the flow state (signed, short-lived) for the callback. + let flow = FlowState { + state, + verifier: pkce.verifier, + token_endpoint: meta.token_endpoint.clone(), + issuer: meta.issuer.clone(), + public: false, + client_id: oc.metadata.client_id.clone(), + redirect_uri, + }; + let mut cookie = Cookie::new(FLOW_COOKIE, serde_json::to_string(&flow).unwrap()); + cookie.set_path("/"); + cookie.set_http_only(true); + cookie.set_max_age(tower_cookies::cookie::time::Duration::minutes(10)); + cookie.set_same_site(tower_cookies::cookie::SameSite::Lax); + cookies.signed(&st.key).add(cookie); + + Ok(Redirect::to(&authorize_url(&meta.authorization_endpoint, &oc.metadata.client_id, request_uri)).into_response()) +} + +/// Dev-only login against a fixed local PDS (`DU_OAUTH_DEV_PDS`) as a **public +/// (loopback) client** — PKCE + DPoP, no client assertion, no hosted client +/// metadata. Bypasses handle→DID→PDS resolution (which needs public DNS/HTTPS). 
+/// Used to exercise the full handshake against a local TLS-proxied PDS. +async fn login_dev( + State(st): State, + cookies: Cookies, + Query(q): Query, +) -> Result { + let oc = require(&st)?; + let pds = oc + .dev_pds + .clone() + .ok_or_else(|| AppError::NotFound("dev OAuth not enabled (set DU_OAUTH_DEV_PDS)".into()))?; + let redirect_uri = oc + .loopback_redirect + .clone() + .ok_or_else(|| AppError::Upstream("dev OAuth needs DU_OAUTH_LOOPBACK".into()))?; + let handle = q.handle.trim(); + + let meta = discover_auth_server(&oc.http, &pds).await?; + let par_endpoint = meta + .pushed_authorization_request_endpoint + .clone() + .ok_or_else(|| AppError::Upstream("authorization server has no PAR endpoint".into()))?; + + let pkce = Pkce::generate(); + let state = du_atproto::oauth::random_token(); + // atproto loopback client: client_id carries the redirect_uri + scope. + let client_id = format!("http://localhost?redirect_uri={}&scope={}", pct(&redirect_uri), pct("atproto")); + let form = par_form_public(&client_id, &redirect_uri, "atproto", &state, &pkce.challenge, Some(handle)); + + let par: serde_json::Value = post_with_dpop(oc, &par_endpoint, &form).await?; + let request_uri = par + .get("request_uri") + .and_then(|v| v.as_str()) + .ok_or_else(|| AppError::Upstream("PAR response missing request_uri".into()))?; + + let flow = FlowState { + state, + verifier: pkce.verifier, + token_endpoint: meta.token_endpoint.clone(), + issuer: meta.issuer.clone(), + public: true, + client_id: client_id.clone(), + redirect_uri, + }; + let mut cookie = Cookie::new(FLOW_COOKIE, serde_json::to_string(&flow).unwrap()); + cookie.set_path("/"); + cookie.set_http_only(true); + cookie.set_max_age(tower_cookies::cookie::time::Duration::minutes(10)); + cookie.set_same_site(tower_cookies::cookie::SameSite::Lax); + cookies.signed(&st.key).add(cookie); + + Ok(Redirect::to(&authorize_url(&meta.authorization_endpoint, &client_id, request_uri)).into_response()) +} + +#[derive(Deserialize)] 
+struct CallbackQuery { + code: Option, + state: Option, + error: Option, +} + +/// Handle the authorization redirect: exchange the code for tokens, resolve the +/// user, and establish a session. +async fn callback( + State(st): State, + cookies: Cookies, + Query(q): Query, +) -> Result { + let oc = require(&st)?; + if let Some(err) = q.error { + return Err(AppError::BadRequest(format!("authorization denied: {err}"))); + } + let code = q.code.ok_or_else(|| AppError::BadRequest("missing code".into()))?; + let returned_state = q.state.unwrap_or_default(); + + let signed = cookies.signed(&st.key); + let flow: FlowState = signed + .get(FLOW_COOKIE) + .and_then(|c| serde_json::from_str(c.value()).ok()) + .ok_or_else(|| AppError::BadRequest("no/expired oauth flow".into()))?; + if flow.state != returned_state { + return Err(AppError::BadRequest("state mismatch".into())); + } + // One-shot: clear the flow cookie. + let mut clear = Cookie::new(FLOW_COOKIE, ""); + clear.set_path("/"); + signed.remove(clear); + + // Public (loopback) flow: PKCE only. Confidential flow: private_key_jwt. + let form = if flow.public { + token_form_public(&flow.client_id, &flow.redirect_uri, &code, &flow.verifier) + } else { + let redirect_uri = oc.metadata.redirect_uris[0].clone(); + let assertion = client_assertion(&oc.ec_key, &oc.metadata.client_id, &flow.issuer, now()); + token_form(&oc.metadata.client_id, &redirect_uri, &code, &flow.verifier, &assertion) + }; + let tokens: serde_json::Value = post_with_dpop(oc, &flow.token_endpoint, &form).await?; + + let did = tokens + .get("sub") + .and_then(|v| v.as_str()) + .ok_or_else(|| AppError::Upstream("token response missing sub (DID)".into()))? + .to_string(); + + // Resolve the handle/display from the DID document (best-effort). 
+ let handle = match Did::parse(&did) { + Ok(d) => oc.resolver.resolve_did(&d).await.ok().and_then(|doc| doc.handle()), + Err(_) => None, + }; + let display = handle.clone().unwrap_or_else(|| did.clone()); + + let user_id = + du_db::auth::upsert_user_by_did(&st.pool, &did, handle.as_deref(), Some(&display)).await?; + let (display_name, roles) = du_db::auth::session_info(&st.pool, user_id).await?; + + let session = Session { + user_id: user_id.0, + display_name: display_name.unwrap_or(display), + roles, + }; + let mut sc = Cookie::new(SESSION_COOKIE, serde_json::to_string(&session).unwrap()); + sc.set_path("/"); + sc.set_http_only(true); + sc.set_same_site(tower_cookies::cookie::SameSite::Lax); + cookies.signed(&st.key).add(sc); + + Ok(Redirect::to("/").into_response()) +} diff --git a/rust/crates/du-web/src/render.rs b/rust/crates/du-web/src/render.rs new file mode 100644 index 00000000..8104ea63 --- /dev/null +++ b/rust/crates/du-web/src/render.rs @@ -0,0 +1,20 @@ +//! Renders an Askama template into an HTML response. Used instead of the +//! `askama_axum` IntoResponse integration to keep the rendering path explicit +//! and version-independent. + +use axum::http::{header, StatusCode}; +use axum::response::{IntoResponse, Response}; + +pub fn html(t: &T) -> Response { + match t.render() { + Ok(body) => ( + [(header::CONTENT_TYPE, "text/html; charset=utf-8")], + body, + ) + .into_response(), + Err(e) => { + tracing::error!(error = %e, "template render failed"); + (StatusCode::INTERNAL_SERVER_ERROR, "template error").into_response() + } + } +} diff --git a/rust/crates/du-web/src/routes/auth_routes.rs b/rust/crates/du-web/src/routes/auth_routes.rs new file mode 100644 index 00000000..f3252a32 --- /dev/null +++ b/rust/crates/du-web/src/routes/auth_routes.rs @@ -0,0 +1,91 @@ +//! Local credential login/logout with a signed-cookie session. 
+ +use crate::auth::{verify_password, Session, SESSION_COOKIE}; +use crate::error::AppError; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::State; +use axum::response::{IntoResponse, Redirect, Response}; +use axum::routing::{get, post}; +use axum::{Form, Router}; +use serde::Deserialize; +use tower_cookies::{Cookie, Cookies}; + +pub fn router() -> Router { + Router::new() + .route("/login", get(login_form).post(authenticate)) + .route("/logout", post(logout)) +} + +#[derive(askama::Template)] +#[template(path = "auth/login.html")] +struct LoginTemplate { + t: T, + next: String, + user: Option, + handle: String, + error: bool, +} + +#[derive(Deserialize)] +struct LoginForm { + handle: String, + password: String, +} + +async fn login_form(locale: Locale, user: crate::auth::MaybeUser) -> Response { + html(&LoginTemplate { t: locale.t, next: locale.next, user: user.nav(), handle: String::new(), error: false }) +} + +async fn authenticate( + State(st): State, + cookies: Cookies, + locale: Locale, + Form(form): Form, +) -> Result { + let credential = du_db::auth::find_credential(&st.pool, form.handle.trim()).await?; + let ok = match &credential { + Some(c) => c.password_hash.as_deref().is_some_and(|h| verify_password(&form.password, h)), + None => false, + }; + if !ok { + // Re-render with a generic error (don't reveal which field failed). 
+ return Ok(html(&LoginTemplate { + t: locale.t, + next: locale.next, + user: None, + handle: form.handle, + error: true, + })); + } + + let user_id = credential.unwrap().user_id; + let (display_name, roles) = du_db::auth::session_info(&st.pool, user_id).await?; + let session = Session { + user_id: user_id.0, + display_name: display_name.unwrap_or_else(|| form.handle.clone()), + roles, + }; + let value = serde_json::to_string(&session).map_err(|e| AppError::BadRequest(e.to_string()))?; + + let mut cookie = Cookie::new(SESSION_COOKIE, value); + cookie.set_path("/"); + cookie.set_http_only(true); + cookie.set_same_site(tower_cookies::cookie::SameSite::Lax); + cookies.signed(&st.key).add(cookie); + + let dest = if session_is_curator(&session) { "/curator" } else { "/" }; + Ok(Redirect::to(dest).into_response()) +} + +fn session_is_curator(s: &Session) -> bool { + s.roles.iter().any(|r| r == "Admin" || r == "TreeCurator" || r == "Curator") +} + +async fn logout(State(st): State, cookies: Cookies) -> Response { + let mut cookie = Cookie::new(SESSION_COOKIE, ""); + cookie.set_path("/"); + cookies.signed(&st.key).remove(cookie); + Redirect::to("/login").into_response() +} diff --git a/rust/crates/du-web/src/routes/change_sets.rs b/rust/crates/du-web/src/routes/change_sets.rs new file mode 100644 index 00000000..3880d377 --- /dev/null +++ b/rust/crates/du-web/src/routes/change_sets.rs @@ -0,0 +1,356 @@ +//! Curator change-set / merge review UI. The two-panel HTMX screen over the +//! tree-versioning engine: a list of change sets (left) and a review panel +//! (right) showing the diff, per-change approve/reject, comments, and the +//! lifecycle actions (start review → approve → apply / discard). The JSON +//! management API lives in `versioning.rs`; this mirrors the proposals UI. 

use crate::auth::{Curator, NavUser};
use crate::error::AppError;
use crate::htmx::HxHeaders;
use crate::i18n::{Locale, T};
use crate::render::html;
use crate::state::AppState;
use axum::extract::{Path, Query, State};
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post};
use axum::{Form, Router};
use serde::Deserialize;

/// Trigger name attached (via `HxHeaders::trigger`) to every action response
/// so the list panel can refresh.
const CHANGED: &str = "change-set-changed";

/// Routes for the change-set list, the review panel, and the lifecycle actions.
pub fn router() -> Router {
    Router::new()
        .route("/curator/change-sets", get(page))
        .route("/curator/change-sets/fragment", get(list))
        .route("/curator/change-sets/:id/panel", get(panel))
        .route("/curator/change-sets/:id/start-review", post(start_review))
        .route("/curator/change-sets/:id/approve-all", post(approve_all))
        .route("/curator/change-sets/:id/changes/:change_id/review", post(review_change))
        .route("/curator/change-sets/:id/apply", post(apply))
        .route("/curator/change-sets/:id/discard", post(discard))
        .route("/curator/change-sets/:id/comments", post(add_comment))
}

// ── list ──────────────────────────────────────────────────────────────────────

/// One change set as shown in the list; optional fields are already replaced
/// by "—" and the timestamp is pre-formatted.
struct Row {
    id: i64,
    source: String,
    dna_type: String,
    status: String,
    change_count: i64,
    created_by: String,
    created_at: String,
}

/// A page of change sets plus the status filter that produced it.
struct ListView {
    status: String,
    rows: Vec,
    page: i64,
    total: i64,
    total_pages: i64,
}

/// Query-string parameters of the list endpoints.
#[derive(Deserialize)]
struct ListQuery {
    status: Option,
    page: Option,
}

/// Format a timestamp as `YYYY-MM-DD HH:MM`.
fn fmt_dt(dt: chrono::DateTime) -> String {
    dt.format("%Y-%m-%d %H:%M").to_string()
}

/// Load one page (20 per page, page 1 by default) of change sets; an empty
/// `status` parameter means "no status filter".
async fn load_list(st: &AppState, q: &ListQuery) -> Result {
    let status = q.status.as_deref().filter(|s| !s.is_empty());
    let result = du_db::change_set::list(&st.pool, None, status, q.page.unwrap_or(1), 20).await?;
    // Read the paging numbers before `items` is moved out of `result` below.
    let (page, total, total_pages) = (result.page, result.total, result.total_pages());
    let rows = result
        .items
        .into_iter()
        .map(|s| Row {
            id: s.id,
            source: s.source,
            dna_type: s.haplogroup_type.unwrap_or_else(|| "—".into()),
            status: s.status,
            change_count: s.change_count,
            created_by: s.created_by.unwrap_or_else(|| "—".into()),
            created_at: fmt_dt(s.created_at),
        })
        .collect();
    Ok(ListView { status: q.status.clone().unwrap_or_default(), rows, page, total, total_pages })
}

/// Full page: site chrome plus the initial list.
#[derive(askama::Template)]
#[template(path = "curator/change-sets/page.html")]
struct PageTemplate {
    t: T,
    next: String,
    user: Option,
    list: ListView,
}
/// List fragment only (no page chrome).
#[derive(askama::Template)]
#[template(path = "curator/change-sets/list.html")]
struct ListTemplate {
    t: T,
    list: ListView,
}

/// Render the full change-set page for a curator.
async fn page(
    Curator(s): Curator,
    State(st): State,
    locale: Locale,
    Query(q): Query,
) -> Result {
    let list = load_list(&st, &q).await?;
    Ok(html(&PageTemplate {
        t: locale.t,
        next: locale.next,
        user: Some(NavUser { display_name: s.display_name, is_curator: true }),
        list,
    }))
}

/// Render just the list fragment (same query parameters as `page`).
async fn list(
    _c: Curator,
    State(st): State,
    locale: Locale,
    Query(q): Query,
) -> Result {
    let list = load_list(&st, &q).await?;
    Ok(html(&ListTemplate { t: locale.t, list }))
}

// ── detail / review panel ───────────────────────────────────────────────────

/// One proposed change inside a change set; `new_values` is pretty-printed JSON.
struct ChangeRow {
    id: i64,
    change_type: String,
    name: String,
    status: String,
    new_values: String,
}

/// One entry of the computed diff; `detail` is pretty-printed JSON.
struct DiffRow {
    diff_type: String,
    name: String,
    detail: String,
}

/// One review comment with its pre-formatted timestamp.
struct CommentRow {
    by: String,
    comment: String,
    at: String,
}

/// Everything the review panel renders for one change set.
struct DetailView {
    id: i64,
    source: String,
    dna_type: String,
    status: String,
    description: String,
    change_count: i64,
    created_by: String,
    created_at: String,
    promoted_by: Option,
    promoted_at: Option,
    added: i64,
    removed: i64,
    modified: i64,
    reparented: i64,
    diff: Vec,
    changes: Vec,
    comments: Vec,
    /// DRAFT/READY_FOR_REVIEW → can move to review.
    can_start: bool,
    /// UNDER_REVIEW → per-change approve/reject + approve-all are live.
    can_review: bool,
    /// READY_FOR_REVIEW/UNDER_REVIEW → applying APPROVED changes is allowed.
    can_apply: bool,
    /// Anything not yet terminal can be discarded.
    can_discard: bool,
    /// Optional banner after an action (e.g. apply summary, error).
    notice: Option,
}

/// Review-panel fragment.
#[derive(askama::Template)]
#[template(path = "curator/change-sets/detail.html")]
struct DetailTemplate {
    t: T,
    cs: DetailView,
}

/// Pretty-print a JSON value; yields an empty string if serialization fails.
fn pretty(v: &serde_json::Value) -> String {
    serde_json::to_string_pretty(v).unwrap_or_default()
}

/// Load change set `id` together with its diff and map both into a
/// `DetailView`. Returns `NotFound` when the change set does not exist.
async fn build_detail(st: &AppState, id: i64, notice: Option) -> Result {
    let d = du_db::change_set::get(&st.pool, id)
        .await?
        .ok_or_else(|| AppError::NotFound(format!("change set {id}")))?;
    let diff = du_db::change_set::diff(&st.pool, id).await?;
    let status = d.summary.status.clone();
    // The action flags are derived from the status string alone.
    let can_start = matches!(status.as_str(), "DRAFT" | "READY_FOR_REVIEW");
    let can_review = status == "UNDER_REVIEW";
    let can_apply = matches!(status.as_str(), "READY_FOR_REVIEW" | "UNDER_REVIEW");
    let can_discard = !matches!(status.as_str(), "APPLIED" | "DISCARDED");

    Ok(DetailView {
        id: d.summary.id,
        source: d.summary.source,
        dna_type: d.summary.haplogroup_type.unwrap_or_else(|| "—".into()),
        status,
        description: d.summary.description.unwrap_or_default(),
        change_count: d.summary.change_count,
        created_by: d.summary.created_by.unwrap_or_else(|| "—".into()),
        created_at: fmt_dt(d.summary.created_at),
        promoted_by: d.summary.promoted_by,
        promoted_at: d.summary.promoted_at.map(fmt_dt),
        added: diff.summary.added,
        removed: diff.summary.removed,
        modified: diff.summary.modified,
        reparented: diff.summary.reparented,
        diff: diff
            .entries
            .into_iter()
            .map(|e| DiffRow { diff_type: e.diff_type, name: e.name, detail: pretty(&e.detail) })
            .collect(),
        changes: d
            .changes
            .into_iter()
            .map(|c| ChangeRow {
                id: c.id,
                change_type: c.change_type,
                name: c.haplogroup_name.unwrap_or_else(|| "—".into()),
                status: c.status,
                new_values: c.new_values.as_ref().map(pretty).unwrap_or_default(),
            })
            .collect(),
        comments: d
            .comments
            .into_iter()
            .map(|c| CommentRow { by: c.commented_by, comment: c.comment, at: fmt_dt(c.created_at) })
            .collect(),
        can_start,
        can_review,
        can_apply,
        can_discard,
        notice,
    })
}

/// Render the review panel for `id`, optionally with a notice banner.
async fn detail_response(st: &AppState, t: T, id: i64, notice: Option) -> Result {
    let cs = build_detail(st, id, notice).await?;
    Ok(html(&DetailTemplate { t, cs }))
}

/// Render the panel and fire the list-refresh trigger (used after every action).
async fn changed_response(st: &AppState, t: T, id: i64, notice: Option) -> Result {
    let body = detail_response(st, t, id, notice).await?;
    Ok((HxHeaders::new().trigger(CHANGED), body).into_response())
}

/// Review panel for one change set.
async fn panel(
    _c: Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
) -> Result {
    detail_response(&st, locale.t, id, None).await
}

// ── lifecycle actions ─────────────────────────────────────────────────────────

/// Move the change set into review; when the database reports that nothing
/// changed, the panel shows the "no transition" notice.
async fn start_review(
    _c: Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
) -> Result {
    let ok = du_db::change_set::start_review(&st.pool, id).await?;
    let notice = (!ok).then(|| locale.t.get("cs.notice.no_transition").to_string());
    changed_response(&st, locale.t, id, notice).await
}

/// Approve changes in bulk and report the count returned by the database.
async fn approve_all(
    _c: Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
) -> Result {
    let n = du_db::change_set::approve_all(&st.pool, id).await?;
    let notice = Some(format!("{} {}", n, locale.t.get("cs.notice.approved")));
    changed_response(&st, locale.t, id, notice).await
}

/// Form body of the per-change review action.
#[derive(Deserialize)]
struct ReviewForm {
    action: String,
}

/// Approve or reject a single change, then re-render the panel.
async fn review_change(
    _c: Curator,
    State(st): State,
    locale: Locale,
    Path((id, change_id)): Path<(i64, i64)>,
    Form(f): Form,
) -> Result {
    // Any value other than exactly "APPROVE" is treated as a rejection.
    // NOTE(review): an unrecognized action is not rejected as a bad request — confirm this is intended.
    let approve = f.action == "APPROVE";
    // NOTE(review): only `change_id` is passed on; nothing visible here checks
    // that the change belongs to change set `id` — verify in `du_db`.
    du_db::change_set::review_change(&st.pool, change_id, approve).await?;
    changed_response(&st, locale.t, id, None).await
}

/// Apply the change set as the current curator. Success shows a one-line
/// summary of the counters; a `Conflict` from the database is shown as the
/// panel notice instead of failing the request.
async fn apply(
    cur: Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
) -> Result {
    match du_db::change_set::apply(&st.pool, id, &cur.0.display_name).await {
        Ok(r) => {
            let notice = Some(format!(
                "{}: +{} ~{} -{} ⤳{} ✎{} (skip {})",
                locale.t.get("cs.notice.applied"),
                r.created,
                r.updated,
                r.deleted,
                r.reparented,
                r.variant_edits,
                r.skipped,
            ));
            changed_response(&st, locale.t, id, notice).await
        }
        Err(du_db::DbError::Conflict(msg)) => changed_response(&st, locale.t, id, Some(msg)).await,
        Err(e) => Err(e.into()),
    }
}

/// Discard the change set as the current curator; when the database reports
/// that nothing changed, the panel shows the "no transition" notice.
async fn discard(
    cur: Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
) -> Result {
    let ok = du_db::change_set::discard(&st.pool, id, &cur.0.display_name).await?;
    let notice = (!ok).then(|| locale.t.get("cs.notice.no_transition").to_string());
    changed_response(&st, locale.t, id, notice).await
}

/// Form body of the comment action.
#[derive(Deserialize)]
struct CommentForm {
    comment: String,
}

/// Add a trimmed comment by the current curator; blank comments are ignored.
async fn add_comment(
    cur: Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
    Form(f): Form,
) -> Result {
    if !f.comment.trim().is_empty() {
        du_db::change_set::add_comment(&st.pool, id, &cur.0.display_name, f.comment.trim()).await?;
    }
    // No list-state change → just re-render the panel (keeps the comment thread fresh).
    detail_response(&st, locale.t, id, None).await
}
diff --git a/rust/crates/du-web/src/routes/coverage.rs b/rust/crates/du-web/src/routes/coverage.rs
new file mode 100644
index 00000000..3278df30
--- /dev/null
+++ b/rust/crates/du-web/src/routes/coverage.rs
@@ -0,0 +1,137 @@
//! Public coverage benchmarks: observed sequencing coverage by lab and test
//! type, aggregated from the alignment-metadata coverage JSONB.
+ +use crate::error::AppError; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::{Query, State}; +use axum::response::Response; +use axum::routing::get; +use axum::Router; +use serde::Deserialize; + +pub fn router() -> Router { + Router::new() + .route("/coverage-benchmarks", get(benchmarks)) + .route("/coverage/labs", get(labs_page)) + .route("/coverage/labs/fragment", get(labs_fragment)) +} + +struct BenchRow { + lab: String, + test_type: String, + libraries: i64, + mean_depth: String, + cov_10x: String, + expected: String, + /// Observed mean depth meets the test type's expected minimum. + meets: bool, +} + +#[derive(askama::Template)] +#[template(path = "coverage/benchmarks.html")] +struct CoverageTemplate { + t: T, + next: String, + user: Option, + rows: Vec, +} + +fn fmt_depth(v: Option) -> String { + v.map(|d| format!("{d:.1}×")).unwrap_or_else(|| "—".into()) +} +fn fmt_pct(v: Option) -> String { + v.map(|d| format!("{d:.1}%")).unwrap_or_else(|| "—".into()) +} + +fn to_bench_row(b: du_domain::coverage::CoverageBenchmark) -> BenchRow { + BenchRow { + lab: b.lab.unwrap_or_else(|| "—".into()), + test_type: b.test_type.unwrap_or_else(|| "—".into()), + libraries: b.library_count, + mean_depth: fmt_depth(b.avg_mean_depth), + cov_10x: fmt_pct(b.avg_cov_10x), + expected: b.expected_min_depth.map(|d| format!("{d:.0}×")).unwrap_or_else(|| "—".into()), + meets: matches!( + (b.avg_mean_depth, b.expected_min_depth), + (Some(obs), Some(exp)) if obs >= exp + ), + } +} + +async fn benchmarks( + State(st): State, + locale: Locale, + user: crate::auth::MaybeUser, +) -> Result { + let rows = du_db::coverage::benchmarks(&st.pool).await?.into_iter().map(to_bench_row).collect(); + Ok(html(&CoverageTemplate { t: locale.t, next: locale.next, user: user.nav(), rows })) +} + +// ── per-lab drill-down ───────────────────────────────────────────────────────── + +struct LabRow { + lab: String, + libraries: i64, + test_types: usize, 
+} + +#[derive(askama::Template)] +#[template(path = "coverage/labs.html")] +struct LabsTemplate { + t: T, + next: String, + user: Option, + labs: Vec, +} + +#[derive(askama::Template)] +#[template(path = "coverage/lab_rows.html")] +struct LabRowsTemplate { + t: T, + lab: String, + rows: Vec, +} + +#[derive(Deserialize)] +struct LabQuery { + lab: Option, +} + +/// Collapse the per-(lab, test-type) benchmarks into one row per lab. +fn lab_summaries(benches: &[du_domain::coverage::CoverageBenchmark]) -> Vec { + use std::collections::BTreeMap; + let mut by_lab: BTreeMap = BTreeMap::new(); + for b in benches { + let lab = b.lab.clone().unwrap_or_else(|| "—".into()); + let e = by_lab.entry(lab).or_default(); + e.0 += b.library_count; + e.1 += 1; + } + by_lab.into_iter().map(|(lab, (libraries, test_types))| LabRow { lab, libraries, test_types }).collect() +} + +async fn labs_page( + State(st): State, + locale: Locale, + user: crate::auth::MaybeUser, +) -> Result { + let labs = lab_summaries(&du_db::coverage::benchmarks(&st.pool).await?); + Ok(html(&LabsTemplate { t: locale.t, next: locale.next, user: user.nav(), labs })) +} + +async fn labs_fragment( + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let lab = q.lab.unwrap_or_default(); + let rows = du_db::coverage::benchmarks(&st.pool) + .await? + .into_iter() + .filter(|b| b.lab.clone().unwrap_or_else(|| "—".into()) == lab) + .map(to_bench_row) + .collect(); + Ok(html(&LabRowsTemplate { t: locale.t, lab, rows })) +} diff --git a/rust/crates/du-web/src/routes/curation.rs b/rust/crates/du-web/src/routes/curation.rs new file mode 100644 index 00000000..907d9b25 --- /dev/null +++ b/rust/crates/du-web/src/routes/curation.rs @@ -0,0 +1,280 @@ +//! Curation proposals. Navigator submits variant/branch proposals to a +//! machine-authenticated intake endpoint; curators review/promote them in the +//! web UI. (Intake auth is an X-API-Key for now; it becomes the OAuth bearer +//! 
//! once the handshake with Edge is live.)

use crate::auth::{Curator, NavUser};
use crate::error::AppError;
use crate::htmx::HxHeaders;
use crate::i18n::{Locale, T};
use crate::render::html;
use crate::state::AppState;
use axum::extract::{Path, Query, State};
use axum::http::{HeaderMap, StatusCode};
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post};
use axum::{Form, Json, Router};
use du_db::proposal::SubmitProposal;
use du_domain::enums::DnaType;
use serde::Deserialize;
use serde_json::Value;
use uuid::Uuid;

/// Trigger name attached (via `HxHeaders::trigger`) to review/promote
/// responses so the list panel can refresh.
const CHANGED: &str = "proposal-changed";

/// Routes: the machine intake endpoint under `/manage`, and the curator review
/// queue under `/curator/proposals`.
pub fn router() -> Router {
    Router::new()
        .route("/manage/curation/proposals", post(intake))
        .route("/curator/proposals", get(page))
        .route("/curator/proposals/fragment", get(list))
        .route("/curator/proposals/:id/panel", get(panel))
        .route("/curator/proposals/:id/review", post(review))
        .route("/curator/proposals/:id/promote", post(promote))
}

// ── intake (Navigator → AppView) ─────────────────────────────────────────────
/// JSON body of a submitted proposal; only `proposed_name` is mandatory.
#[derive(Deserialize)]
struct ProposalIn {
    proposed_name: String,
    parent_haplogroup: Option,
    dna_type: Option,
    sample_guid: Option,
    proposed_by: Option,
    evidence: Option,
}

/// Map the wire value to a `DnaType`: exactly "MT_DNA" selects mtDNA; anything
/// else, including a missing or unrecognized value, falls back to Y-DNA.
fn parse_dna(s: Option<&str>) -> DnaType {
    match s {
        Some("MT_DNA") => DnaType::MtDna,
        _ => DnaType::YDna,
    }
}

/// Require the curation API key (X-API-Key == DU_CURATION_API_KEY).
+fn check_api_key(headers: &HeaderMap) -> Result<(), AppError> { + match std::env::var("DU_CURATION_API_KEY").ok().filter(|s| !s.is_empty()) { + None => Err(AppError::Upstream("curation intake not configured".into())), + Some(expected) => { + let provided = headers.get("x-api-key").and_then(|v| v.to_str().ok()).unwrap_or(""); + if provided == expected { + Ok(()) + } else { + Err(AppError::Forbidden) + } + } + } +} + +async fn intake( + State(st): State, + headers: HeaderMap, + Json(body): Json, +) -> Result { + check_api_key(&headers)?; + if body.proposed_name.trim().is_empty() { + return Err(AppError::BadRequest("proposed_name is required".into())); + } + let submit = SubmitProposal { + proposed_name: body.proposed_name.trim().to_string(), + parent_haplogroup: body.parent_haplogroup, + dna_type: parse_dna(body.dna_type.as_deref()), + sample_guid: body.sample_guid, + proposed_by: body.proposed_by, + evidence: body.evidence.unwrap_or_else(|| serde_json::json!({})), + }; + let (id, created) = du_db::proposal::submit(&st.pool, &submit).await?; + Ok(( + StatusCode::CREATED, + Json(serde_json::json!({ "id": id, "pooled": !created })), + ) + .into_response()) +} + +// ── curator review queue ───────────────────────────────────────────────────── +struct Row { + id: i64, + name: String, + parent: String, + dna: String, + status: String, + evidence_count: i32, + submitter_count: i32, + confidence: String, +} + +/// A defining variant of a proposal (name + cross-submitter support). 
struct VarRow {
    name: String,
    support: i32,
}

/// A page of proposals plus the status filter that produced it.
struct ListView {
    status: String,
    rows: Vec,
    page: i64,
    total: i64,
    total_pages: i64,
}

/// Query-string parameters of the list endpoints.
#[derive(Deserialize)]
struct ListQuery {
    status: Option,
    page: Option,
}

/// Map a proposal summary to its display row: a missing or empty name becomes
/// "(unnamed)", a missing parent or confidence becomes "—", and confidence is
/// shown with two decimals.
fn to_row(s: du_db::proposal::ProposalSummary) -> Row {
    Row {
        id: s.id,
        name: s.proposed_name.filter(|n| !n.is_empty()).unwrap_or_else(|| "(unnamed)".into()),
        parent: s.parent_name.unwrap_or_else(|| "—".into()),
        dna: s.dna_type.unwrap_or_default(),
        status: s.status,
        evidence_count: s.evidence_count,
        submitter_count: s.submitter_count,
        confidence: s.confidence.map(|c| format!("{c:.2}")).unwrap_or_else(|| "—".into()),
    }
}

/// Load one page (20 per page, page 1 by default) of proposals; an empty
/// `status` parameter means "no status filter".
async fn load_list(st: &AppState, q: &ListQuery) -> Result {
    let status = q.status.as_deref().filter(|s| !s.is_empty());
    let filter = du_db::proposal::ProposalFilter { status, ..Default::default() };
    let result = du_db::proposal::list(&st.pool, &filter, q.page.unwrap_or(1), 20).await?;
    // Read the paging numbers before `items` is moved out of `result` below.
    let (page, total, total_pages) = (result.page, result.total, result.total_pages());
    Ok(ListView {
        status: q.status.clone().unwrap_or_default(),
        rows: result.items.into_iter().map(to_row).collect(),
        page,
        total,
        total_pages,
    })
}

/// Full page: site chrome plus the initial list.
#[derive(askama::Template)]
#[template(path = "curator/proposals/page.html")]
struct PageTemplate {
    t: T,
    next: String,
    user: Option,
    list: ListView,
}
/// List fragment only (no page chrome).
#[derive(askama::Template)]
#[template(path = "curator/proposals/list.html")]
struct ListTemplate {
    t: T,
    list: ListView,
}

/// Render the full proposals page for a curator.
async fn page(
    Curator(s): Curator,
    State(st): State,
    locale: Locale,
    Query(q): Query,
) -> Result {
    let list = load_list(&st, &q).await?;
    Ok(html(&PageTemplate {
        t: locale.t,
        next: locale.next,
        user: Some(NavUser { display_name: s.display_name, is_curator: true }),
        list,
    }))
}

/// Render just the list fragment (same query parameters as `page`).
async fn list(
    _c: Curator,
    State(st): State,
    locale: Locale,
    Query(q): Query,
) -> Result {
    let list = load_list(&st, &q).await?;
    Ok(html(&ListTemplate { t: locale.t, list }))
}

/// Detail-panel fragment for one proposal.
#[derive(askama::Template)]
#[template(path = "curator/proposals/detail.html")]
struct DetailTemplate {
    t: T,
    row: Row,
    variants: Vec,
    evidence: Vec,
    /// True while still open to a decision (PROPOSED / UNDER_REVIEW /
    /// READY_FOR_REVIEW / SPLIT_CANDIDATE — the engine's actionable states).
    open: bool,
    /// True when ACCEPTED and not yet promoted (show the Promote button).
    accepted: bool,
    /// The engine flagged a diverging submitter — show the split banner.
    split: bool,
}

/// Load proposal `id` and render its detail panel. Returns `NotFound` when the
/// proposal does not exist.
async fn detail_view(st: &AppState, t: T, id: i64) -> Result {
    let d = du_db::proposal::get(&st.pool, id)
        .await?
        .ok_or_else(|| AppError::NotFound(format!("proposal {id}")))?;
    // The flags are read from the status before `d.summary` is moved into `to_row`.
    let open = matches!(
        d.summary.status.as_str(),
        "PROPOSED" | "UNDER_REVIEW" | "READY_FOR_REVIEW" | "SPLIT_CANDIDATE"
    );
    let accepted = d.summary.status == "ACCEPTED";
    let split = d.summary.status == "SPLIT_CANDIDATE";
    let row = to_row(d.summary);
    let variants = d
        .variants
        .into_iter()
        .map(|v| VarRow { name: v.name.unwrap_or_else(|| "(unnamed)".into()), support: v.supporting_sample_count })
        .collect();
    // Each evidence item is shown as pretty-printed JSON (empty on failure).
    let evidence = d
        .evidence
        .iter()
        .map(|v| serde_json::to_string_pretty(v).unwrap_or_default())
        .collect();
    Ok(html(&DetailTemplate { t, row, variants, evidence, open, accepted, split }))
}

/// Detail panel for one proposal.
async fn panel(
    _c: Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
) -> Result {
    detail_view(&st, locale.t, id).await
}

/// Form body of the review action.
#[derive(Deserialize)]
struct ReviewForm {
    action: String,
    notes: Option,
}

/// Record a curator decision on a proposal and return the refreshed panel with
/// the list-refresh trigger. Blank notes are dropped.
async fn review(
    Curator(s): Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
    Form(f): Form,
) -> Result {
    let notes = f.notes.as_deref().filter(|n| !n.trim().is_empty());
    // The action string is forwarded as submitted; it is not validated here.
    let ok = du_db::proposal::review(&st.pool, id, &f.action, &s.display_name, notes).await?;
    if !ok {
        // NOTE(review): a `false` result is always reported as "not found";
        // confirm `du_db::proposal::review` cannot return false for other reasons.
        return Err(AppError::NotFound(format!("proposal {id}")));
    }
    let body = detail_view(&st, locale.t, id).await?;
    Ok((HxHeaders::new().trigger(CHANGED), body).into_response())
}

/// Promote an accepted proposal into the named catalog (new haplogroup branch +
/// relationship + variant links). Conflicts (wrong status, name taken, no parent)
/// surface as a 422 message.
// NOTE(review): conflicts are mapped to `AppError::BadRequest` below; confirm
// that variant really renders as 422 as the comment above states.
async fn promote(
    Curator(s): Curator,
    State(st): State,
    locale: Locale,
    Path(id): Path,
) -> Result {
    match du_db::proposal::promote(&st.pool, id, &s.display_name).await {
        Ok(_) => {
            let body = detail_view(&st, locale.t, id).await?;
            Ok((HxHeaders::new().trigger(CHANGED), body).into_response())
        }
        Err(du_db::DbError::Conflict(msg)) => Err(AppError::BadRequest(msg)),
        Err(e) => Err(e.into()),
    }
}
diff --git a/rust/crates/du-web/src/routes/curator.rs b/rust/crates/du-web/src/routes/curator.rs
new file mode 100644
index 00000000..99ebb7d4
--- /dev/null
+++ b/rust/crates/du-web/src/routes/curator.rs
@@ -0,0 +1,460 @@
//! Curator tools (role-gated). Demonstrates the HTMX two-panel write-flow: a
//! searchable list on the left and a detail/form panel on the right; mutations
//! return the updated panel plus an `HX-Trigger` that makes the list reload.

use crate::auth::{Curator, NavUser};
use crate::error::AppError;
use crate::htmx::HxHeaders;
use crate::i18n::{Locale, T};
use crate::render::html;
use crate::state::AppState;
use axum::extract::{Path, Query, State};
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post};
use axum::{Form, Router};
use du_domain::enums::DnaType;
use du_domain::ids::HaplogroupId;
use serde::Deserialize;

/// Event other elements listen for to refresh after a mutation.
+const CHANGED: &str = "hg-changed"; + +pub fn router() -> Router { + Router::new() + .route("/curator", get(dashboard)) + .route("/curator/haplogroups", get(hg_page)) + .route("/curator/haplogroups/fragment", get(hg_list)) + .route("/curator/haplogroups/new", get(hg_new)) + .route("/curator/haplogroups", post(hg_create)) + .route("/curator/haplogroups/:id/panel", get(hg_panel)) + .route("/curator/haplogroups/:id/edit", get(hg_edit)) + .route("/curator/haplogroups/:id", post(hg_update)) + .route("/curator/haplogroups/:id", axum::routing::delete(hg_delete)) + .route("/curator/haplogroups/:id/reparent", post(hg_reparent)) + .route("/curator/haplogroups/:id/merge", post(hg_merge)) + .route("/curator/haplogroups/:id/split", post(hg_split)) +} + +// ── dashboard ──────────────────────────────────────────────────────────────── +#[derive(askama::Template)] +#[template(path = "curator/dashboard.html")] +struct DashTemplate { + t: T, + next: String, + user: Option, + display_name: String, + roles: String, +} + +async fn dashboard(Curator(s): Curator, locale: Locale) -> Response { + html(&DashTemplate { + t: locale.t, + next: locale.next, + user: Some(NavUser { display_name: s.display_name.clone(), is_curator: true }), + display_name: s.display_name, + roles: s.roles.join(", "), + }) +} + +// ── haplogroup list ────────────────────────────────────────────────────────── +#[derive(Deserialize)] +struct ListQuery { + query: Option, + dna: Option, + page: Option, +} + +fn parse_dna(s: Option<&str>) -> Option { + match s { + Some("Y_DNA") => Some(DnaType::YDna), + Some("MT_DNA") => Some(DnaType::MtDna), + _ => None, + } +} + +struct HgRow { + id: i64, + name: String, + dna: String, + lineage: String, +} + +struct HgListView { + query: String, + dna: String, + rows: Vec, + page: i64, + total: i64, + total_pages: i64, +} + +async fn load_list(st: &AppState, q: &ListQuery) -> Result { + let dna = parse_dna(q.dna.as_deref()); + let result = + 
du_db::haplogroup::list_paginated(&st.pool, q.query.as_deref(), dna, q.page.unwrap_or(1), 20) + .await?; + let rows = result + .items + .iter() + .map(|h| HgRow { + id: h.id.0, + name: h.name.clone(), + dna: h.haplogroup_type.label().to_string(), + lineage: h.lineage.clone().unwrap_or_default(), + }) + .collect(); + Ok(HgListView { + query: q.query.clone().unwrap_or_default(), + dna: q.dna.clone().unwrap_or_default(), + rows, + page: result.page, + total: result.total, + total_pages: result.total_pages(), + }) +} + +#[derive(askama::Template)] +#[template(path = "curator/haplogroups/page.html")] +struct HgPageTemplate { + t: T, + next: String, + user: Option, + list: HgListView, +} + +#[derive(askama::Template)] +#[template(path = "curator/haplogroups/list.html")] +struct HgListTemplate { + t: T, + list: HgListView, +} + +async fn hg_page( + Curator(s): Curator, + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&HgPageTemplate { + t: locale.t, + next: locale.next, + user: Some(NavUser { display_name: s.display_name, is_curator: true }), + list, + })) +} + +async fn hg_list( + _c: Curator, + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&HgListTemplate { t: locale.t, list })) +} + +// ── detail panel ───────────────────────────────────────────────────────────── +struct HgDetailView { + id: i64, + name: String, + dna: String, + lineage: String, + source: String, + formed_ybp: String, + tmrca_ybp: String, + /// Current parent name (None at a root) — context for reparent/merge. + parent_name: Option, + /// Current defining-variant names — reference for the split picker. 
+ variants: Vec, +} + +#[derive(askama::Template)] +#[template(path = "curator/haplogroups/detail.html")] +struct HgDetailTemplate { + t: T, + hg: HgDetailView, + can_delete: bool, + error: Option, +} + +async fn detail_view(st: &AppState, id: HaplogroupId) -> Result { + let h = du_db::haplogroup::get_by_id(&st.pool, id) + .await? + .ok_or_else(|| AppError::NotFound(format!("haplogroup {}", id.0)))?; + let parent_name = du_db::haplogroup::current_parent(&st.pool, id).await?.map(|(_, n)| n); + let variants = du_db::haplogroup::current_variant_links(&st.pool, id) + .await? + .into_iter() + .map(|(_, n)| n) + .collect(); + Ok(HgDetailView { + id: h.id.0, + name: h.name, + dna: h.haplogroup_type.label().to_string(), + lineage: h.lineage.unwrap_or_default(), + source: h.source.unwrap_or_default(), + formed_ybp: h.formed_ybp.map(|v| v.to_string()).unwrap_or_default(), + tmrca_ybp: h.tmrca_ybp.map(|v| v.to_string()).unwrap_or_default(), + parent_name, + variants, + }) +} + +async fn render_detail( + st: &AppState, + t: T, + id: HaplogroupId, + error: Option, +) -> Result { + let hg = detail_view(st, id).await?; + let can_delete = !du_db::haplogroup::has_current_edges(&st.pool, id).await?; + Ok(html(&HgDetailTemplate { t, hg, can_delete, error })) +} + +async fn hg_panel( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + render_detail(&st, locale.t, HaplogroupId(id), None).await +} + +// ── create / edit forms ────────────────────────────────────────────────────── +#[derive(askama::Template)] +#[template(path = "curator/haplogroups/form.html")] +struct HgFormTemplate { + t: T, + action: String, + is_edit: bool, + id: i64, + name: String, + dna: String, + lineage: String, + source: String, + formed_ybp: String, + tmrca_ybp: String, +} + +async fn hg_new(_c: Curator, locale: Locale) -> Response { + html(&HgFormTemplate { + t: locale.t, + action: "/curator/haplogroups".into(), + is_edit: false, + id: 0, + name: String::new(), + dna: 
"Y_DNA".into(), + lineage: String::new(), + source: String::new(), + formed_ybp: String::new(), + tmrca_ybp: String::new(), + }) +} + +async fn hg_edit( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + let d = detail_view(&st, HaplogroupId(id)).await?; + Ok(html(&HgFormTemplate { + t: locale.t, + action: format!("/curator/haplogroups/{id}"), + is_edit: true, + id, + name: d.name, + dna: d.dna, + lineage: d.lineage, + source: d.source, + formed_ybp: d.formed_ybp, + tmrca_ybp: d.tmrca_ybp, + })) +} + +#[derive(Deserialize)] +struct HgForm { + name: String, + dna: Option, + lineage: Option, + source: Option, + formed_ybp: Option, + tmrca_ybp: Option, +} + +fn opt(s: Option) -> Option { + s.map(|v| v.trim().to_string()).filter(|v| !v.is_empty()) +} +fn opt_i32(s: Option) -> Option { + s.and_then(|v| v.trim().parse().ok()) +} + +/// On a successful mutation, return the saved detail panel and trigger the list +/// to reload (server-driven via HX-Trigger). 
+fn changed(body: Response) -> Response { + (HxHeaders::new().trigger(CHANGED), body).into_response() +} + +async fn hg_create( + _c: Curator, + State(st): State, + locale: Locale, + Form(f): Form, +) -> Result { + let name = f.name.trim(); + if name.is_empty() { + return Err(AppError::BadRequest("name is required".into())); + } + let dna = parse_dna(f.dna.as_deref()).unwrap_or(DnaType::YDna); + let id = du_db::haplogroup::create( + &st.pool, + name, + dna, + opt(f.lineage).as_deref(), + opt(f.source).as_deref(), + opt_i32(f.formed_ybp), + opt_i32(f.tmrca_ybp), + ) + .await?; + Ok(changed(render_detail(&st, locale.t, id, None).await?)) +} + +async fn hg_update( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, + Form(f): Form, +) -> Result { + let name = f.name.trim(); + if name.is_empty() { + return Err(AppError::BadRequest("name is required".into())); + } + du_db::haplogroup::update( + &st.pool, + HaplogroupId(id), + name, + opt(f.lineage).as_deref(), + opt(f.source).as_deref(), + opt_i32(f.formed_ybp), + opt_i32(f.tmrca_ybp), + ) + .await?; + Ok(changed(render_detail(&st, locale.t, HaplogroupId(id), None).await?)) +} + +async fn hg_delete( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + let hid = HaplogroupId(id); + if du_db::haplogroup::has_current_edges(&st.pool, hid).await? { + // Blocked: re-render the detail with an inline error, no reload. + let msg = locale.t.get("hg.deleteBlocked").to_string(); + return render_detail(&st, locale.t, hid, Some(msg)).await; + } + du_db::haplogroup::delete(&st.pool, hid).await?; + + #[derive(askama::Template)] + #[template(path = "curator/haplogroups/empty.html")] + struct Empty { + t: T, + } + Ok(changed(html(&Empty { t: locale.t }))) +} + +// ── structural ops (reparent / merge into parent / split) ────────────────────── + +/// Re-render the detail with a conflict message as an inline error (no reload). 
+async fn op_error(st: &AppState, t: T, id: HaplogroupId, msg: String) -> Result { + render_detail(st, t, id, Some(msg)).await +} + +#[derive(Deserialize)] +struct ReparentForm { + parent: String, +} + +async fn hg_reparent( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, + Form(f): Form, +) -> Result { + let hid = HaplogroupId(id); + let h = du_db::haplogroup::get_by_id(&st.pool, hid) + .await? + .ok_or_else(|| AppError::NotFound(format!("haplogroup {id}")))?; + let parent = f.parent.trim(); + let Some(p) = du_db::haplogroup::get_by_name(&st.pool, parent, h.haplogroup_type).await? else { + return op_error(&st, locale.t, hid, format!("{}: {parent}", locale.t.get("hg.op.unknown"))).await; + }; + match du_db::haplogroup::reparent(&st.pool, hid, p.id).await { + Ok(()) => Ok(changed(render_detail(&st, locale.t, hid, None).await?)), + Err(du_db::DbError::Conflict(m)) => op_error(&st, locale.t, hid, m).await, + Err(e) => Err(e.into()), + } +} + +async fn hg_merge( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + let hid = HaplogroupId(id); + match du_db::haplogroup::merge_into_parent(&st.pool, hid).await { + // Node is gone — show the empty panel and reload the list. + Ok(()) => { + #[derive(askama::Template)] + #[template(path = "curator/haplogroups/empty.html")] + struct Empty { + t: T, + } + Ok(changed(html(&Empty { t: locale.t }))) + } + Err(du_db::DbError::Conflict(m)) => op_error(&st, locale.t, hid, m).await, + Err(e) => Err(e.into()), + } +} + +#[derive(Deserialize)] +struct SplitForm { + name: String, + /// Comma-separated variant names to move to the new child. + variants: String, +} + +async fn hg_split( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, + Form(f): Form, +) -> Result { + let hid = HaplogroupId(id); + let h = du_db::haplogroup::get_by_id(&st.pool, hid) + .await? 
+ .ok_or_else(|| AppError::NotFound(format!("haplogroup {id}")))?; + // Resolve the entered names against the node's current variant links. + let links = du_db::haplogroup::current_variant_links(&st.pool, hid).await?; + let want: Vec<&str> = f.variants.split(',').map(str::trim).filter(|s| !s.is_empty()).collect(); + let ids: Vec = links + .iter() + .filter(|(_, name)| want.iter().any(|w| w.eq_ignore_ascii_case(name))) + .map(|(id, _)| *id) + .collect(); + if ids.is_empty() { + return op_error(&st, locale.t, hid, locale.t.get("hg.op.no_variants").to_string()).await; + } + let source = h.source.as_deref().unwrap_or("curator"); + match du_db::haplogroup::split(&st.pool, hid, f.name.trim(), &ids, h.haplogroup_type, Some(source)).await { + Ok(_) => Ok(changed(render_detail(&st, locale.t, hid, None).await?)), + Err(du_db::DbError::Conflict(m)) => op_error(&st, locale.t, hid, m).await, + Err(e) => Err(e.into()), + } +} diff --git a/rust/crates/du-web/src/routes/curator_regions.rs b/rust/crates/du-web/src/routes/curator_regions.rs new file mode 100644 index 00000000..70550b55 --- /dev/null +++ b/rust/crates/du-web/src/routes/curator_regions.rs @@ -0,0 +1,301 @@ +//! Curator genome-region CRUD. The multi-build `coordinates` and `properties` +//! are JSONB documents, edited here as JSON textareas (parse-validated on save, +//! re-rendering the form with an error on invalid JSON). 
+ +use crate::auth::Curator; +use crate::error::AppError; +use crate::htmx::HxHeaders; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::{Path, Query, State}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::{Form, Router}; +use serde::Deserialize; + +const CHANGED: &str = "region-changed"; + +pub fn router() -> Router { + Router::new() + .route("/curator/regions", get(page)) + .route("/curator/regions/fragment", get(list)) + .route("/curator/regions/new", get(new_form)) + .route("/curator/regions", post(create)) + .route("/curator/regions/:id/panel", get(panel)) + .route("/curator/regions/:id/edit", get(edit_form)) + .route("/curator/regions/:id", post(update)) + .route("/curator/regions/:id", axum::routing::delete(remove)) +} + +#[derive(Deserialize)] +struct ListQuery { + query: Option, + page: Option, +} + +struct Row { + id: i64, + region_type: String, + name: String, + builds: String, +} +struct ListView { + query: String, + rows: Vec, + page: i64, + total: i64, + total_pages: i64, +} + +/// Top-level keys of a JSONB object, comma-joined (the build labels). 
+fn keys_of(v: &serde_json::Value) -> String { + v.as_object() + .map(|o| o.keys().cloned().collect::>().join(", ")) + .unwrap_or_default() +} + +async fn load_list(st: &AppState, q: &ListQuery) -> Result { + let result = + du_db::genome_region::list_paginated(&st.pool, q.query.as_deref(), None, q.page.unwrap_or(1), 20).await?; + Ok(ListView { + query: q.query.clone().unwrap_or_default(), + rows: result + .items + .iter() + .map(|r| Row { + id: r.id, + region_type: r.region_type.clone(), + name: r.name.clone(), + builds: keys_of(&r.coordinates), + }) + .collect(), + page: result.page, + total: result.total, + total_pages: result.total_pages(), + }) +} + +#[derive(askama::Template)] +#[template(path = "curator/regions/page.html")] +struct PageTemplate { + t: T, + next: String, + user: Option, + list: ListView, +} +#[derive(askama::Template)] +#[template(path = "curator/regions/list.html")] +struct ListTemplate { + t: T, + list: ListView, +} + +async fn page( + Curator(s): Curator, + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&PageTemplate { + t: locale.t, + next: locale.next, + user: Some(crate::auth::NavUser { display_name: s.display_name, is_curator: true }), + list, + })) +} + +async fn list( + _c: Curator, + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&ListTemplate { t: locale.t, list })) +} + +#[derive(askama::Template)] +#[template(path = "curator/regions/detail.html")] +struct DetailTemplate { + t: T, + id: i64, + region_type: String, + name: String, + coordinates: String, + properties: String, +} + +fn pretty(v: &serde_json::Value) -> String { + serde_json::to_string_pretty(v).unwrap_or_else(|_| "{}".into()) +} + +async fn detail(st: &AppState, t: T, id: i64) -> Result { + let r = du_db::genome_region::get_by_id(&st.pool, id) + .await? 
+ .ok_or_else(|| AppError::NotFound(format!("region {id}")))?; + Ok(html(&DetailTemplate { + t, + id: r.id, + region_type: r.region_type, + name: r.name, + coordinates: pretty(&r.coordinates), + properties: pretty(&r.properties), + })) +} + +async fn panel( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + detail(&st, locale.t, id).await +} + +#[derive(askama::Template)] +#[template(path = "curator/regions/form.html")] +struct FormTemplate { + t: T, + action: String, + is_edit: bool, + id: i64, + region_type: String, + name: String, + coordinates: String, + properties: String, + error: Option, +} + +async fn new_form(_c: Curator, locale: Locale) -> Response { + html(&FormTemplate { + t: locale.t, + action: "/curator/regions".into(), + is_edit: false, + id: 0, + region_type: String::new(), + name: String::new(), + coordinates: "{\n \"GRCh38\": { \"contig\": \"chr1\", \"start\": 0, \"end\": 0 }\n}".into(), + properties: "{}".into(), + error: None, + }) +} + +async fn edit_form( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + let r = du_db::genome_region::get_by_id(&st.pool, id) + .await? + .ok_or_else(|| AppError::NotFound(format!("region {id}")))?; + Ok(html(&FormTemplate { + t: locale.t, + action: format!("/curator/regions/{id}"), + is_edit: true, + id, + region_type: r.region_type, + name: r.name, + coordinates: pretty(&r.coordinates), + properties: pretty(&r.properties), + error: None, + })) +} + +#[derive(Deserialize)] +struct RegionForm { + region_type: String, + name: String, + coordinates: String, + properties: String, +} + +fn changed(body: Response) -> Response { + (HxHeaders::new().trigger(CHANGED), body).into_response() +} + +/// Parse the two JSON fields; on failure re-render the form with an error. 
+fn parse_json( + t: T, + action: String, + is_edit: bool, + id: i64, + f: &RegionForm, +) -> Result<(serde_json::Value, serde_json::Value), Box> { + let coords = serde_json::from_str::(&f.coordinates); + let props = serde_json::from_str::(&f.properties); + match (coords, props) { + (Ok(c), Ok(p)) => Ok((c, p)), + (c, p) => { + let mut msg = String::new(); + if let Err(e) = &c { + msg = format!("coordinates: {e}"); + } else if let Err(e) = &p { + msg = format!("properties: {e}"); + } + Err(Box::new(html(&FormTemplate { + t, + action, + is_edit, + id, + region_type: f.region_type.clone(), + name: f.name.clone(), + coordinates: f.coordinates.clone(), + properties: f.properties.clone(), + error: Some(msg), + }))) + } + } +} + +async fn create( + _c: Curator, + State(st): State, + locale: Locale, + Form(f): Form, +) -> Result { + if f.name.trim().is_empty() || f.region_type.trim().is_empty() { + return Err(AppError::BadRequest("region_type and name are required".into())); + } + let (coords, props) = match parse_json(locale.t, "/curator/regions".into(), false, 0, &f) { + Ok(v) => v, + Err(resp) => return Ok(*resp), + }; + let id = du_db::genome_region::create(&st.pool, f.region_type.trim(), f.name.trim(), &coords, &props).await?; + Ok(changed(detail(&st, locale.t, id).await?)) +} + +async fn update( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, + Form(f): Form, +) -> Result { + if f.name.trim().is_empty() || f.region_type.trim().is_empty() { + return Err(AppError::BadRequest("region_type and name are required".into())); + } + let (coords, props) = match parse_json(locale.t, format!("/curator/regions/{id}"), true, id, &f) { + Ok(v) => v, + Err(resp) => return Ok(*resp), + }; + du_db::genome_region::update(&st.pool, id, f.region_type.trim(), f.name.trim(), &coords, &props).await?; + Ok(changed(detail(&st, locale.t, id).await?)) +} + +async fn remove( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + 
du_db::genome_region::delete(&st.pool, id).await?; + #[derive(askama::Template)] + #[template(path = "curator/regions/empty.html")] + struct Empty { + t: T, + } + Ok(changed(html(&Empty { t: locale.t }))) +} diff --git a/rust/crates/du-web/src/routes/curator_variants.rs b/rust/crates/du-web/src/routes/curator_variants.rs new file mode 100644 index 00000000..aadbfb92 --- /dev/null +++ b/rust/crates/du-web/src/routes/curator_variants.rs @@ -0,0 +1,322 @@ +//! Curator variant CRUD. Same HTMX two-panel write-flow as haplogroups; edits +//! the scalar fields + alias lists (common_names / rs_ids). Coordinate editing +//! is out of scope for this panel and is preserved across updates. + +use crate::auth::Curator; +use crate::error::AppError; +use crate::htmx::HxHeaders; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::{Path, Query, State}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::{Form, Router}; +use du_domain::enums::{MutationType, NamingStatus}; +use du_domain::ids::VariantId; +use du_domain::variant::Aliases; +use serde::Deserialize; + +const CHANGED: &str = "variant-changed"; + +pub fn router() -> Router { + Router::new() + .route("/curator/variants", get(page)) + .route("/curator/variants/fragment", get(list)) + .route("/curator/variants/new", get(new_form)) + .route("/curator/variants", post(create)) + .route("/curator/variants/:id/panel", get(panel)) + .route("/curator/variants/:id/edit", get(edit_form)) + .route("/curator/variants/:id", post(update)) + .route("/curator/variants/:id", axum::routing::delete(remove)) +} + +fn parse_mutation(s: &str) -> MutationType { + match s { + "INDEL" => MutationType::Indel, + "STR" => MutationType::Str, + "DEL" => MutationType::Del, + "INS" => MutationType::Ins, + "MNP" => MutationType::Mnp, + _ => MutationType::Snp, + } +} +fn parse_naming(s: &str) -> NamingStatus { + match s { + "NAMED" => NamingStatus::Named, + 
"PENDING_REVIEW" => NamingStatus::PendingReview, + _ => NamingStatus::Unnamed, + } +} +/// "a, b ,c" -> ["a","b","c"] +fn csv(s: Option) -> Vec { + s.unwrap_or_default() + .split(',') + .map(|t| t.trim().to_string()) + .filter(|t| !t.is_empty()) + .collect() +} + +#[derive(Deserialize)] +struct ListQuery { + query: Option, + page: Option, +} + +struct Row { + id: i64, + name: String, + mutation_type: String, + naming_status: String, +} +struct ListView { + query: String, + rows: Vec, + page: i64, + total: i64, + total_pages: i64, +} + +async fn load_list(st: &AppState, q: &ListQuery) -> Result { + let result = du_db::variant::search(&st.pool, q.query.as_deref(), q.page.unwrap_or(1), 20).await?; + Ok(ListView { + query: q.query.clone().unwrap_or_default(), + rows: result + .items + .iter() + .map(|v| Row { + id: v.id.0, + name: v.canonical_name.clone(), + mutation_type: v.mutation_type.label().to_string(), + naming_status: v.naming_status.label().to_string(), + }) + .collect(), + page: result.page, + total: result.total, + total_pages: result.total_pages(), + }) +} + +#[derive(askama::Template)] +#[template(path = "curator/variants/page.html")] +struct PageTemplate { + t: T, + next: String, + user: Option, + list: ListView, +} + +#[derive(askama::Template)] +#[template(path = "curator/variants/list.html")] +struct ListTemplate { + t: T, + list: ListView, +} + +async fn page( + Curator(s): Curator, + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&PageTemplate { + t: locale.t, + next: locale.next, + user: Some(crate::auth::NavUser { display_name: s.display_name, is_curator: true }), + list, + })) +} + +async fn list( + _c: Curator, + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&ListTemplate { t: locale.t, list })) +} + +struct DetailView { + id: i64, + name: String, + mutation_type: String, + naming_status: String, + 
common_names: String, + rs_ids: String, + builds: String, +} + +#[derive(askama::Template)] +#[template(path = "curator/variants/detail.html")] +struct DetailTemplate { + t: T, + v: DetailView, + can_delete: bool, + error: Option, +} + +async fn detail_view(st: &AppState, id: VariantId) -> Result { + let v = du_db::variant::get_by_id(&st.pool, id) + .await? + .ok_or_else(|| AppError::NotFound(format!("variant {}", id.0)))?; + let mut builds: Vec<&str> = v.coordinates.0.keys().map(String::as_str).collect(); + builds.sort_unstable(); + Ok(DetailView { + id: v.id.0, + name: v.canonical_name, + mutation_type: v.mutation_type.label().to_string(), + naming_status: v.naming_status.label().to_string(), + common_names: v.aliases.common_names.join(", "), + rs_ids: v.aliases.rs_ids.join(", "), + builds: builds.join(", "), + }) +} + +async fn render_detail(st: &AppState, t: T, id: VariantId, error: Option) -> Result { + let v = detail_view(st, id).await?; + let can_delete = !du_db::variant::is_referenced(&st.pool, id).await?; + Ok(html(&DetailTemplate { t, v, can_delete, error })) +} + +async fn panel( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + render_detail(&st, locale.t, VariantId(id), None).await +} + +#[derive(askama::Template)] +#[template(path = "curator/variants/form.html")] +struct FormTemplate { + t: T, + action: String, + is_edit: bool, + id: i64, + name: String, + mutation_type: String, + naming_status: String, + common_names: String, + rs_ids: String, +} + +async fn new_form(_c: Curator, locale: Locale) -> Response { + html(&FormTemplate { + t: locale.t, + action: "/curator/variants".into(), + is_edit: false, + id: 0, + name: String::new(), + mutation_type: "SNP".into(), + naming_status: "UNNAMED".into(), + common_names: String::new(), + rs_ids: String::new(), + }) +} + +async fn edit_form( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + let d = detail_view(&st, 
VariantId(id)).await?; + Ok(html(&FormTemplate { + t: locale.t, + action: format!("/curator/variants/{id}"), + is_edit: true, + id, + name: d.name, + mutation_type: d.mutation_type, + naming_status: d.naming_status, + common_names: d.common_names, + rs_ids: d.rs_ids, + })) +} + +#[derive(Deserialize)] +struct VariantForm { + name: String, + mutation_type: Option, + naming_status: Option, + common_names: Option, + rs_ids: Option, +} + +fn aliases_from(form: &VariantForm) -> Aliases { + Aliases { + common_names: csv(form.common_names.clone()), + rs_ids: csv(form.rs_ids.clone()), + ..Default::default() + } +} + +fn changed(body: Response) -> Response { + (HxHeaders::new().trigger(CHANGED), body).into_response() +} + +async fn create( + _c: Curator, + State(st): State, + locale: Locale, + Form(f): Form, +) -> Result { + if f.name.trim().is_empty() { + return Err(AppError::BadRequest("name is required".into())); + } + let id = du_db::variant::create( + &st.pool, + f.name.trim(), + parse_mutation(f.mutation_type.as_deref().unwrap_or("SNP")), + parse_naming(f.naming_status.as_deref().unwrap_or("UNNAMED")), + &aliases_from(&f), + ) + .await?; + Ok(changed(render_detail(&st, locale.t, id, None).await?)) +} + +async fn update( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, + Form(f): Form, +) -> Result { + if f.name.trim().is_empty() { + return Err(AppError::BadRequest("name is required".into())); + } + du_db::variant::update( + &st.pool, + VariantId(id), + f.name.trim(), + parse_mutation(f.mutation_type.as_deref().unwrap_or("SNP")), + parse_naming(f.naming_status.as_deref().unwrap_or("UNNAMED")), + &aliases_from(&f), + ) + .await?; + Ok(changed(render_detail(&st, locale.t, VariantId(id), None).await?)) +} + +async fn remove( + _c: Curator, + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + let vid = VariantId(id); + if du_db::variant::is_referenced(&st.pool, vid).await? 
{ + let msg = locale.t.get("var.deleteBlocked").to_string(); + return render_detail(&st, locale.t, vid, Some(msg)).await; + } + du_db::variant::delete(&st.pool, vid).await?; + + #[derive(askama::Template)] + #[template(path = "curator/variants/empty.html")] + struct Empty { + t: T, + } + Ok(changed(html(&Empty { t: locale.t }))) +} diff --git a/rust/crates/du-web/src/routes/denovo_conflicts.rs b/rust/crates/du-web/src/routes/denovo_conflicts.rs new file mode 100644 index 00000000..eef018aa --- /dev/null +++ b/rust/crates/du-web/src/routes/denovo_conflicts.rs @@ -0,0 +1,117 @@ +//! Curator review of **de-novo tree conflicts** — reference clades (ISOGG / +//! PhyloTree) whose de-novo placement disagrees: foreign tips inside the clade's +//! home node and/or clade members scattered elsewhere. Read-only triage queue +//! (worst magnitude first), filterable by lineage. Populated by the de-novo loader. + +use crate::auth::{Curator, NavUser}; +use crate::error::AppError; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::{Query, State}; +use axum::response::Response; +use axum::routing::get; +use axum::Router; +use du_domain::enums::DnaType; +use serde::Deserialize; + +pub fn router() -> Router { + Router::new() + .route("/curator/denovo-conflicts", get(page)) + .route("/curator/denovo-conflicts/fragment", get(list)) +} + +struct Row { + lineage: &'static str, + clade: String, + label: String, + n_tips: i32, + magnitude: i32, + home_node: String, + foreign_in: i32, + members_away: i32, +} + +struct ListView { + rows: Vec, + page: i64, + total: i64, + total_pages: i64, + dna: String, +} + +#[derive(Deserialize)] +struct ListQuery { + page: Option, + #[serde(rename = "type")] + dna: Option, +} + +fn parse_dna(s: Option<&str>) -> Option { + match s { + Some("Y") | Some("Y_DNA") => Some(DnaType::YDna), + Some("MT") | Some("MT_DNA") => Some(DnaType::MtDna), + _ => None, + } +} + +async fn load_list(st: &AppState, q: 
&ListQuery) -> Result { + let dna = parse_dna(q.dna.as_deref()); + let result = du_db::denovo::list_conflicts(&st.pool, dna, q.page.unwrap_or(1), 25).await?; + let (page, total, total_pages) = (result.page, result.total, result.total_pages()); + let rows = result + .items + .into_iter() + .map(|c| Row { + lineage: if c.dna_type == "Y_DNA" { "Y" } else { "mt" }, + clade: c.haplogroup, + label: c.label.unwrap_or_default(), + n_tips: c.n_tips, + magnitude: c.magnitude, + home_node: c.home_node.unwrap_or_default(), + foreign_in: c.foreign_in, + members_away: c.members_away, + }) + .collect(); + Ok(ListView { rows, page, total, total_pages, dna: q.dna.clone().unwrap_or_default() }) +} + +#[derive(askama::Template)] +#[template(path = "curator/denovo-conflicts/page.html")] +struct PageTemplate { + t: T, + next: String, + user: Option, + list: ListView, +} +#[derive(askama::Template)] +#[template(path = "curator/denovo-conflicts/list.html")] +struct ListTemplate { + t: T, + list: ListView, +} + +async fn page( + Curator(s): Curator, + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&PageTemplate { + t: locale.t, + next: locale.next, + user: Some(NavUser { display_name: s.display_name, is_curator: true }), + list, + })) +} + +async fn list( + Curator(_): Curator, + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&ListTemplate { t: locale.t, list })) +} diff --git a/rust/crates/du-web/src/routes/exchange.rs b/rust/crates/du-web/src/routes/exchange.rs new file mode 100644 index 00000000..f067e383 --- /dev/null +++ b/rust/crates/du-web/src/routes/exchange.rs @@ -0,0 +1,360 @@ +//! D1 encrypted-exchange **broker** endpoints (`/api/v1/exchange/*`). The AppView +//! never sees plaintext or session keys — these endpoints record consent, mirror +//! published X25519 keys, and (increment 2) blind-relay ciphertext. Every Edge +//! 
submission is **signature-authenticated**: the caller signs a canonical message +//! ([`du_db::exchange::messages`]) with its DID's Ed25519 identity key, which the +//! broker verifies (`du_atproto::verify_did_key`; `did:plc/web` resolved first). +//! Not part of the public OpenAPI document (Edge protocol, not the read API). + +use crate::error::AppError; +use crate::sig::verify_signed; +use crate::state::AppState; +use axum::extract::{Query, State}; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use base64::engine::general_purpose::STANDARD; +use base64::Engine; +use du_db::exchange::{self, messages}; +use serde::Deserialize; +use serde_json::{json, Value}; +use sha2::{Digest, Sha256}; +use uuid::Uuid; + +/// Max relay envelope size (ciphertext) — back-pressure on the blind buffer. +const MAX_ENVELOPE_BYTES: usize = 1 << 20; // 1 MiB + +/// Parse a `project:` scope into a project id (the D5 ACL boundary). +fn project_scope_id(scope: Option<&str>) -> Option { + scope?.strip_prefix("project:")?.parse().ok() +} + +pub fn router() -> Router { + Router::new() + .route("/api/v1/exchange/key", post(publish_key).get(fetch_key)) + .route("/api/v1/exchange/request", post(create_request)) + .route("/api/v1/exchange/consent", post(consent)) + .route("/api/v1/exchange/incoming", get(incoming)) + .route("/api/v1/exchange/pending", get(pending)) + .route("/api/v1/exchange/relay", post(relay_post)) + .route("/api/v1/exchange/relay/pull", get(relay_pull)) + .route("/api/v1/exchange/relay/ack", post(relay_ack)) +} + +// ── published X25519 key ────────────────────────────────────────────────────── + +#[derive(Deserialize)] +struct KeyBody { + did: String, + /// Standard base64 of the 32-byte X25519 public key. 
+ x25519_pub: String, + key_uri: Option, + signature: String, +} + +async fn publish_key(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.did, &messages::publickey(&b.did, &b.x25519_pub, b.key_uri.as_deref()), &b.signature).await?; + let bytes = STANDARD.decode(b.x25519_pub.trim()).map_err(|_| AppError::BadRequest("x25519_pub base64".into()))?; + if bytes.len() != 32 { + return Err(AppError::BadRequest("x25519_pub must be 32 bytes".into())); + } + exchange::publish_key(&st.pool, &b.did, &bytes, b.key_uri.as_deref()).await?; + Ok(Json(json!({ "did": b.did, "status": "published" }))) +} + +#[derive(Deserialize)] +struct DidQuery { + did: String, +} + +async fn fetch_key(State(st): State, Query(q): Query) -> Result, AppError> { + let k = exchange::key_for(&st.pool, &q.did).await?.ok_or_else(|| AppError::NotFound(format!("key for {}", q.did)))?; + Ok(Json(json!({ + "did": k.did, + "x25519_pub": STANDARD.encode(&k.x25519_pub), + "key_uri": k.key_uri, + }))) +} + +// ── request + consent ───────────────────────────────────────────────────────── + +#[derive(Deserialize)] +struct RequestBody { + request_uri: String, + initiator_did: String, + partner_did: String, + purpose: String, + scope: Option, + details: Option, + signature: String, +} + +async fn create_request(State(st): State, Json(b): Json) -> Result, AppError> { + let msg = messages::request(&b.request_uri, &b.initiator_did, &b.partner_did, &b.purpose, b.scope.as_deref()); + verify_signed(&st.pool, &b.initiator_did, &msg, &b.signature).await?; + // D5 ACL: a project-scoped request requires the initiator be a live team member. + if let Some(pid) = project_scope_id(b.scope.as_deref()) { + if !du_db::research::is_team_member(&st.pool, pid, &b.initiator_did).await? 
{ + return Err(AppError::Forbidden); + } + } + exchange::create_request( + &st.pool, + &exchange::NewRequest { + request_uri: &b.request_uri, + initiator_did: &b.initiator_did, + partner_did: &b.partner_did, + purpose: &b.purpose, + scope: b.scope.as_deref(), + details: b.details.unwrap_or_else(|| json!({})), + }, + ) + .await?; + Ok(Json(json!({ "request_uri": b.request_uri, "status": "PENDING" }))) +} + +#[derive(Deserialize)] +struct ConsentBody { + request_uri: String, + consenting_did: String, + consent_given: bool, + consent_uri: Option, + signature: String, +} + +async fn consent(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.consenting_did, &messages::consent(&b.request_uri, &b.consenting_did, b.consent_given), &b.signature).await?; + // D5 ACL: consenting into a project-scoped exchange requires team membership. + if let Some(meta) = exchange::request_meta(&st.pool, &b.request_uri).await? { + if let Some(pid) = project_scope_id(meta.scope.as_deref()) { + if !du_db::research::is_team_member(&st.pool, pid, &b.consenting_did).await? { + return Err(AppError::Forbidden); + } + } + } + let outcome = exchange::record_consent( + &st.pool, + &b.request_uri, + &b.consenting_did, + b.consent_given, + b.consent_uri.as_deref(), + &b.signature, + ) + .await?; + match outcome { + exchange::ConsentOutcome::Unknown => Err(AppError::NotFound(format!("request {}", b.request_uri))), + exchange::ConsentOutcome::Consented(sid) => Ok(Json(json!({ "status": "CONSENTED", "session_id": sid }))), + exchange::ConsentOutcome::Declined => Ok(Json(json!({ "status": "DECLINED" }))), + exchange::ConsentOutcome::Recorded => Ok(Json(json!({ "status": "PENDING" }))), + } +} + +// ── exchange-ready poll (signed) ────────────────────────────────────────────── + +#[derive(Deserialize)] +struct PendingQuery { + did: String, + /// Unix seconds; must be within ±5 min of now (replay guard). 
+ ts: i64, + sig: String, +} + +async fn pending(State(st): State, Query(q): Query) -> Result, AppError> { + let now = chrono::Utc::now().timestamp(); + if (now - q.ts).abs() > 300 { + return Err(AppError::BadRequest("stale timestamp".into())); + } + verify_signed(&st.pool, &q.did, &messages::poll(&q.did, q.ts), &q.sig).await?; + let ready = exchange::pending_for(&st.pool, &q.did).await?; + let items: Vec = ready + .into_iter() + .map(|r| json!({ + "session_id": r.session_id, + "request_uri": r.request_uri, + "purpose": r.purpose, + "partner_did": r.partner_did, + "partner_key_uri": r.partner_key_uri, + })) + .collect(); + Ok(Json(json!({ "items": items }))) +} + +/// Incoming requests awaiting the caller's consent — **symmetric-blind** (no initiator +/// DID). This is the counterpart-discovery path that closes the introduce→consent loop: +/// the recipient learns a request exists and gets the opaque `request_uri` to consent +/// with, but learns *who* asked only after they consent (then via `/pending`). +async fn incoming(State(st): State, Query(q): Query) -> Result, AppError> { + let now = chrono::Utc::now().timestamp(); + if (now - q.ts).abs() > 300 { + return Err(AppError::BadRequest("stale timestamp".into())); + } + verify_signed(&st.pool, &q.did, &messages::poll(&q.did, q.ts), &q.sig).await?; + let items: Vec = exchange::incoming_for(&st.pool, &q.did) + .await? + .into_iter() + .map(|r| json!({ "request_uri": r.request_uri, "purpose": r.purpose, "created_at": r.created_at })) + .collect(); + Ok(Json(json!({ "items": items }))) +} + +// ── blind relay ─────────────────────────────────────────────────────────────── + +#[derive(Deserialize)] +struct RelayBody { + session_id: Uuid, + from_did: String, + to_did: String, + seq: i32, + /// Standard base64 of the opaque AES-GCM ciphertext envelope. 
+ blob: String, + signature: String, +} + +async fn relay_post(State(st): State, Json(b): Json) -> Result, AppError> { + let bytes = STANDARD.decode(b.blob.trim()).map_err(|_| AppError::BadRequest("blob base64".into()))?; + if bytes.is_empty() || bytes.len() > MAX_ENVELOPE_BYTES { + return Err(AppError::BadRequest("envelope size out of range".into())); + } + let hash = STANDARD.encode(Sha256::digest(&bytes)); + let msg = messages::relay(&b.session_id.to_string(), &b.from_did, &b.to_did, b.seq, &hash); + verify_signed(&st.pool, &b.from_did, &msg, &b.signature).await?; + let id = exchange::post_envelope(&st.pool, b.session_id, &b.from_did, &b.to_did, b.seq, &bytes).await?; + Ok(Json(json!({ "id": id }))) +} + +#[derive(Deserialize)] +struct PullQuery { + session_id: Uuid, + did: String, + ts: i64, + sig: String, +} + +async fn relay_pull(State(st): State, Query(q): Query) -> Result, AppError> { + if (chrono::Utc::now().timestamp() - q.ts).abs() > 300 { + return Err(AppError::BadRequest("stale timestamp".into())); + } + verify_signed(&st.pool, &q.did, &messages::poll(&q.did, q.ts), &q.sig).await?; + let envs = exchange::pull_envelopes(&st.pool, q.session_id, &q.did).await?; + let items: Vec = envs + .into_iter() + .map(|e| json!({ "id": e.id, "from_did": e.from_did, "seq": e.seq, "blob": STANDARD.encode(&e.blob) })) + .collect(); + Ok(Json(json!({ "items": items }))) +} + +#[derive(Deserialize)] +struct AckBody { + envelope_id: i64, + did: String, + signature: String, +} + +async fn relay_ack(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.did, &messages::ack(&b.did, b.envelope_id), &b.signature).await?; + if exchange::ack_envelope(&st.pool, b.envelope_id, &b.did).await? 
{ + Ok(Json(json!({ "status": "acked" }))) + } else { + Err(AppError::NotFound(format!("envelope {}", b.envelope_id))) + } +} + +#[cfg(test)] +mod tests { + use axum::body::{to_bytes, Body}; + use axum::http::{Request, StatusCode}; + use base64::engine::general_purpose::STANDARD; + use base64::Engine; + use ed25519_dalek::{Signer, SigningKey}; + use tower::ServiceExt; + + /// did:key + a signed request verifies and is recorded; a tampered signature + /// is rejected with 403. (Exercises the broker's signature-auth gate offline.) + #[tokio::test] + async fn signed_request_verifies_and_tamper_is_rejected() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping exchange auth test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let state = crate::state::AppState { pool: db.pool().clone(), key: tower_cookies::Key::generate(), oauth: None }; + + let sk = SigningKey::from_bytes(&[3u8; 32]); + let did = du_atproto::did::did_key_from_ed25519(&sk.verifying_key()); + let partner = "did:key:zPartnerPlaceholder"; + let req_uri = "at://did:key:z.../com.decodingus.exchange.request/1"; + let msg = du_db::exchange::messages::request(req_uri, &did, partner, "GENEALOGY_PII", None); + let sig = STANDARD.encode(sk.sign(msg.as_bytes()).to_bytes()); + + let body = serde_json::json!({ + "request_uri": req_uri, "initiator_did": did, "partner_did": partner, + "purpose": "GENEALOGY_PII", "signature": sig, + }); + let app = crate::routes::app(state.clone()); + let r = app + .oneshot(Request::builder().method("POST").uri("/api/v1/exchange/request") + .header("content-type", "application/json").body(Body::from(body.to_string())).unwrap()) + .await + .unwrap(); + assert_eq!(r.status(), StatusCode::OK); + let v: serde_json::Value = serde_json::from_slice(&to_bytes(r.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert_eq!(v["status"], "PENDING"); + + // Tampered 
signature → 403. + let mut bad = body.clone(); + bad["partner_did"] = serde_json::json!("did:key:zSomeoneElse"); + let app = crate::routes::app(state); + let r2 = app + .oneshot(Request::builder().method("POST").uri("/api/v1/exchange/request") + .header("content-type", "application/json").body(Body::from(bad.to_string())).unwrap()) + .await + .unwrap(); + assert_eq!(r2.status(), StatusCode::FORBIDDEN); + } + + /// D5 ACL: a project-scoped request is rejected (403) when the initiator is not a + /// live team member, and accepted once they are (the project owner is the ADMIN). + #[tokio::test] + async fn project_scoped_request_requires_team_membership() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping project-scope test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + let owner = SigningKey::from_bytes(&[31u8; 32]); + let owner_did = du_atproto::did::did_key_from_ed25519(&owner.verifying_key()); + let project_id: i64 = sqlx::query_scalar( + "INSERT INTO social.group_project (project_name, project_type, owner_did) VALUES ('P','RESEARCH',$1) RETURNING id", + ) + .bind(&owner_did) + .fetch_one(&pool) + .await + .unwrap(); + let scope = format!("project:{project_id}"); + let state = crate::state::AppState { pool, key: tower_cookies::Key::generate(), oauth: None }; + + let req = |state: crate::state::AppState, sk: &SigningKey, uri_n: u8| { + let did = du_atproto::did::did_key_from_ed25519(&sk.verifying_key()); + let req_uri = format!("at://{did}/exchange/{uri_n}"); + let msg = du_db::exchange::messages::request(&req_uri, &did, "did:key:zPartner", "GENEALOGY_PII", Some(&scope)); + let sig = STANDARD.encode(sk.sign(msg.as_bytes()).to_bytes()); + let body = serde_json::json!({ + "request_uri": req_uri, "initiator_did": did, "partner_did": "did:key:zPartner", + "purpose": "GENEALOGY_PII", "scope": scope, "signature": 
sig, + }); + async move { + crate::routes::app(state) + .oneshot(Request::builder().method("POST").uri("/api/v1/exchange/request") + .header("content-type", "application/json").body(Body::from(body.to_string())).unwrap()) + .await + .unwrap() + } + }; + + // A non-team DID (valid signature) → 403. + let outsider = SigningKey::from_bytes(&[32u8; 32]); + assert_eq!(req(state.clone(), &outsider, 1).await.status(), StatusCode::FORBIDDEN); + // The project owner (founding ADMIN) → 200. + assert_eq!(req(state, &owner, 2).await.status(), StatusCode::OK); + } +} diff --git a/rust/crates/du-web/src/routes/ibd.rs b/rust/crates/du-web/src/routes/ibd.rs new file mode 100644 index 00000000..c5c91069 --- /dev/null +++ b/rust/crates/du-web/src/routes/ibd.rs @@ -0,0 +1,429 @@ +//! Federated IBD suggestions (`/api/v1/ibd/*`) — the D3 read entry point. The AppView +//! *coordinates* IBD: it serves a caller their own **pseudonymous** introduction +//! candidates (mined by `du_db::ibd::recompute_suggestions` from the `fed.*` records +//! Navigator publishes) and brokers a consent request to a chosen candidate **without +//! ever revealing the counterpart's DID** — identity reveal stays Edge-to-Edge over the +//! D1 exchange after mutual consent. Every call is **signature-authenticated** (the +//! caller signs a canonical [`du_db::ibd::messages`] message with its DID key — +//! [`crate::sig::verify_signed`]). Scope is **personal** (the caller's own samples via +//! the `core.biosample.atproto->>'repo_did'` bridge), not project-scoped. Not part of +//! the public OpenAPI document. 
+
+use crate::error::AppError;
+use crate::sig::verify_signed;
+use crate::state::AppState;
+use axum::extract::{Query, State};
+use axum::routing::{get, post};
+use axum::{Json, Router};
+use du_db::ibd::{self, messages};
+use du_db::exchange;
+use serde::Deserialize;
+use serde_json::{json, Value};
+use sha2::{Digest, Sha256};
+use uuid::Uuid;
+
+pub fn router() -> Router<AppState> { // the four IBD endpoints; state is attached by the caller
+    Router::new()
+        .route("/api/v1/ibd/suggestions", get(suggestions))
+        .route("/api/v1/ibd/introduce", post(introduce))
+        .route("/api/v1/ibd/dismiss", post(dismiss))
+        .route("/api/v1/ibd/attest", post(attest))
+}
+
+#[derive(Deserialize)]
+struct SuggestionsQuery {
+    did: String, // caller DID; also the signer the signature is checked against
+    ts: i64,     // Unix seconds; rejected when more than 300 s away from server time
+    sig: String, // signature over `messages::poll(did, ts)`
+}
+
+/// A caller's own ranked candidates — pseudonymous (no counterpart DID). Signed poll, replay-guarded by `ts`.
+async fn suggestions(State(st): State<AppState>, Query(q): Query<SuggestionsQuery>) -> Result<Json<Value>, AppError> {
+    if chrono::Utc::now().timestamp().abs_diff(q.ts) > 300 { // abs_diff: no overflow on an extreme caller-supplied ts
+        return Err(AppError::BadRequest("stale timestamp".into()));
+    }
+    verify_signed(&st.pool, &q.did, &messages::poll(&q.did, q.ts), &q.sig).await?;
+    let items: Vec<Value> = ibd::suggestions_for_did(&st.pool, &q.did, 100)
+        .await?
+        .into_iter()
+        .map(|s| json!({
+            "suggested_sample_guid": s.suggested_sample_guid,
+            "suggestion_type": s.suggestion_type,
+            "score": s.score,
+            "metadata": s.metadata,
+        }))
+        .collect();
+    Ok(Json(json!({ "items": items })))
+}
+
+#[derive(Deserialize)]
+struct IntroduceBody {
+    did: String,                 // caller (initiator) DID
+    suggested_sample_guid: Uuid, // the candidate sample the caller wants an introduction to
+    signature: String,           // signature over `messages::introduce(did, guid)`
+}
+
+/// Ask the broker to relay a D1 consent request to a chosen candidate. The counterpart
+/// DID is resolved server-side and **never returned**; the caller learns it only after
+/// mutual consent opens a session (`exchange::pending_for`).
+async fn introduce(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.did, &messages::introduce(&b.did, &b.suggested_sample_guid.to_string()), &b.signature).await?; + // Authorize + pick the exchange purpose from the suggestion's dominant signal (the caller + // may only introduce to its own genuine candidate). + let purpose = ibd::introduction_purpose(&st.pool, &b.did, b.suggested_sample_guid) + .await? + .ok_or(AppError::Forbidden)?; + // Resolve the counterpart server-side; never surface it to the caller. + let counterpart = ibd::owner_did_of_sample(&st.pool, b.suggested_sample_guid) + .await? + .ok_or_else(|| AppError::NotFound(format!("candidate {} is not claimable", b.suggested_sample_guid)))?; + // Opaque, deterministic handle (idempotent per caller+candidate) that does NOT embed + // the initiator DID — the recipient consents blind, learning the initiator only after + // mutual consent (symmetric with the caller never seeing the counterpart pre-consent). + let digest = Sha256::digest(format!("{}:{}", b.did, b.suggested_sample_guid).as_bytes()); + let hex: String = digest.iter().map(|byte| format!("{byte:02x}")).collect(); + let request_uri = format!("urn:ibd:{hex}"); + exchange::create_request( + &st.pool, + &exchange::NewRequest { + request_uri: &request_uri, + initiator_did: &b.did, + partner_did: &counterpart, + purpose: &purpose, + scope: None, + details: json!({ "origin": "IBD_SUGGESTION" }), + }, + ) + .await?; + // The suggestion became a request → CONVERTED (drops from the active candidate list). + ibd::mark_converted(&st.pool, &b.did, b.suggested_sample_guid).await?; + Ok(Json(json!({ "request_uri": request_uri, "status": "PENDING", "purpose": purpose }))) +} + +#[derive(Deserialize)] +struct DismissBody { + did: String, + suggested_sample_guid: Uuid, + signature: String, +} + +/// Dismiss a candidate so the engine stops suggesting it (preserved across recomputes). 
+async fn dismiss(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.did, &messages::dismiss(&b.did, &b.suggested_sample_guid.to_string()), &b.signature).await?; + let n = ibd::dismiss_suggestion(&st.pool, &b.did, b.suggested_sample_guid).await?; + if n == 0 { + return Err(AppError::NotFound(format!("no active suggestion for {}", b.suggested_sample_guid))); + } + Ok(Json(json!({ "suggested_sample_guid": b.suggested_sample_guid, "status": "DISMISSED" }))) +} + +#[derive(Deserialize)] +struct AttestBody { + did: String, + request_uri: String, + claimed_sample: Uuid, + counterpart_sample: Uuid, + region_type: String, + #[serde(default)] + total_shared_cm: Option, + #[serde(default)] + num_segments: Option, + #[serde(default = "default_attestation_type")] + attestation_type: String, + #[serde(default)] + notes: Option, + signature: String, +} + +fn default_attestation_type() -> String { + "INITIAL_REPORT".to_string() +} + +/// Report the outcome of a completed Edge-to-Edge comparison. The AppView records it as +/// match state and, once both consented parties report a compatible total, confirms the +/// edge — which is what feeds the shared-match discovery signal. PII-free: the body carries +/// only pseudonymous sample handles + coarse totals, never segment coordinates or genotypes. +async fn attest(State(st): State, Json(b): Json) -> Result, AppError> { + // The signed figure binds the claim to the signature (formatted to match the Edge). 
+ let cm = b.total_shared_cm.map(|c| format!("{c:.1}")).unwrap_or_default(); + let msg = messages::attest( + &b.did, + &b.request_uri, + &b.claimed_sample.to_string(), + &b.counterpart_sample.to_string(), + &b.region_type, + &cm, + ); + verify_signed(&st.pool, &b.did, &msg, &b.signature).await?; + + let outcome = ibd::record_attestation( + &st.pool, + &ibd::Attestation { + attester_did: &b.did, + request_uri: &b.request_uri, + claimed_sample: b.claimed_sample, + counterpart_sample: b.counterpart_sample, + region_type: &b.region_type, + total_shared_cm: b.total_shared_cm, + num_segments: b.num_segments, + attestation_type: &b.attestation_type, + signature: &b.signature, + notes: b.notes.as_deref(), + }, + ) + .await?; + match outcome { + ibd::AttestationOutcome::Recorded { discovery_index_id, consensus_status, publicly_discoverable } => { + Ok(Json(json!({ + "discovery_index_id": discovery_index_id, + "consensus_status": consensus_status, + "publicly_discoverable": publicly_discoverable, + }))) + } + ibd::AttestationOutcome::Rejected(reason) => { + tracing::warn!(reason, did = %b.did, "ibd attestation rejected"); + Err(AppError::Forbidden) + } + } +} + +#[cfg(test)] +mod tests { + use axum::body::{to_bytes, Body}; + use axum::http::{Request, StatusCode}; + use base64::engine::general_purpose::STANDARD; + use base64::Engine; + use ed25519_dalek::{Signer, SigningKey}; + use serde_json::Value; + use tower::ServiceExt; + use uuid::Uuid; + + /// Insert a federated biosample owned by `did`; returns its sample_guid. 
+ async fn fed_sample(pool: &sqlx::PgPool, did: &str) -> Uuid { + sqlx::query_scalar( + "INSERT INTO core.biosample (source, atproto) \ + VALUES ('CITIZEN'::core.biosample_source, jsonb_build_object('uri',$1::text,'repo_did',$2::text)) \ + RETURNING sample_guid", + ) + .bind(format!("at://{did}/bio")) + .bind(did) + .fetch_one(pool) + .await + .expect("insert biosample") + } + + /// A signed-poll suggestions read returns the caller's own pseudonymous candidates and + /// nothing for an unrelated DID; introduce brokers a request without leaking the + /// counterpart DID; introduce to a non-candidate is refused. + #[tokio::test] + async fn suggestions_scoped_and_introduce_hides_counterpart() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping ibd endpoint test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + + let owner = SigningKey::from_bytes(&[41u8; 32]); + let owner_did = du_atproto::did::did_key_from_ed25519(&owner.verifying_key()); + let target = fed_sample(&pool, &owner_did).await; + // The counterpart is a did:key so it can sign its own /exchange/incoming poll. + let counter = SigningKey::from_bytes(&[43u8; 32]); + let counterpart_did = du_atproto::did::did_key_from_ed25519(&counter.verifying_key()); + let suggested = fed_sample(&pool, &counterpart_did).await; + // A single ACTIVE candidate: owner's sample → the counterpart's sample. + sqlx::query( + "INSERT INTO ibd.match_suggestion (target_sample_guid, suggested_sample_guid, suggestion_type, score, status) \ + VALUES ($1, $2, 'POPULATION_OVERLAP', 0.9, 'ACTIVE')", + ) + .bind(target) + .bind(suggested) + .execute(&pool) + .await + .unwrap(); + let state = crate::state::AppState { pool: pool.clone(), key: tower_cookies::Key::generate(), oauth: None }; + + // base64 STANDARD sigs carry +,/,= — percent-encode for the query string. 
+ let enc = |s: &str| s.replace('+', "%2B").replace('/', "%2F").replace('=', "%3D"); + let get = |state: crate::state::AppState, uri: String| async move { + crate::routes::app(state) + .oneshot(Request::builder().method("GET").uri(uri).body(Body::empty()).unwrap()) + .await + .unwrap() + }; + let post = |state: crate::state::AppState, body: Value| async move { + crate::routes::app(state) + .oneshot(Request::builder().method("POST").uri("/api/v1/ibd/introduce") + .header("content-type", "application/json").body(Body::from(body.to_string())).unwrap()) + .await + .unwrap() + }; + + // Owner-signed poll → its one candidate, pseudonymous. + let ts = chrono::Utc::now().timestamp(); + let sig = STANDARD.encode(owner.sign(du_db::ibd::messages::poll(&owner_did, ts).as_bytes()).to_bytes()); + let resp = get(state.clone(), format!("/api/v1/ibd/suggestions?did={owner_did}&ts={ts}&sig={}", enc(&sig))).await; + assert_eq!(resp.status(), StatusCode::OK); + let v: Value = serde_json::from_slice(&to_bytes(resp.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert_eq!(v["items"].as_array().unwrap().len(), 1); + assert_eq!(v["items"][0]["suggested_sample_guid"].as_str().unwrap(), suggested.to_string()); + // The pseudonymous row carries no counterpart DID. + assert!(!v["items"][0].to_string().contains(counterpart_did.as_str())); + + // An unrelated DID (valid signature) sees none of the owner's candidates. + let other = SigningKey::from_bytes(&[42u8; 32]); + let other_did = du_atproto::did::did_key_from_ed25519(&other.verifying_key()); + let osig = STANDARD.encode(other.sign(du_db::ibd::messages::poll(&other_did, ts).as_bytes()).to_bytes()); + let r2 = get(state.clone(), format!("/api/v1/ibd/suggestions?did={other_did}&ts={ts}&sig={}", enc(&osig))).await; + let v2: Value = serde_json::from_slice(&to_bytes(r2.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert_eq!(v2["items"].as_array().unwrap().len(), 0); + + // A stale timestamp → 422. 
+ let stale = get(state.clone(), format!("/api/v1/ibd/suggestions?did={owner_did}&ts={}&sig={}", ts - 10_000, enc(&sig))).await; + assert_eq!(stale.status(), StatusCode::UNPROCESSABLE_ENTITY); + + // Introduce to the genuine candidate → brokers a PENDING request, counterpart hidden. + let im = du_db::ibd::messages::introduce(&owner_did, &suggested.to_string()); + let isig = STANDARD.encode(owner.sign(im.as_bytes()).to_bytes()); + let intro = post(state.clone(), serde_json::json!({ + "did": owner_did, "suggested_sample_guid": suggested, "signature": isig, + })).await; + assert_eq!(intro.status(), StatusCode::OK); + let iv: Value = serde_json::from_slice(&to_bytes(intro.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert_eq!(iv["status"].as_str(), Some("PENDING")); + // Purpose routed from the suggestion's signal (POPULATION_OVERLAP → autosomal). + assert_eq!(iv["purpose"].as_str(), Some("IBD_AUTOSOMAL")); + assert!(!iv.to_string().contains(counterpart_did.as_str()), "introduce response must not leak the counterpart DID"); + let request_uri = iv["request_uri"].as_str().unwrap().to_string(); + assert!(!request_uri.contains(&owner_did), "the opaque handle must not embed the initiator DID"); + // The broker row carries the resolved counterpart as partner_did (server-side only). + let partner: String = sqlx::query_scalar("SELECT partner_did FROM exchange.exchange_request WHERE initiator_did = $1") + .bind(&owner_did) + .fetch_one(&pool) + .await + .unwrap(); + assert_eq!(partner, counterpart_did); + + // The loop closes: the counterpart DISCOVERS the request via /exchange/incoming — + // symmetric-blind (gets the handle + purpose, NOT the initiator DID). 
+ let cts = chrono::Utc::now().timestamp(); + let csig = STANDARD.encode(counter.sign(du_db::exchange::messages::poll(&counterpart_did, cts).as_bytes()).to_bytes()); + let inc = get(state.clone(), format!("/api/v1/exchange/incoming?did={counterpart_did}&ts={cts}&sig={}", enc(&csig))).await; + assert_eq!(inc.status(), StatusCode::OK); + let incv: Value = serde_json::from_slice(&to_bytes(inc.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert_eq!(incv["items"].as_array().unwrap().len(), 1); + assert_eq!(incv["items"][0]["request_uri"].as_str(), Some(request_uri.as_str())); + assert_eq!(incv["items"][0]["purpose"].as_str(), Some("IBD_AUTOSOMAL")); + assert!(!incv.to_string().contains(&owner_did), "incoming must not reveal the initiator pre-consent"); + + // Introduce to a sample that is NOT the caller's candidate → 403. + let stranger = fed_sample(&pool, "did:plc:stranger").await; + let bm = du_db::ibd::messages::introduce(&owner_did, &stranger.to_string()); + let bsig = STANDARD.encode(owner.sign(bm.as_bytes()).to_bytes()); + let bad = post(state.clone(), serde_json::json!({ + "did": owner_did, "suggested_sample_guid": stranger, "signature": bsig, + })).await; + assert_eq!(bad.status(), StatusCode::FORBIDDEN); + + // Dismiss a fresh ACTIVE candidate → 200; it then no longer shows in /suggestions. 
+ let other = fed_sample(&pool, "did:plc:other").await; + sqlx::query( + "INSERT INTO ibd.match_suggestion (target_sample_guid, suggested_sample_guid, suggestion_type, score, status) \ + VALUES ($1, $2, 'POPULATION_OVERLAP', 0.7, 'ACTIVE')", + ) + .bind(target) + .bind(other) + .execute(&pool) + .await + .unwrap(); + let dm = du_db::ibd::messages::dismiss(&owner_did, &other.to_string()); + let dsig = STANDARD.encode(owner.sign(dm.as_bytes()).to_bytes()); + let dres = crate::routes::app(state.clone()) + .oneshot(Request::builder().method("POST").uri("/api/v1/ibd/dismiss") + .header("content-type", "application/json") + .body(Body::from(serde_json::json!({ "did": owner_did, "suggested_sample_guid": other, "signature": dsig }).to_string())).unwrap()) + .await + .unwrap(); + assert_eq!(dres.status(), StatusCode::OK); + let dts = chrono::Utc::now().timestamp(); + let dpoll = STANDARD.encode(owner.sign(du_db::ibd::messages::poll(&owner_did, dts).as_bytes()).to_bytes()); + let listed = get(state, format!("/api/v1/ibd/suggestions?did={owner_did}&ts={dts}&sig={}", enc(&dpoll))).await; + let lv: Value = serde_json::from_slice(&to_bytes(listed.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert!(!lv.to_string().contains(&other.to_string()), "dismissed candidate is gone from /suggestions"); + } + + /// The attest endpoint records a signed report (200/PENDING for one side), rejects a + /// tampered signature (403) and a non-consented exchange (403). 
+ #[tokio::test] + async fn attest_endpoint_signed_and_gated() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping ibd attest endpoint test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + + let alice = SigningKey::from_bytes(&[61u8; 32]); + let alice_did = du_atproto::did::did_key_from_ed25519(&alice.verifying_key()); + let bob = SigningKey::from_bytes(&[62u8; 32]); + let bob_did = du_atproto::did::did_key_from_ed25519(&bob.verifying_key()); + let sa = fed_sample(&pool, &alice_did).await; + let sb = fed_sample(&pool, &bob_did).await; + sqlx::query( + "INSERT INTO exchange.exchange_request (request_uri, initiator_did, partner_did, purpose, status) \ + VALUES ('urn:web:ab', $1, $2, 'IBD_AUTOSOMAL', 'CONSENTED')", + ) + .bind(&alice_did) + .bind(&bob_did) + .execute(&pool) + .await + .unwrap(); + let state = crate::state::AppState { pool: pool.clone(), key: tower_cookies::Key::generate(), oauth: None }; + + let post = |state: crate::state::AppState, body: Value| async move { + crate::routes::app(state) + .oneshot(Request::builder().method("POST").uri("/api/v1/ibd/attest") + .header("content-type", "application/json").body(Body::from(body.to_string())).unwrap()) + .await + .unwrap() + }; + + // Signed happy path: alice's first report → 200, PENDING (awaiting bob). 
+ let cm = format!("{:.1}", 250.0); + let msg = du_db::ibd::messages::attest(&alice_did, "urn:web:ab", &sa.to_string(), &sb.to_string(), "AUTOSOMAL", &cm); + let sig = STANDARD.encode(alice.sign(msg.as_bytes()).to_bytes()); + let ok = post(state.clone(), serde_json::json!({ + "did": alice_did, "request_uri": "urn:web:ab", "claimed_sample": sa, "counterpart_sample": sb, + "region_type": "AUTOSOMAL", "total_shared_cm": 250.0, "signature": sig, + })).await; + assert_eq!(ok.status(), StatusCode::OK); + let ov: Value = serde_json::from_slice(&to_bytes(ok.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert_eq!(ov["consensus_status"].as_str(), Some("PENDING")); + assert_eq!(ov["publicly_discoverable"].as_bool(), Some(false)); + + // Tampered signature → 403. + let bad = post(state.clone(), serde_json::json!({ + "did": alice_did, "request_uri": "urn:web:ab", "claimed_sample": sa, "counterpart_sample": sb, + "region_type": "AUTOSOMAL", "total_shared_cm": 250.0, "signature": "AAAA", + })).await; + assert_eq!(bad.status(), StatusCode::FORBIDDEN); + + // A correctly-signed report against a non-consented exchange → 403. 
+ sqlx::query( + "INSERT INTO exchange.exchange_request (request_uri, initiator_did, partner_did, purpose, status) \ + VALUES ('urn:web:pending', $1, $2, 'IBD_AUTOSOMAL', 'PENDING')", + ) + .bind(&alice_did) + .bind(&bob_did) + .execute(&pool) + .await + .unwrap(); + let pmsg = du_db::ibd::messages::attest(&alice_did, "urn:web:pending", &sa.to_string(), &sb.to_string(), "AUTOSOMAL", &cm); + let psig = STANDARD.encode(alice.sign(pmsg.as_bytes()).to_bytes()); + let pending = post(state, serde_json::json!({ + "did": alice_did, "request_uri": "urn:web:pending", "claimed_sample": sa, "counterpart_sample": sb, + "region_type": "AUTOSOMAL", "total_shared_cm": 250.0, "signature": psig, + })).await; + assert_eq!(pending.status(), StatusCode::FORBIDDEN); + } +} diff --git a/rust/crates/du-web/src/routes/maps.rs b/rust/crates/du-web/src/routes/maps.rs new file mode 100644 index 00000000..a9f67635 --- /dev/null +++ b/rust/crates/du-web/src/routes/maps.rs @@ -0,0 +1,47 @@ +//! Biosample geographic map. The page loads Leaflet (client-side) and fetches +//! GeoJSON from `/biosamples/geo-data`, which is produced from the donor +//! `geocoord` via PostGIS. This is the one public area that needs client JS. + +use crate::error::AppError; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::State; +use axum::response::Response; +use axum::routing::get; +use axum::{Json, Router}; +use serde_json::{json, Value}; + +pub fn router() -> Router { + Router::new() + .route("/biosamples/map", get(map_page)) + .route("/biosamples/geo-data", get(geo_data)) +} + +#[derive(askama::Template)] +#[template(path = "biosamples/map.html")] +struct MapTemplate { + t: T, + next: String, + user: Option, +} + +async fn map_page(locale: Locale, user: crate::auth::MaybeUser) -> Response { + html(&MapTemplate { t: locale.t, next: locale.next, user: user.nav() }) +} + +/// GeoJSON FeatureCollection of biosample locations for Leaflet. 
+async fn geo_data(State(st): State<AppState>) -> Result<Json<Value>, AppError> {
+    let points = du_db::biosample::geo_points(&st.pool).await?;
+    let features: Vec<Value> = points
+        .into_iter()
+        .map(|p| {
+            json!({
+                "type": "Feature",
+                "geometry": { "type": "Point", "coordinates": [p.lon, p.lat] }, // GeoJSON order: [lon, lat]
+                "properties": { "accession": p.accession, "source": p.source.label() },
+            })
+        })
+        .collect();
+    Ok(Json(json!({ "type": "FeatureCollection", "features": features })))
+}
diff --git a/rust/crates/du-web/src/routes/mod.rs b/rust/crates/du-web/src/routes/mod.rs
new file mode 100644
index 00000000..b02fb468
--- /dev/null
+++ b/rust/crates/du-web/src/routes/mod.rs
@@ -0,0 +1,143 @@
+//! Router assembly + top-level pages.
+
+use crate::auth::{MaybeUser, NavUser};
+use crate::i18n::{Lang, Locale, T};
+use crate::render::html;
+use crate::state::AppState;
+use axum::extract::{Path, Query};
+use axum::http::header::{LOCATION, SET_COOKIE};
+use axum::http::StatusCode;
+use axum::response::{IntoResponse, Response};
+use axum::routing::get;
+use axum::Router;
+use serde::Deserialize;
+use tower_cookies::CookieManagerLayer;
+use tower_http::services::ServeDir;
+
+pub mod auth_routes;
+pub mod change_sets;
+pub mod coverage;
+pub mod exchange;
+pub mod ibd;
+pub mod curation;
+pub mod curator;
+pub mod curator_regions;
+pub mod curator_variants;
+pub mod maps;
+pub mod naming;
+pub mod denovo_conflicts;
+pub mod pages;
+pub mod publications;
+pub mod reconcile_flags;
+pub mod references;
+pub mod research;
+pub mod samples;
+pub mod sequencer;
+pub mod tree;
+pub mod variants;
+pub mod versioning;
+
+/// Directory holding vendored static assets. Settable for deployment
+/// (Dockerfile sets DU_ASSETS_DIR=/app/assets); an unset or empty value falls
+/// back to the crate's assets dir for local `cargo run`.
+fn assets_dir() -> String {
+    std::env::var("DU_ASSETS_DIR").ok().filter(|dir| !dir.is_empty()) // never hand "" to ServeDir
+        .unwrap_or_else(|| concat!(env!("CARGO_MANIFEST_DIR"), "/assets").to_string())
+}
+
+/// Full application router (requires a DB-backed AppState).
+pub fn app(state: AppState) -> Router {
+    Router::new()
+        // Top-level pages defined in this module.
+        .route("/health", get(health))
+        .route("/", get(index))
+        .route("/language/:lang", get(switch_language))
+        // Feature routers, one per submodule.
+        .merge(variants::router())
+        .merge(tree::router())
+        .merge(references::router())
+        .merge(samples::router())
+        .merge(maps::router())
+        .merge(coverage::router())
+        .merge(pages::router())
+        .merge(auth_routes::router())
+        .merge(curator::router())
+        .merge(curator_variants::router())
+        .merge(curator_regions::router())
+        .merge(curation::router())
+        .merge(publications::router())
+        .merge(naming::router())
+        .merge(reconcile_flags::router())
+        .merge(denovo_conflicts::router())
+        .merge(change_sets::router())
+        .merge(versioning::router())
+        .merge(sequencer::router())
+        .merge(exchange::router())
+        .merge(ibd::router())
+        .merge(research::router())
+        .merge(crate::oauth::router())
+        .merge(crate::api::router())
+        // Static files are served from `assets_dir()` under the /assets prefix.
+        .nest_service("/assets", ServeDir::new(assets_dir()))
+        .layer(CookieManagerLayer::new())
+        .with_state(state)
+}
+
+/// Health-only router for environments without a database (and for tests).
+pub fn health_only() -> Router {
+    Router::new().route("/health", get(health))
+}
+
+/// Liveness probe: always answers 200 with the body `ok`.
+async fn health() -> (StatusCode, &'static str) {
+    (StatusCode::OK, "ok")
+}
+
+/// View model for `index.html`.
+#[derive(askama::Template)]
+#[template(path = "index.html")]
+struct IndexTemplate {
+    t: T,
+    next: String,
+    user: Option,
+}
+
+/// Render the landing page served at `/`.
+async fn index(locale: Locale, user: MaybeUser) -> Response {
+    html(&IndexTemplate { t: locale.t, next: locale.next, user: user.nav() })
+}
+
+/// Query string accepted by `/language/:lang`.
+#[derive(Deserialize)]
+struct NextQuery {
+    // Where to send the browser after the language cookie is set.
+    next: Option,
+}
+
+/// Set the `lang` cookie and redirect back. Only same-site relative paths are
+/// honored as the redirect target (open-redirect guard, like the legacy app).
+async fn switch_language(Path(lang): Path, Query(q): Query) -> Response {
+    // Unrecognised language codes fall back to English instead of failing.
+    let chosen = Lang::parse(&lang).unwrap_or(Lang::En);
+    // Any target that is not provably a same-site path is replaced by "/".
+    let next = q
+        .next
+        .filter(|n| is_same_site_path(n))
+        .unwrap_or_else(|| "/".to_string());
+    let cookie = format!(
+        "lang={}; Path=/; Max-Age=31536000; SameSite=Lax",
+        chosen.code()
+    );
+    (
+        StatusCode::SEE_OTHER,
+        [(SET_COOKIE, cookie), (LOCATION, next)],
+    )
+        .into_response()
+}
+
+/// True when `target` is a relative path that keeps the browser on this site.
+///
+/// A leading `/` alone is not enough: browsers treat `\` like `/` and drop
+/// tabs and newlines while parsing a URL, so `/\host` and `/<TAB>/host` both
+/// resolve to the protocol-relative `//host`. Control characters are rejected
+/// outright, which also keeps the value usable as a `Location` header.
+fn is_same_site_path(target: &str) -> bool {
+    target.starts_with('/')
+        && !target.starts_with("//")
+        && !target.starts_with("/\\")
+        && !target.chars().any(char::is_control)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use axum::body::Body;
+    use axum::http::{Request, StatusCode};
+    use tower::ServiceExt;
+
+    #[tokio::test]
+    async fn health_returns_ok() {
+        let resp = health_only()
+            .oneshot(Request::builder().uri("/health").body(Body::empty()).unwrap())
+            .await
+            .unwrap();
+        assert_eq!(resp.status(), StatusCode::OK);
+    }
+
+    #[test]
+    fn redirect_guard_only_accepts_same_site_paths() {
+        assert!(is_same_site_path("/"));
+        assert!(is_same_site_path("/variants?page=2"));
+        assert!(!is_same_site_path(""));
+        assert!(!is_same_site_path("https://evil.example/"));
+        assert!(!is_same_site_path("//evil.example/"));
+        assert!(!is_same_site_path("/\\evil.example/"));
+        assert!(!is_same_site_path("/\t/evil.example/"));
+        assert!(!is_same_site_path("/ok\r\nSet-Cookie: x=1"));
+    }
+}
diff --git a/rust/crates/du-web/src/routes/naming.rs b/rust/crates/du-web/src/routes/naming.rs
new file mode 100644
index 00000000..355acd3b
--- /dev/null
+++ b/rust/crates/du-web/src/routes/naming.rs
@@ -0,0 +1,241 @@
+//! Curator **Variant Naming Authority** UI. Two-panel HTMX screen over
+//! `du_db::naming`: the naming queue (left) and a variant panel (right) showing
+//! coordinates, current name/aliases, the branch it defines, and any same-coord
+//! named variant (dedup) — with **Assign DU name**, **Flag for review**, and
+//! **Send back to unnamed** actions. Minting goes through `assign_du_name`.
+
+use crate::auth::{Curator, NavUser};
+use crate::error::AppError;
+use crate::htmx::HxHeaders;
+use crate::i18n::{Locale, T};
+use crate::render::html;
+use crate::state::AppState;
+use axum::extract::{Path, Query, State};
+use axum::response::{IntoResponse, Response};
+use axum::routing::{get, post};
+use axum::{Form, Router};
+use serde::Deserialize;
+
+/// Event name handed to `HxHeaders::trigger` after a mutating action.
+const CHANGED: &str = "naming-changed";
+
+/// Routes of the naming screen: full page, list fragment, detail panel and
+/// the two POST actions.
+pub fn router() -> Router {
+    Router::new()
+        .route("/curator/naming", get(page))
+        .route("/curator/naming/fragment", get(list))
+        .route("/curator/naming/:id/panel", get(panel))
+        .route("/curator/naming/:id/assign", post(assign))
+        .route("/curator/naming/:id/status", post(status))
+}
+
+// ── helpers ─────────────────────────────────────────────────────────────────
+
+/// "chrY:2781234" from the GRCh38 coordinate JSONB, or "—".
+fn coord_label(coords: &serde_json::Value) -> String {
+    let g = coords.get("GRCh38");
+    match g {
+        Some(g) => {
+            let contig = g.get("contig").and_then(|v| v.as_str());
+            // `position` is accepted either as a JSON string or as an integer.
+            let pos = g.get("position").and_then(|v| v.as_str().map(str::to_string).or_else(|| v.as_i64().map(|n| n.to_string())));
+            match (contig, pos) {
+                (Some(c), Some(p)) => format!("{c}:{p}"),
+                // Either half missing: show the placeholder rather than a partial label.
+                _ => "—".into(),
+            }
+        }
+        None => "—".into(),
+    }
+}
+
+/// String entries of the `common_names` array in the aliases JSON; empty when
+/// the key is absent or not an array. Non-string entries are skipped.
+fn common_names(aliases: &serde_json::Value) -> Vec {
+    aliases
+        .get("common_names")
+        .and_then(|v| v.as_array())
+        .map(|a| a.iter().filter_map(|x| x.as_str().map(str::to_string)).collect())
+        .unwrap_or_default()
+}
+
+// ── list ──────────────────────────────────────────────────────────────────────
+
+/// One queue entry, already formatted for display.
+struct Row {
+    id: i64,
+    name: String,
+    status: String,
+    coord: String,
+    defining: String,
+}
+
+/// A page of the naming queue plus its paging metadata.
+struct ListView {
+    mode: String,
+    rows: Vec,
+    page: i64,
+    total: i64,
+    total_pages: i64,
+}
+
+/// Query string of the page and fragment routes.
+#[derive(Deserialize)]
+struct ListQuery {
+    mode: Option,
+    page: Option,
+}
+
+/// Load one page (25 items) of the queue; `mode` defaults to `needs_name` and
+/// `page` to 1.
+async fn load_list(st: &AppState, q: &ListQuery) -> Result {
+    let mode = q.mode.clone().unwrap_or_else(|| "needs_name".into());
+    let result =
du_db::naming::queue(&st.pool, &mode, q.page.unwrap_or(1), 25).await?;
+    // Read the paging fields before `result.items` is moved out below.
+    let (page, total, total_pages) = (result.page, result.total, result.total_pages());
+    let rows = result
+        .items
+        .into_iter()
+        .map(|i| Row {
+            id: i.id,
+            name: i.canonical_name.unwrap_or_else(|| "(unnamed)".into()),
+            status: i.naming_status,
+            coord: coord_label(&i.coordinates),
+            defining: i.defining.unwrap_or_else(|| "—".into()),
+        })
+        .collect();
+    Ok(ListView { mode, rows, page, total, total_pages })
+}
+
+/// Full two-panel page (`curator/naming/page.html`).
+#[derive(askama::Template)]
+#[template(path = "curator/naming/page.html")]
+struct PageTemplate {
+    t: T,
+    next: String,
+    user: Option,
+    list: ListView,
+}
+/// List-only fragment (`curator/naming/list.html`).
+#[derive(askama::Template)]
+#[template(path = "curator/naming/list.html")]
+struct ListTemplate {
+    t: T,
+    list: ListView,
+}
+
+/// Render the whole naming screen for a curator.
+async fn page(
+    Curator(s): Curator,
+    State(st): State,
+    locale: Locale,
+    Query(q): Query,
+) -> Result {
+    let list = load_list(&st, &q).await?;
+    Ok(html(&PageTemplate {
+        t: locale.t,
+        next: locale.next,
+        user: Some(NavUser { display_name: s.display_name, is_curator: true }),
+        list,
+    }))
+}
+
+/// Render only the queue list. The `Curator` extractor is still required even
+/// though its value is unused.
+async fn list(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Query(q): Query,
+) -> Result {
+    let list = load_list(&st, &q).await?;
+    Ok(html(&ListTemplate { t: locale.t, list }))
+}
+
+// ── detail panel ──────────────────────────────────────────────────────────────
+
+/// A variant returned by the same-coordinate dedup lookup.
+struct Candidate {
+    name: String,
+}
+
+/// Everything the detail panel template shows for one variant.
+struct DetailView {
+    id: i64,
+    name: Option,
+    status: String,
+    mutation_type: String,
+    coord: String,
+    aliases: Vec,
+    defining: Option,
+    dedup: Vec,
+    // False once the variant's status is NAMED.
+    can_assign: bool,
+    // Optional message produced by the action that led to this render.
+    notice: Option,
+}
+
+#[derive(askama::Template)]
+#[template(path = "curator/naming/detail.html")]
+struct DetailTemplate {
+    t: T,
+    v: DetailView,
+}
+
+/// Load one variant and its dedup candidates; a missing variant becomes
+/// `AppError::NotFound`.
+async fn build_detail(st: &AppState, id: i64, notice: Option) -> Result {
+    let i = du_db::naming::get(&st.pool, id)
+        .await?
+        .ok_or_else(|| AppError::NotFound(format!("variant {id}")))?;
+    // Only the name of each same-coordinate variant is shown; the first tuple
+    // element is discarded.
+    let dedup = du_db::naming::dedup_by_coordinates(&st.pool, id)
+        .await?
+        .into_iter()
+        .map(|(_, name)| Candidate { name })
+        .collect();
+    Ok(DetailView {
+        id: i.id,
+        name: i.canonical_name.clone(),
+        status: i.naming_status.clone(),
+        mutation_type: i.mutation_type,
+        coord: coord_label(&i.coordinates),
+        aliases: common_names(&i.aliases),
+        defining: i.defining,
+        dedup,
+        can_assign: i.naming_status != "NAMED",
+        notice,
+    })
+}
+
+/// Render the detail panel for `id`.
+async fn detail_response(st: &AppState, t: T, id: i64, notice: Option) -> Result {
+    let v = build_detail(st, id, notice).await?;
+    Ok(html(&DetailTemplate { t, v }))
+}
+
+/// Same as `detail_response`, plus the `CHANGED` HTMX trigger.
+async fn changed_response(st: &AppState, t: T, id: i64, notice: Option) -> Result {
+    let body = detail_response(st, t, id, notice).await?;
+    Ok((HxHeaders::new().trigger(CHANGED), body).into_response())
+}
+
+/// GET handler for the right-hand panel.
+async fn panel(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Path(id): Path,
+) -> Result {
+    detail_response(&st, locale.t, id, None).await
+}
+
+// ── actions ─────────────────────────────────────────────────────────────────
+
+/// Mint a DU name for the variant. A `Conflict` from the database layer is not
+/// an error page: its message is shown as the panel notice instead.
+async fn assign(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Path(id): Path,
+) -> Result {
+    let notice = match du_db::naming::assign_du_name(&st.pool, id).await {
+        Ok(du) => format!("{} {du}", locale.t.get("nm.notice.minted")),
+        Err(du_db::DbError::Conflict(m)) => m,
+        Err(e) => return Err(e.into()),
+    };
+    changed_response(&st, locale.t, id, Some(notice)).await
+}
+
+#[derive(Deserialize)]
+struct StatusForm {
+    /// PENDING_REVIEW | UNNAMED
+    status: String,
+}
+
+/// Set the naming status from the posted form and re-render the panel.
+async fn status(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Path(id): Path,
+    Form(f): Form,
+) -> Result {
+    // Only the two literals below can reach the database; any value other
+    // than PENDING_REVIEW is treated as UNNAMED.
+    let new_status = match f.status.as_str() {
+        "PENDING_REVIEW" => "PENDING_REVIEW",
+        _ => "UNNAMED",
+    };
+    du_db::naming::set_status(&st.pool, id, new_status).await?;
+    changed_response(&st, locale.t, id, None).await
+}
diff --git
a/rust/crates/du-web/src/routes/pages.rs b/rust/crates/du-web/src/routes/pages.rs
new file mode 100644
index 00000000..eb304466
--- /dev/null
+++ b/rust/crates/du-web/src/routes/pages.rs
@@ -0,0 +1,295 @@
+//! Secondary surfaces: static informational pages (about, FAQ, terms, privacy,
+//! cookies, reputation), SEO endpoints (sitemap.xml, robots.txt), the GDPR
+//! cookie-consent record, the signed-in user's profile, and the public contact
+//! form (reCAPTCHA-protected when configured).
+
+use crate::auth::{MaybeUser, NavUser};
+use crate::error::AppError;
+use crate::i18n::{Locale, T};
+use crate::render::html;
+use crate::state::AppState;
+use axum::extract::State;
+use axum::http::{header, HeaderMap, StatusCode};
+use axum::response::{IntoResponse, Redirect, Response};
+use axum::routing::{get, post};
+use axum::{Form, Router};
+use du_domain::ids::UserId;
+use serde::Deserialize;
+
+/// Bump when the cookie-consent text materially changes (re-prompts users).
+const POLICY_VERSION: &str = "2026-06-01";
+/// Name of the cookie that stores the visitor's consent decision.
+const CONSENT_COOKIE: &str = "du_consent";
+/// ~180 days.
+const CONSENT_MAX_AGE: i64 = 15_552_000; // 180 * 24 * 60 * 60 seconds
+
+/// Routes of the secondary pages: static content, SEO files, cookie consent,
+/// profile and contact form.
+pub fn router() -> Router {
+    Router::new()
+        // The static pages share one handler; the closure supplies the page key.
+        .route("/about", get(|l, u| page("about", l, u)))
+        .route("/faq", get(|l, u| page("faq", l, u)))
+        .route("/terms", get(|l, u| page("terms", l, u)))
+        .route("/privacy", get(|l, u| page("privacy", l, u)))
+        .route("/cookies", get(|l, u| page("cookies", l, u)))
+        .route("/reputation", get(|l, u| page("reputation", l, u)))
+        .route("/sitemap.xml", get(sitemap))
+        .route("/robots.txt", get(robots))
+        .route("/cookie-consent", post(cookie_consent))
+        .route("/profile", get(profile_page).post(profile_update))
+        .route("/contact", get(contact_form).post(contact_submit))
+}
+
+// ── static content pages ──────────────────────────────────────────────────────
+
+#[derive(askama::Template)]
+#[template(path = "static/page.html")]
+struct PageTemplate {
+    t: T,
+    next: String,
+    user: Option,
+    /// Selects which content block renders + the page heading.
+    page: &'static str,
+    title: String,
+}
+
+/// Render one static page; its title is looked up under `page.<key>.title`.
+async fn page(page: &'static str, locale: Locale, user: MaybeUser) -> Response {
+    let title = locale.t.get(&format!("page.{page}.title")).to_string();
+    html(&PageTemplate { t: locale.t, next: locale.next, user: user.nav(), page, title })
+}
+
+// ── SEO ─────────────────────────────────────────────────────────────────────
+
+/// Canonical site origin for absolute URLs; overridable for deploys.
+fn base_url() -> String {
+    // DU_BASE_URL wins when set; otherwise the production origin is used.
+    std::env::var("DU_BASE_URL").unwrap_or_else(|_| "https://decoding-us.com".to_string())
+}
+
+/// The public, indexable pages (curator/auth surfaces are intentionally omitted).
+const PUBLIC_PATHS: &[&str] =
+    &["/", "/ytree", "/mtree", "/variants", "/references", "/coverage-benchmarks", "/about", "/contact",
+      "/reputation", "/terms", "/privacy", "/cookies", "/faq"];
+
+/// Serve the sitemap: one entry per `PUBLIC_PATHS` item, prefixed with
+/// `base_url()`, sent as `application/xml`.
+// NOTE(review): the string literals below carry no XML markup in this copy of
+// the patch; it looks stripped by extraction — confirm against the repository
+// before relying on the output format.
+async fn sitemap() -> Response {
+    let base = base_url();
+    let mut xml = String::from("\n");
+    xml.push_str("\n");
+    for p in PUBLIC_PATHS {
+        xml.push_str(&format!(" {base}{p}\n"));
+    }
+    xml.push_str("\n");
+    ([(header::CONTENT_TYPE, "application/xml; charset=utf-8")], xml).into_response()
+}
+
+/// Serve robots.txt: everything is allowed except `/curator` and `/api`, and
+/// the sitemap location is advertised using `base_url()`.
+async fn robots() -> Response {
+    let body = format!(
+        "User-agent: *\nAllow: /\nDisallow: /curator\nDisallow: /api\nSitemap: {}/sitemap.xml\n",
+        base_url()
+    );
+    ([(header::CONTENT_TYPE, "text/plain; charset=utf-8")], body).into_response()
+}
+
+// ── cookie consent ────────────────────────────────────────────────────────────
+
+#[derive(Deserialize)]
+struct ConsentForm {
+    /// "true" to accept non-essential cookies, anything else to decline.
+    consent: Option,
+}
+
+/// Record a consent decision (attributed to the signed-in user if any) and set
+/// the client-side consent cookie so the banner stays dismissed.
+async fn cookie_consent(
+    State(st): State,
+    user: MaybeUser,
+    headers: HeaderMap,
+    Form(f): Form,
+) -> Result {
+    // A missing field or any value other than the literal "true" is a decline.
+    let accepted = f.consent.as_deref() == Some("true");
+    let user_id = user.0.as_ref().map(|s| s.user_id);
+    // A User-Agent that is absent or not valid text is stored as None.
+    let ua = headers.get(header::USER_AGENT).and_then(|v| v.to_str().ok());
+    du_db::consent::record(&st.pool, user_id, accepted, POLICY_VERSION, ua).await?;
+
+    let cookie = format!(
+        "{}={}; Path=/; Max-Age={}; SameSite=Lax",
+        CONSENT_COOKIE,
+        if accepted { "yes" } else { "no" },
+        CONSENT_MAX_AGE
+    );
+    // 204: the response carries only the Set-Cookie header, no body.
+    Ok((StatusCode::NO_CONTENT, [(header::SET_COOKIE, cookie)]).into_response())
+}
+
+// ── profile (signed-in user's own account) ─────────────────────────────────────
+
+/// View model for `account/profile.html`.
+#[derive(askama::Template)]
+#[template(path = "account/profile.html")]
+struct ProfileTemplate {
+    t: T,
+    next: String,
+    user: Option,
+    display_name: String,
+    roles: String,
+    email: Option,
+    did: Option,
+    handle: Option,
+    member_since: String,
+    /// Shows a "saved" confirmation after an update.
+    saved: bool,
+}
+
+/// Render the profile page for the signed-in user (or redirect to login).
+async fn render_profile(
+    st: &AppState,
+    user: &MaybeUser,
+    locale: &Locale,
+    saved: bool,
+) -> Result {
+    // No session: send the visitor to the login page instead of erroring.
+    let Some(session) = user.0.clone() else {
+        return Ok(Redirect::to("/login").into_response());
+    };
+    let p = du_db::auth::profile(&st.pool, UserId(session.user_id))
+        .await?
+        .ok_or_else(|| AppError::NotFound("user".into()))?;
+    // Roles come from the session; an empty list is shown as a dash.
+    let roles = if session.roles.is_empty() { "—".to_string() } else { session.roles.join(", ") };
+    Ok(html(&ProfileTemplate {
+        t: locale.t,
+        next: locale.next.clone(),
+        user: user.nav(),
+        // Prefer the stored name; fall back to the one carried by the session.
+        display_name: p.display_name.unwrap_or_else(|| session.display_name.clone()),
+        roles,
+        email: p.email,
+        did: p.did,
+        handle: p.handle,
+        member_since: p.created_at.format("%Y-%m-%d").to_string(),
+        saved,
+    }))
+}
+
+/// GET `/profile`: show the profile without the "saved" confirmation.
+async fn profile_page(State(st): State, user: MaybeUser, locale: Locale) -> Result {
+    render_profile(&st, &user, &locale, false).await
+}
+
+/// Body of the profile form.
+#[derive(Deserialize)]
+struct ProfileForm {
+    display_name: String,
+}
+
+/// POST `/profile`: store a new display name and re-render the page.
+///
+/// A blank (whitespace-only) name is ignored. The "saved" confirmation is
+/// shown only when a write actually happened, so an ignored submission no
+/// longer claims success.
+async fn profile_update(
+    State(st): State,
+    user: MaybeUser,
+    locale: Locale,
+    Form(f): Form,
+) -> Result {
+    let Some(session) = user.0.clone() else {
+        return Ok(Redirect::to("/login").into_response());
+    };
+    let name = f.display_name.trim();
+    let saved = !name.is_empty();
+    if saved {
+        du_db::auth::update_display_name(&st.pool, UserId(session.user_id), name).await?;
+    }
+    render_profile(&st, &user, &locale, saved).await
+}
+
+// ── contact / support form ─────────────────────────────────────────────────────
+
+/// View model for `static/contact.html`.
+#[derive(askama::Template)]
+#[template(path = "static/contact.html")]
+struct ContactTemplate {
+    t: T,
+    next: String,
+    user: Option,
+    /// reCAPTCHA site key — renders the widget when present.
+    site_key: Option,
+    sent: bool,
+    error: Option,
+}
+
+/// Body of the contact form; only `message` is mandatory.
+#[derive(Deserialize)]
+struct ContactForm {
+    name: Option,
+    email: Option,
+    subject: Option,
+    message: String,
+    #[serde(rename = "g-recaptcha-response")]
+    recaptcha: Option,
+}
+
+/// reCAPTCHA secret for server-side verification; when unset, verification is
+/// skipped (dev) and the widget is not rendered.
+pub(crate) fn recaptcha_secret() -> Option { + std::env::var("RECAPTCHA_SECRET").ok().filter(|s| !s.is_empty()) +} +pub(crate) fn recaptcha_site_key() -> Option { + std::env::var("RECAPTCHA_SITE_KEY").ok().filter(|s| !s.is_empty()) +} + +/// Verify a reCAPTCHA token against Google's siteverify endpoint. +pub(crate) async fn verify_recaptcha(secret: &str, token: &str) -> bool { + let resp = reqwest::Client::new() + .post("https://www.google.com/recaptcha/api/siteverify") + .form(&[("secret", secret), ("response", token)]) + .send() + .await; + match resp { + Ok(r) => r + .json::() + .await + .ok() + .and_then(|v| v["success"].as_bool()) + .unwrap_or(false), + Err(_) => false, + } +} + +async fn contact_form(user: MaybeUser, locale: Locale) -> Response { + html(&ContactTemplate { + t: locale.t, + next: locale.next, + user: user.nav(), + site_key: recaptcha_site_key(), + sent: false, + error: None, + }) +} + +async fn contact_submit( + State(st): State, + user: MaybeUser, + locale: Locale, + Form(f): Form, +) -> Result { + let render = |sent: bool, error: Option| { + html(&ContactTemplate { + t: locale.t, + next: locale.next, + user: user.nav(), + site_key: recaptcha_site_key(), + sent, + error, + }) + }; + + if f.message.trim().is_empty() { + return Ok(render(false, Some(locale.t.get("contact.error.empty").to_string()))); + } + // reCAPTCHA: enforced only when a secret is configured (so dev works). 
+    if let Some(secret) = recaptcha_secret() {
+        // A missing token is rejected without calling the verification service.
+        let token = f.recaptcha.as_deref().unwrap_or("");
+        if token.is_empty() || !verify_recaptcha(&secret, token).await {
+            return Ok(render(false, Some(locale.t.get("contact.error.captcha").to_string())));
+        }
+    }
+
+    // Optional fields: trim, and treat blank input as absent.
+    let trim = |o: &Option| o.as_deref().map(str::trim).filter(|s| !s.is_empty()).map(str::to_string);
+    let (name, email, subject) = (trim(&f.name), trim(&f.email), trim(&f.subject));
+    du_db::support::create_message(
+        &st.pool,
+        &du_db::support::NewContactMessage {
+            user_id: user.0.as_ref().map(|s| s.user_id),
+            sender_name: name.as_deref(),
+            sender_email: email.as_deref(),
+            subject: subject.as_deref(),
+            message: f.message.trim(),
+            // No IP hash is recorded by this handler.
+            ip_address_hash: None,
+        },
+    )
+    .await?;
+    Ok(render(true, None))
+}
diff --git a/rust/crates/du-web/src/routes/publications.rs b/rust/crates/du-web/src/routes/publications.rs
new file mode 100644
index 00000000..6db07a2d
--- /dev/null
+++ b/rust/crates/du-web/src/routes/publications.rs
@@ -0,0 +1,219 @@
+//! Curator **publication-candidate review** UI. The publication-discovery job
+//! (OpenAlex) writes candidates into `pubs.publication_candidate`; curators
+//! triage them here. Two-panel HTMX screen mirroring the proposals UI: a status-
+//! filtered queue (left) and a review panel (right) with Accept (promote to a
+//! real `pubs.publication`) / Reject / Defer.
+
+use crate::auth::{Curator, NavUser};
+use crate::error::AppError;
+use crate::htmx::HxHeaders;
+use crate::i18n::{Locale, T};
+use crate::render::html;
+use crate::state::AppState;
+use axum::extract::{Path, Query, State};
+use axum::response::{IntoResponse, Response};
+use axum::routing::{get, post};
+use axum::{Form, Router};
+use serde::Deserialize;
+
+/// Event name handed to `HxHeaders::trigger` after a review action.
+const CHANGED: &str = "candidate-changed";
+
+/// Routes of the candidate-review screen: full page, list fragment, detail
+/// panel and the review POST.
+pub fn router() -> Router {
+    Router::new()
+        .route("/curator/publications", get(page))
+        .route("/curator/publications/fragment", get(list))
+        .route("/curator/publications/:id/panel", get(panel))
+        .route("/curator/publications/:id/review", post(review))
+}
+
+// ── list ──────────────────────────────────────────────────────────────────────
+
+/// One candidate in the queue, already formatted for display.
+struct Row {
+    id: i64,
+    title: String,
+    journal: String,
+    date: String,
+    status: String,
+    relevance: String,
+}
+
+/// A page of candidates plus the active status filter and paging metadata.
+struct ListView {
+    status: String,
+    rows: Vec,
+    page: i64,
+    total: i64,
+    total_pages: i64,
+}
+
+/// Query string of the page and fragment routes.
+#[derive(Deserialize)]
+struct ListQuery {
+    status: Option,
+    page: Option,
+}
+
+/// Display form of an optional date; a dash when absent.
+fn fmt_date(d: Option) -> String {
+    d.map(|d| d.to_string()).unwrap_or_else(|| "—".into())
+}
+
+/// Convert a stored candidate into a display row, filling placeholders for
+/// missing title, journal, date and relevance.
+fn to_row(c: du_db::publication::Candidate) -> Row {
+    Row {
+        id: c.id,
+        title: c.title.unwrap_or_else(|| "(untitled)".into()),
+        journal: c.journal_name.unwrap_or_else(|| "—".into()),
+        date: fmt_date(c.publication_date),
+        status: c.status,
+        // Relevance is shown with two decimals.
+        relevance: c.relevance_score.map(|r| format!("{r:.2}")).unwrap_or_else(|| "—".into()),
+    }
+}
+
+/// Load one page (20 items) of candidates for the requested status.
+async fn load_list(st: &AppState, q: &ListQuery) -> Result {
+    // Default the queue to the pending items (the actionable ones).
+    let status = q.status.clone().unwrap_or_else(|| "pending".into());
+    // An explicitly empty `status=` removes the filter altogether.
+    let filter = if status.is_empty() { None } else { Some(status.as_str()) };
+    let result = du_db::publication::list_candidates(&st.pool, filter, q.page.unwrap_or(1), 20).await?;
+    // Read the paging fields before `result.items` is moved out below.
+    let (page, total, total_pages) = (result.page, result.total, result.total_pages());
+    Ok(ListView {
+        status,
+        rows: result.items.into_iter().map(to_row).collect(),
+        page,
+        total,
+        total_pages,
+    })
+}
+
+/// Full two-panel page (`curator/publications/page.html`).
+#[derive(askama::Template)]
+#[template(path = "curator/publications/page.html")]
+struct PageTemplate {
+    t: T,
+    next: String,
+    user: Option,
+    list: ListView,
+}
+/// List-only fragment (`curator/publications/list.html`).
+#[derive(askama::Template)]
+#[template(path = "curator/publications/list.html")]
+struct ListTemplate {
+    t: T,
+    list: ListView,
+}
+
+/// Render the whole review screen for a curator.
+async fn page(
+    Curator(s): Curator,
+    State(st): State,
+    locale: Locale,
+    Query(q): Query,
+) -> Result {
+    let list = load_list(&st, &q).await?;
+    Ok(html(&PageTemplate {
+        t: locale.t,
+        next: locale.next,
+        user: Some(NavUser { display_name: s.display_name, is_curator: true }),
+        list,
+    }))
+}
+
+/// Render only the candidate list. The `Curator` extractor is still required
+/// even though its value is unused.
+async fn list(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Query(q): Query,
+) -> Result {
+    let list = load_list(&st, &q).await?;
+    Ok(html(&ListTemplate { t: locale.t, list }))
+}
+
+// ── detail / review panel ───────────────────────────────────────────────────
+
+/// Everything the review panel template shows for one candidate.
+struct DetailView {
+    id: i64,
+    title: String,
+    journal: String,
+    date: String,
+    doi: Option,
+    doi_url: Option,
+    openalex_id: String,
+    relevance: String,
+    status: String,
+    abstract_text: Option,
+    /// Not yet accepted → the action buttons are live.
+    can_act: bool,
+    notice: Option,
+}
+
+#[derive(askama::Template)]
+#[template(path = "curator/publications/detail.html")]
+struct DetailTemplate {
+    t: T,
+    c: DetailView,
+}
+
+/// Load one candidate for the review panel; a missing candidate becomes
+/// `AppError::NotFound`.
+async fn build_detail(st: &AppState, id: i64, notice: Option) -> Result {
+    let c = du_db::publication::get_candidate(&st.pool, id)
+        .await?
+        .ok_or_else(|| AppError::NotFound(format!("candidate {id}")))?;
+    // A blank DOI is treated as absent, so no empty doi.org link is built.
+    let doi = c.doi.filter(|d| !d.trim().is_empty());
+    let doi_url = doi.as_ref().map(|d| format!("https://doi.org/{d}"));
+    Ok(DetailView {
+        id: c.id,
+        title: c.title.unwrap_or_else(|| "(untitled)".into()),
+        journal: c.journal_name.unwrap_or_else(|| "—".into()),
+        date: fmt_date(c.publication_date),
+        doi,
+        doi_url,
+        openalex_id: c.openalex_id,
+        relevance: c.relevance_score.map(|r| format!("{r:.2}")).unwrap_or_else(|| "—".into()),
+        status: c.status.clone(),
+        abstract_text: c.abstract_text.filter(|a| !a.trim().is_empty()),
+        can_act: c.status != "accepted",
+        notice,
+    })
+}
+
+/// Render the review panel for `id`.
+async fn detail_response(st: &AppState, t: T, id: i64, notice: Option) -> Result {
+    let c = build_detail(st, id, notice).await?;
+    Ok(html(&DetailTemplate { t, c }))
+}
+
+/// Same as `detail_response`, plus the `CHANGED` HTMX trigger.
+async fn changed_response(st: &AppState, t: T, id: i64, notice: Option) -> Result {
+    let body = detail_response(st, t, id, notice).await?;
+    Ok((HxHeaders::new().trigger(CHANGED), body).into_response())
+}
+
+/// GET handler for the right-hand panel.
+async fn panel(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Path(id): Path,
+) -> Result {
+    detail_response(&st, locale.t, id, None).await
+}
+
+#[derive(Deserialize)]
+struct ReviewForm {
+    /// accept | reject | defer
+    action: String,
+}
+
+/// Apply a curator's decision to a candidate and re-render the panel with a
+/// notice. The acting curator's user id is passed to the database layer.
+async fn review(
+    Curator(s): Curator,
+    State(st): State,
+    locale: Locale,
+    Path(id): Path,
+    Form(f): Form,
+) -> Result {
+    let notice = match f.action.as_str() {
+        // A `Conflict` from promotion is shown as the notice, not as an error page.
+        "accept" => match du_db::publication::promote_candidate(&st.pool, id, s.user_id).await {
+            Ok(pid) => format!("{} (#{})", locale.t.get("pc.notice.accepted"), pid.0),
+            Err(du_db::DbError::Conflict(msg)) => msg,
+            Err(e) => return Err(e.into()),
+        },
+        "reject" => {
+            du_db::publication::review_candidate(&st.pool, id, "rejected", s.user_id).await?;
+            locale.t.get("pc.notice.rejected").to_string()
+        }
+        // Any other action value, not only "defer", is handled as a deferral.
+        _ => {
+            du_db::publication::review_candidate(&st.pool, id, "deferred", s.user_id).await?;
+
locale.t.get("pc.notice.deferred").to_string()
+        }
+    };
+    changed_response(&st, locale.t, id, Some(notice)).await
+}
diff --git a/rust/crates/du-web/src/routes/reconcile_flags.rs b/rust/crates/du-web/src/routes/reconcile_flags.rs
new file mode 100644
index 00000000..f1304963
--- /dev/null
+++ b/rust/crates/du-web/src/routes/reconcile_flags.rs
@@ -0,0 +1,198 @@
+//! Curator review of YBrowse **reconcile flags** — synonym clusters whose names
+//! map to MORE THAN ONE existing variant (the catalog has them split across
+//! rows, possibly tree-linked, so reconciliation won't auto-merge them). Two-panel
+//! HTMX screen: the flag queue (left) + a panel (right) showing the YBrowse
+//! synonyms and the conflicting variants (canonical + branches each defines),
+//! with a pick-the-keeper **merge** action.
+
+use crate::auth::{Curator, NavUser};
+use crate::error::AppError;
+use crate::htmx::HxHeaders;
+use crate::i18n::{Locale, T};
+use crate::render::html;
+use crate::state::AppState;
+use axum::extract::{Path, Query, State};
+use axum::response::{IntoResponse, Response};
+use axum::routing::{get, post};
+use axum::{Form, Router};
+use serde::Deserialize;
+
+/// Event name handed to `HxHeaders::trigger` after a merge.
+const CHANGED: &str = "flag-changed";
+
+/// Routes of the reconcile-flag screen: full page, list fragment, detail panel
+/// and the merge POST.
+pub fn router() -> Router {
+    Router::new()
+        .route("/curator/reconcile-flags", get(page))
+        .route("/curator/reconcile-flags/fragment", get(list))
+        .route("/curator/reconcile-flags/:id/panel", get(panel))
+        .route("/curator/reconcile-flags/:id/merge", post(merge))
+}
+
+// ── list ──────────────────────────────────────────────────────────────────────
+
+/// One flag in the queue, already formatted for display.
+struct Row {
+    id: i64,
+    locus: String,
+    // Synonym names joined with ", ".
+    names: String,
+    variant_count: i32,
+}
+
+/// A page of flags plus paging metadata.
+struct ListView {
+    rows: Vec,
+    page: i64,
+    total: i64,
+    total_pages: i64,
+}
+
+/// Query string of the page and fragment routes.
+#[derive(Deserialize)]
+struct ListQuery {
+    page: Option,
+}
+
+/// Load one page (25 items) of flags; `page` defaults to 1.
+async fn load_list(st: &AppState, q: &ListQuery) -> Result {
+    let result = du_db::ybrowse::list_flags(&st.pool, q.page.unwrap_or(1), 25).await?;
+    let (page, total, total_pages) =
(result.page, result.total, result.total_pages());
+    // The paging fields were read above because `result.items` is moved here.
+    let rows = result
+        .items
+        .into_iter()
+        .map(|f| Row {
+            id: f.id,
+            locus: f.locus,
+            names: f.names.join(", "),
+            variant_count: f.variant_count,
+        })
+        .collect();
+    Ok(ListView { rows, page, total, total_pages })
+}
+
+/// Full two-panel page (`curator/reconcile-flags/page.html`).
+#[derive(askama::Template)]
+#[template(path = "curator/reconcile-flags/page.html")]
+struct PageTemplate {
+    t: T,
+    next: String,
+    user: Option,
+    list: ListView,
+}
+/// List-only fragment (`curator/reconcile-flags/list.html`).
+#[derive(askama::Template)]
+#[template(path = "curator/reconcile-flags/list.html")]
+struct ListTemplate {
+    t: T,
+    list: ListView,
+}
+
+/// Render the whole reconcile-flag screen for a curator.
+async fn page(
+    Curator(s): Curator,
+    State(st): State,
+    locale: Locale,
+    Query(q): Query,
+) -> Result {
+    let list = load_list(&st, &q).await?;
+    Ok(html(&PageTemplate {
+        t: locale.t,
+        next: locale.next,
+        user: Some(NavUser { display_name: s.display_name, is_curator: true }),
+        list,
+    }))
+}
+
+/// Render only the flag list. The `Curator` extractor is still required even
+/// though its value is unused.
+async fn list(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Query(q): Query,
+) -> Result {
+    let list = load_list(&st, &q).await?;
+    Ok(html(&ListTemplate { t: locale.t, list }))
+}
+
+// ── detail / merge panel ────────────────────────────────────────────────────
+
+/// One of the conflicting variants attached to a flag.
+struct VarRow {
+    id: i64,
+    canonical: String,
+    defines: String,
+}
+
+/// Everything the detail panel template shows for one flag.
+struct DetailView {
+    id: i64,
+    locus: String,
+    names: String,
+    variants: Vec,
+    notice: Option,
+    // True when the flag row no longer exists.
+    resolved: bool,
+}
+
+#[derive(askama::Template)]
+#[template(path = "curator/reconcile-flags/detail.html")]
+struct DetailTemplate {
+    t: T,
+    f: DetailView,
+}
+
+/// Build the panel model. A flag that no longer exists is not an error here:
+/// it yields an empty view marked `resolved`.
+async fn build_detail(st: &AppState, id: i64, notice: Option) -> Result {
+    match du_db::ybrowse::flag(&st.pool, id).await?
{
+        // The flag is gone (for example just merged): render the resolved state.
+        None => Ok(DetailView { id, locus: String::new(), names: String::new(), variants: vec![], notice, resolved: true }),
+        Some(d) => Ok(DetailView {
+            id: d.id,
+            locus: d.locus,
+            names: d.names.join(", "),
+            variants: d
+                .variants
+                .into_iter()
+                .map(|v| VarRow {
+                    id: v.id,
+                    // Variants without a canonical name are labelled by id.
+                    canonical: v.canonical_name.unwrap_or_else(|| format!("#{}", v.id)),
+                    defines: if v.defines.is_empty() { "—".into() } else { v.defines.join(", ") },
+                })
+                .collect(),
+            notice,
+            resolved: false,
+        }),
+    }
+}
+
+/// Render the detail panel for flag `id`.
+async fn detail_response(st: &AppState, t: T, id: i64, notice: Option) -> Result {
+    let f = build_detail(st, id, notice).await?;
+    Ok(html(&DetailTemplate { t, f }))
+}
+
+/// Same as `detail_response`, plus the `CHANGED` HTMX trigger.
+async fn changed_response(st: &AppState, t: T, id: i64, notice: Option) -> Result {
+    let body = detail_response(st, t, id, notice).await?;
+    Ok((HxHeaders::new().trigger(CHANGED), body).into_response())
+}
+
+/// GET handler for the right-hand panel.
+async fn panel(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Path(id): Path,
+) -> Result {
+    detail_response(&st, locale.t, id, None).await
+}
+
+#[derive(Deserialize)]
+struct MergeForm {
+    /// The variant id to keep as canonical; the others are merged into it.
+    keep: i64,
+}
+
+/// Merge every other variant of flag `id` into the chosen keeper, delete the
+/// flag, and re-render the panel with the number of variants merged.
+///
+/// Fails with `AppError::NotFound` when the flag does not exist, or when
+/// `keep` is not one of the flag's own variants. Without that second check a
+/// stale or forged form would merge all flagged variants into an unrelated
+/// row and then delete the flag.
+async fn merge(
+    _c: Curator,
+    State(st): State,
+    locale: Locale,
+    Path(id): Path,
+    Form(f): Form,
+) -> Result {
+    let detail = du_db::ybrowse::flag(&st.pool, id)
+        .await?
+        .ok_or_else(|| AppError::NotFound(format!("flag {id}")))?;
+    // Validate before touching any row: the keeper must belong to this flag.
+    if !detail.variants.iter().any(|v| v.id == f.keep) {
+        return Err(AppError::NotFound(format!("variant {} in flag {id}", f.keep)));
+    }
+    let mut merged = 0;
+    for v in &detail.variants {
+        if v.id != f.keep {
+            du_db::variant::merge_into(&st.pool, f.keep, v.id).await?;
+            merged += 1;
+        }
+    }
+    du_db::ybrowse::delete_flag(&st.pool, id).await?;
+    let notice = format!("{} {merged}", locale.t.get("rf.notice.merged"));
+    changed_response(&st, locale.t, id, Some(notice)).await
+}
diff --git a/rust/crates/du-web/src/routes/references.rs b/rust/crates/du-web/src/routes/references.rs
new file mode 100644
index 00000000..74a79680
--- /dev/null
+++ b/rust/crates/du-web/src/routes/references.rs
@@ -0,0 +1,278 @@
+//!
Public references (publications) + per-publication biosample report.
+//! Same two-panel HTMX pattern as the variant browser: a searchable/paginated
+//! publication list on the left, the selected publication's samples on the right.
+
+use crate::auth::{MaybeUser, NavUser};
+use crate::error::AppError;
+use crate::i18n::{Locale, T};
+use crate::render::html;
+use crate::routes::pages::{recaptcha_secret, recaptcha_site_key, verify_recaptcha};
+use crate::state::AppState;
+use axum::extract::{Path, Query, State};
+use axum::response::Response;
+use axum::routing::get;
+use axum::{Form, Router};
+use du_domain::ids::PublicationId;
+use serde::Deserialize;
+
+/// Routes of the public references area: page, list fragment, per-publication
+/// biosamples and the DOI submission form.
+pub fn router() -> Router {
+    Router::new()
+        .route("/references", get(page))
+        .route("/references/list", get(list))
+        .route("/references/:id/biosamples", get(biosamples))
+        .route("/references/submit", get(submit_form).post(submit))
+}
+
+/// Query string of the publication list (search text plus paging).
+#[derive(Deserialize)]
+struct ListQuery {
+    query: Option,
+    page: Option,
+    page_size: Option,
+}
+
+/// Paging-only query string used by the biosample report.
+#[derive(Deserialize)]
+struct PageQuery {
+    page: Option,
+    page_size: Option,
+}
+
+/// One publication in the list, already formatted for display.
+struct PubRow {
+    id: i64,
+    title: String,
+    journal: String,
+    year: String,
+    citations: Option,
+}
+
+/// A page of publications plus the search text and paging metadata.
+struct PubListView {
+    query: String,
+    rows: Vec,
+    page: i64,
+    page_size: i64,
+    total: i64,
+    total_pages: i64,
+}
+
+/// Search publications; `page` defaults to 1 and `page_size` to 20.
+// NOTE(review): `page_size` comes straight from the query string and is not
+// bounded here — confirm `du_db::publication::search` clamps it.
+async fn load_list(st: &AppState, q: &ListQuery) -> Result {
+    let result =
+        du_db::publication::search(&st.pool, q.query.as_deref(), q.page.unwrap_or(1), q.page_size.unwrap_or(20))
+            .await?;
+    let rows = result
+        .items
+        .iter()
+        .map(|p| PubRow {
+            id: p.id.0,
+            title: p.title.clone(),
+            journal: p.journal.clone().unwrap_or_default(),
+            // Only the year of the publication date is shown; empty when unknown.
+            year: p.publication_date.map(|d| d.format("%Y").to_string()).unwrap_or_default(),
+            citations: p.cited_by_count,
+        })
+        .collect();
+    Ok(PubListView {
+        query: q.query.clone().unwrap_or_default(),
+        rows,
+        page: result.page,
+        page_size: result.page_size,
+        total: result.total,
+        total_pages: result.total_pages(),
+    })
+}
+
+#[derive(askama::Template)]
+#[template(path = "references/page.html")]
+struct ReferencesPageTemplate {
+    t: T,
+    next: String,
+    user: Option,
+    list: PubListView,
+}
+
+/// List-only fragment (`references/list.html`).
+#[derive(askama::Template)]
+#[template(path = "references/list.html")]
+struct PubListTemplate {
+    t: T,
+    list: PubListView,
+}
+
+/// One biosample of a publication, already formatted for display.
+struct BioRow {
+    source: String,
+    accession: String,
+    alias: String,
+    description: String,
+}
+
+/// View model for `references/biosamples.html`: the publication header plus
+/// one page of its biosamples.
+#[derive(askama::Template)]
+#[template(path = "references/biosamples.html")]
+struct BiosamplesTemplate {
+    t: T,
+    pub_id: i64,
+    pub_title: String,
+    pub_doi: Option,
+    rows: Vec,
+    page: i64,
+    page_size: i64,
+    total: i64,
+    total_pages: i64,
+}
+
+/// Render the full references page (no sign-in required).
+async fn page(
+    State(st): State,
+    locale: Locale,
+    user: crate::auth::MaybeUser,
+    Query(q): Query,
+) -> Result {
+    let list = load_list(&st, &q).await?;
+    Ok(html(&ReferencesPageTemplate { t: locale.t, next: locale.next, user: user.nav(), list }))
+}
+
+/// Render only the publication list fragment.
+async fn list(
+    State(st): State,
+    locale: Locale,
+    Query(q): Query,
+) -> Result {
+    let list = load_list(&st, &q).await?;
+    Ok(html(&PubListTemplate { t: locale.t, list }))
+}
+
+/// Render the biosample report for one publication; an unknown id becomes
+/// `AppError::NotFound`.
+async fn biosamples(
+    State(st): State,
+    locale: Locale,
+    Path(id): Path,
+    Query(q): Query,
+) -> Result {
+    let pub_id = PublicationId(id);
+    let publication = du_db::publication::get_by_id(&st.pool, pub_id)
+        .await?
+ .ok_or_else(|| AppError::NotFound(format!("publication {id}")))?; + let result = + du_db::biosample::for_publication(&st.pool, pub_id, q.page.unwrap_or(1), q.page_size.unwrap_or(25)) + .await?; + let rows = result + .items + .iter() + .map(|b| BioRow { + source: b.source.label().to_string(), + accession: b.accession.clone().unwrap_or_default(), + alias: b.alias.clone().unwrap_or_default(), + description: b.description.clone().unwrap_or_default(), + }) + .collect(); + + Ok(html(&BiosamplesTemplate { + t: locale.t, + pub_id: id, + pub_title: publication.title, + pub_doi: publication.doi, + rows, + page: result.page, + page_size: result.page_size, + total: result.total, + total_pages: result.total_pages(), + })) +} + +// ── public "suggest a paper" (DOI) form ─────────────────────────────────────── + +#[derive(askama::Template)] +#[template(path = "references/submit.html")] +struct SubmitTemplate { + t: T, + next: String, + user: Option, + /// reCAPTCHA site key — renders the widget when present. + site_key: Option, + doi: String, + /// Success: the candidate was queued (carries the resolved title). + queued: Option, + error: Option, +} + +#[derive(Deserialize)] +struct SubmitForm { + doi: String, + #[serde(rename = "g-recaptcha-response")] + recaptcha: Option, +} + +/// Strip common DOI prefixes so OpenAlex's `/works/doi:` lookup gets a bare DOI. 
+fn normalize_doi(raw: &str) -> String { + let d = raw.trim(); + let d = d.strip_prefix("https://doi.org/").or_else(|| d.strip_prefix("http://doi.org/")).unwrap_or(d); + let d = d.strip_prefix("doi:").unwrap_or(d); + d.trim().to_string() +} + +async fn submit_form(locale: Locale, user: MaybeUser) -> Response { + html(&SubmitTemplate { + t: locale.t, + next: locale.next, + user: user.nav(), + site_key: recaptcha_site_key(), + doi: String::new(), + queued: None, + error: None, + }) +} + +/// Public: look a submitted DOI up in OpenAlex and queue it as a pending +/// `publication_candidate` for curator review (never a published reference +/// directly). Idempotent on the work's OpenAlex id. +async fn submit( + State(st): State, + locale: Locale, + user: MaybeUser, + Form(f): Form, +) -> Result { + let doi = normalize_doi(&f.doi); + let render = |doi: String, queued: Option, error: Option| { + html(&SubmitTemplate { + t: locale.t, + next: locale.next.clone(), + user: user.nav(), + site_key: recaptcha_site_key(), + doi, + queued, + error, + }) + }; + + if doi.is_empty() { + return Ok(render(doi, None, Some(locale.t.get("submit.error.empty").to_string()))); + } + // reCAPTCHA enforced only when configured (dev works without). + if let Some(secret) = recaptcha_secret() { + let token = f.recaptcha.as_deref().unwrap_or(""); + if token.is_empty() || !verify_recaptcha(&secret, token).await { + return Ok(render(doi, None, Some(locale.t.get("submit.error.captcha").to_string()))); + } + } + // Already in the catalog? + if du_db::publication::exists_by_doi(&st.pool, &doi).await? { + return Ok(render(doi, None, Some(locale.t.get("submit.error.exists").to_string()))); + } + + // Resolve via OpenAlex; only queue works it can identify (need an OpenAlex id). 
+ let client = du_external::openalex::OpenAlexClient::new(std::env::var("OPENALEX_MAILTO").ok()); + let meta = match client.work_by_doi(&doi).await { + Ok(Some(m)) => m, + Ok(None) => return Ok(render(doi, None, Some(locale.t.get("submit.error.notfound").to_string()))), + Err(_) => return Ok(render(doi, None, Some(locale.t.get("submit.error.lookup").to_string()))), + }; + let Some(openalex_id) = meta.openalex_id.as_deref() else { + return Ok(render(doi, None, Some(locale.t.get("submit.error.notfound").to_string()))); + }; + + du_db::publication::upsert_candidate( + &st.pool, + openalex_id, + Some(&doi), + meta.title.as_deref(), + meta.abstract_summary.as_deref(), + meta.publication_date, + meta.journal.as_deref(), + ) + .await?; + + let title = meta.title.clone().unwrap_or_else(|| doi.clone()); + Ok(render(String::new(), Some(title), None)) +} diff --git a/rust/crates/du-web/src/routes/research.rs b/rust/crates/du-web/src/routes/research.rs new file mode 100644 index 00000000..34bd99da --- /dev/null +++ b/rust/crates/du-web/src/routes/research.rs @@ -0,0 +1,494 @@ +//! D2 ResearchSubject registry endpoints (`/api/v1/research/*`) — **PII-free**. The +//! AppView stores only pseudonymous person nodes + project memberships + a merge +//! audit; identity resolution is Edge-to-Edge over D1/D3. Every call is +//! **signature-authenticated** (the caller signs a canonical message with its DID +//! key — [`crate::sig::verify_signed`]) **and authorized** from existing data: +//! register → project owner; merge → steward of both subjects; custody → the +//! subject's steward. Not part of the public OpenAPI document. 
+ +use crate::error::AppError; +use crate::sig::verify_signed; +use crate::state::AppState; +use axum::extract::{Query, State}; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use du_db::research::{self, messages}; +use serde::Deserialize; +use serde_json::{json, Value}; +use uuid::Uuid; + +pub fn router() -> Router { + Router::new() + .route("/api/v1/research/subject", post(register)) + .route("/api/v1/research/merge", post(merge)) + .route("/api/v1/research/custody", post(custody)) + .route("/api/v1/research/subjects", get(subjects)) + .route("/api/v1/research/project/member", post(add_member)) + .route("/api/v1/research/project/member/revoke", post(revoke_member)) + .route("/api/v1/research/project/members", get(members)) + .route("/api/v1/research/assertion", post(assert)) + .route("/api/v1/research/assertion/retract", post(retract)) + .route("/api/v1/research/assertion/resolve", post(resolve)) + .route("/api/v1/research/current-view", get(current_view)) +} + +#[derive(Deserialize)] +struct RegisterBody { + steward_did: String, + project_id: i64, + /// Present when the subject id was agreed via a D1 id-exchange; omit to mint one. + subject_id: Option, + signature: String, +} + +async fn register(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed( + &st.pool, + &b.steward_did, + &messages::register(&b.steward_did, b.project_id, b.subject_id.map(|u| u.to_string()).as_deref()), + &b.signature, + ) + .await?; + // Authorize (D5 ACL): ADMIN/CO_ADMIN of the project (owner is the founding ADMIN). + if !research::can(&st.pool, b.project_id, &b.steward_did, research::Capability::ManageSubjects).await? 
{ + return Err(AppError::Forbidden); + } + let sid = research::register_in_project(&st.pool, b.subject_id, b.project_id, &b.steward_did).await?; + Ok(Json(json!({ "research_subject_id": sid }))) +} + +#[derive(Deserialize)] +struct MergeBody { + asserted_by_did: String, + keep: Uuid, + retire: Uuid, + method: String, + confidence: Option, + signature: String, +} + +async fn merge(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed( + &st.pool, + &b.asserted_by_did, + &messages::merge(&b.asserted_by_did, &b.keep.to_string(), &b.retire.to_string(), &b.method), + &b.signature, + ) + .await?; + // Authorize: the asserter must steward BOTH subjects. + if !research::is_steward_of(&st.pool, &b.asserted_by_did, b.keep).await? + || !research::is_steward_of(&st.pool, &b.asserted_by_did, b.retire).await? + { + return Err(AppError::Forbidden); + } + research::merge_subjects(&st.pool, b.keep, b.retire, &b.method, &b.asserted_by_did, b.confidence).await?; + Ok(Json(json!({ "kept": b.keep, "retired": b.retire }))) +} + +#[derive(Deserialize)] +struct CustodyBody { + steward_did: String, + subject_id: Uuid, + custody_did: String, + signature: String, +} + +async fn custody(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed( + &st.pool, + &b.steward_did, + &messages::custody(&b.steward_did, &b.subject_id.to_string(), &b.custody_did), + &b.signature, + ) + .await?; + // Authorize: only the current steward may transfer custody. + if !research::is_steward_of(&st.pool, &b.steward_did, b.subject_id).await? { + return Err(AppError::Forbidden); + } + if !research::set_custody(&st.pool, b.subject_id, &b.custody_did).await? 
{ + return Err(AppError::NotFound(format!("subject {}", b.subject_id))); + } + Ok(Json(json!({ "research_subject_id": b.subject_id, "custody_did": b.custody_did }))) +} + +#[derive(Deserialize)] +struct SubjectsQuery { + project_id: i64, + did: String, + ts: i64, + sig: String, +} + +async fn subjects(State(st): State, Query(q): Query) -> Result, AppError> { + if (chrono::Utc::now().timestamp() - q.ts).abs() > 300 { + return Err(AppError::BadRequest("stale timestamp".into())); + } + verify_signed(&st.pool, &q.did, &messages::poll(&q.did, q.ts), &q.sig).await?; + if !research::is_team_member(&st.pool, q.project_id, &q.did).await? { + return Err(AppError::Forbidden); + } + let rows = research::subjects_in_project(&st.pool, q.project_id).await?; + let items: Vec = rows + .into_iter() + .map(|r| json!({ "research_subject_id": r.research_subject_id, "steward_did": r.steward_did })) + .collect(); + Ok(Json(json!({ "items": items }))) +} + +// ── collaborator-team management (D5, ADMIN-gated) ──────────────────────────── + +#[derive(Deserialize)] +struct AddMemberBody { + actor_did: String, + project_id: i64, + member_did: String, + role: String, + permissions: Option>, + signature: String, +} + +async fn add_member(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.actor_did, &messages::add_member(&b.actor_did, b.project_id, &b.member_did, &b.role), &b.signature).await?; + if !research::can(&st.pool, b.project_id, &b.actor_did, research::Capability::ManageRoles).await? 
{ + return Err(AppError::Forbidden); + } + let role = research::Role::parse(&b.role).ok_or_else(|| AppError::BadRequest("invalid role".into()))?; + research::add_member(&st.pool, b.project_id, &b.member_did, role, &b.permissions.unwrap_or_default(), &b.actor_did).await?; + Ok(Json(json!({ "project_id": b.project_id, "member_did": b.member_did, "role": role.as_str() }))) +} + +#[derive(Deserialize)] +struct RevokeMemberBody { + actor_did: String, + project_id: i64, + member_did: String, + signature: String, +} + +async fn revoke_member(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.actor_did, &messages::revoke_member(&b.actor_did, b.project_id, &b.member_did), &b.signature).await?; + if !research::can(&st.pool, b.project_id, &b.actor_did, research::Capability::ManageRoles).await? { + return Err(AppError::Forbidden); + } + if !research::revoke_member(&st.pool, b.project_id, &b.member_did).await? { + return Err(AppError::NotFound(format!("live member {}", b.member_did))); + } + Ok(Json(json!({ "project_id": b.project_id, "member_did": b.member_did, "status": "REVOKED" }))) +} + +async fn members(State(st): State, Query(q): Query) -> Result, AppError> { + if (chrono::Utc::now().timestamp() - q.ts).abs() > 300 { + return Err(AppError::BadRequest("stale timestamp".into())); + } + verify_signed(&st.pool, &q.did, &messages::poll(&q.did, q.ts), &q.sig).await?; + if !research::is_team_member(&st.pool, q.project_id, &q.did).await? { + return Err(AppError::Forbidden); + } + let items: Vec = research::members_of(&st.pool, q.project_id) + .await? 
+ .into_iter() + .map(|m| json!({ "member_did": m.member_did, "role": m.role })) + .collect(); + Ok(Json(json!({ "items": items }))) +} + +// ── D4: attributed-claim assertions (R2 — non-PII, project-scoped) ───────────── + +#[derive(Deserialize)] +struct AssertBody { + author_did: String, + subject_id: Uuid, + /// The ACL context: the author must hold `WriteAssertions` in this project even when + /// the assertion is PUBLIC-scoped (consent raises an assertion *about a project + /// subject* to public — §5). + project_id: i64, + predicate: String, + value: Value, + /// PUBLIC (R1) vs the default PROJECT() (R2) scope. + public: Option, + evidence: Option, + supersedes_id: Option, + /// Author asserts the free-text value carries no PII (required for `NOTE`). + pii_cleared: Option, + signature: String, +} + +async fn assert(State(st): State, Json(b): Json) -> Result, AppError> { + let scope = if b.public.unwrap_or(false) { "PUBLIC".to_string() } else { format!("PROJECT:{}", b.project_id) }; + verify_signed( + &st.pool, + &b.author_did, + &messages::assert(&b.author_did, &b.subject_id.to_string(), &b.predicate, &scope), + &b.signature, + ) + .await?; + // Authorize: ADMIN/CO_ADMIN of the project (WriteAssertions). + if !research::can(&st.pool, b.project_id, &b.author_did, research::Capability::WriteAssertions).await? { + return Err(AppError::Forbidden); + } + let id = research::record_assertion( + &st.pool, + b.subject_id, + &b.predicate, + &b.value, + &b.author_did, + &scope, + b.evidence.as_ref(), + b.supersedes_id, + b.pii_cleared.unwrap_or(false), + ) + .await?; + Ok(Json(json!({ "assertion_id": id, "scope": scope }))) +} + +#[derive(Deserialize)] +struct RetractBody { + actor_did: String, + assertion_id: i64, + /// The ACL context for the dispute-resolution path (non-authors need ResolveDispute). 
+ project_id: i64, + signature: String, +} + +async fn retract(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.actor_did, &messages::retract(&b.actor_did, b.assertion_id), &b.signature).await?; + let meta = research::assertion_meta(&st.pool, b.assertion_id) + .await? + .ok_or_else(|| AppError::NotFound(format!("live assertion {}", b.assertion_id)))?; + // The author may retract their own claim; anyone else needs ResolveDispute. + if meta.author_did != b.actor_did + && !research::can(&st.pool, b.project_id, &b.actor_did, research::Capability::ResolveDispute).await? + { + return Err(AppError::Forbidden); + } + if !research::retract_assertion(&st.pool, b.assertion_id).await? { + return Err(AppError::NotFound(format!("live assertion {}", b.assertion_id))); + } + Ok(Json(json!({ "assertion_id": b.assertion_id, "status": "RETRACTED" }))) +} + +#[derive(Deserialize)] +struct ResolveBody { + actor_did: String, + /// A live `SAME_PERSON_AS` assertion to accept → drives the D2 merge. + assertion_id: i64, + project_id: i64, + signature: String, +} + +async fn resolve(State(st): State, Json(b): Json) -> Result, AppError> { + verify_signed(&st.pool, &b.actor_did, &messages::resolve(&b.actor_did, b.assertion_id), &b.signature).await?; + if !research::can(&st.pool, b.project_id, &b.actor_did, research::Capability::ResolveDispute).await? 
{ + return Err(AppError::Forbidden); + } + let (kept, retired) = research::accept_same_person(&st.pool, b.assertion_id).await?; + Ok(Json(json!({ "kept": kept, "retired": retired }))) +} + +#[derive(Deserialize)] +struct ViewQuery { + subject_id: Uuid, + project_id: i64, + did: String, + ts: i64, + sig: String, +} + +async fn current_view(State(st): State, Query(q): Query) -> Result, AppError> { + if (chrono::Utc::now().timestamp() - q.ts).abs() > 300 { + return Err(AppError::BadRequest("stale timestamp".into())); + } + verify_signed(&st.pool, &q.did, &messages::poll(&q.did, q.ts), &q.sig).await?; + if !research::is_team_member(&st.pool, q.project_id, &q.did).await? { + return Err(AppError::Forbidden); + } + let scope = format!("PROJECT:{}", q.project_id); + let items: Vec = research::current_view(&st.pool, q.subject_id, &scope) + .await? + .into_iter() + .map(|r| json!({ "predicate": r.predicate, "state": r.state, "view": r.view })) + .collect(); + Ok(Json(json!({ "items": items }))) +} + +#[cfg(test)] +mod tests { + use axum::body::{to_bytes, Body}; + use axum::http::{Request, StatusCode}; + use base64::engine::general_purpose::STANDARD; + use base64::Engine; + use ed25519_dalek::{Signer, SigningKey}; + use tower::ServiceExt; + + /// The project owner can register a subject; a valid signature from a non-owner + /// is rejected by the authorization gate (403), and a tampered signature 403s. 
+ #[tokio::test] + async fn register_is_owner_gated_and_signed() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping research endpoint test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + let owner = SigningKey::from_bytes(&[11u8; 32]); + let owner_did = du_atproto::did::did_key_from_ed25519(&owner.verifying_key()); + + let project_id: i64 = sqlx::query_scalar( + "INSERT INTO social.group_project (project_name, project_type, owner_did) VALUES ('P','RESEARCH',$1) RETURNING id", + ) + .bind(&owner_did) + .fetch_one(&pool) + .await + .expect("project"); + let state = crate::state::AppState { pool, key: tower_cookies::Key::generate(), oauth: None }; + + let post = |state: crate::state::AppState, body: serde_json::Value| async move { + crate::routes::app(state) + .oneshot(Request::builder().method("POST").uri("/api/v1/research/subject") + .header("content-type", "application/json").body(Body::from(body.to_string())).unwrap()) + .await + .unwrap() + }; + + // Owner-signed → 200 + a minted subject. + let msg = du_db::research::messages::register(&owner_did, project_id, None); + let sig = STANDARD.encode(owner.sign(msg.as_bytes()).to_bytes()); + let ok = post(state.clone(), serde_json::json!({ "steward_did": owner_did, "project_id": project_id, "signature": sig })).await; + assert_eq!(ok.status(), StatusCode::OK); + let v: serde_json::Value = serde_json::from_slice(&to_bytes(ok.into_body(), usize::MAX).await.unwrap()).unwrap(); + assert!(v["research_subject_id"].as_str().is_some()); + + // A non-owner with a VALID signature is rejected by the owner gate (403). 
+ let other = SigningKey::from_bytes(&[12u8; 32]); + let other_did = du_atproto::did::did_key_from_ed25519(&other.verifying_key()); + let omsg = du_db::research::messages::register(&other_did, project_id, None); + let osig = STANDARD.encode(other.sign(omsg.as_bytes()).to_bytes()); + let r403 = post(state.clone(), serde_json::json!({ "steward_did": other_did, "project_id": project_id, "signature": osig })).await; + assert_eq!(r403.status(), StatusCode::FORBIDDEN); + + // A tampered signature (owner did, wrong sig) → 403. + let bad = post(state, serde_json::json!({ "steward_did": owner_did, "project_id": project_id, "signature": "AAAA" })).await; + assert_eq!(bad.status(), StatusCode::FORBIDDEN); + } + + /// Adding a team member is ADMIN-gated: the owner (founding ADMIN) succeeds; a + /// non-admin DID with a valid signature is rejected (403). + #[tokio::test] + async fn add_member_is_admin_gated() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping add-member test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + let owner = SigningKey::from_bytes(&[21u8; 32]); + let owner_did = du_atproto::did::did_key_from_ed25519(&owner.verifying_key()); + let project_id: i64 = sqlx::query_scalar( + "INSERT INTO social.group_project (project_name, project_type, owner_did) VALUES ('T','RESEARCH',$1) RETURNING id", + ) + .bind(&owner_did) + .fetch_one(&pool) + .await + .unwrap(); + let state = crate::state::AppState { pool, key: tower_cookies::Key::generate(), oauth: None }; + + let post = |state: crate::state::AppState, body: serde_json::Value| async move { + crate::routes::app(state) + .oneshot(Request::builder().method("POST").uri("/api/v1/research/project/member") + .header("content-type", "application/json").body(Body::from(body.to_string())).unwrap()) + .await + .unwrap() + }; + + // Owner (ADMIN) adds a CO_ADMIN → 200. 
+ let m = du_db::research::messages::add_member(&owner_did, project_id, "did:key:zNew", "CO_ADMIN"); + let sig = STANDARD.encode(owner.sign(m.as_bytes()).to_bytes()); + let ok = post(state.clone(), serde_json::json!({ + "actor_did": owner_did, "project_id": project_id, "member_did": "did:key:zNew", "role": "CO_ADMIN", "signature": sig, + })).await; + assert_eq!(ok.status(), StatusCode::OK); + + // A non-admin (valid signature, but no ManageRoles) → 403. + let outsider = SigningKey::from_bytes(&[22u8; 32]); + let out_did = du_atproto::did::did_key_from_ed25519(&outsider.verifying_key()); + let m2 = du_db::research::messages::add_member(&out_did, project_id, "did:key:zEvil", "ADMIN"); + let sig2 = STANDARD.encode(outsider.sign(m2.as_bytes()).to_bytes()); + let r403 = post(state, serde_json::json!({ + "actor_did": out_did, "project_id": project_id, "member_did": "did:key:zEvil", "role": "ADMIN", "signature": sig2, + })).await; + assert_eq!(r403.status(), StatusCode::FORBIDDEN); + } + + /// Recording an assertion is WriteAssertions-gated; a PII predicate is rejected (422); + /// resolving a dispute is ResolveDispute-gated. + #[tokio::test] + async fn assertion_endpoints_gated() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping assertion endpoint test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + let owner = SigningKey::from_bytes(&[31u8; 32]); + let owner_did = du_atproto::did::did_key_from_ed25519(&owner.verifying_key()); + let project_id: i64 = sqlx::query_scalar( + "INSERT INTO social.group_project (project_name, project_type, owner_did) VALUES ('A','RESEARCH',$1) RETURNING id", + ) + .bind(&owner_did) + .fetch_one(&pool) + .await + .unwrap(); + // A pseudonymous subject to assert over, and a MODERATOR (no WriteAssertions). 
+ let subject = du_db::research::register_in_project(&pool, None, project_id, &owner_did).await.unwrap(); + let mods = SigningKey::from_bytes(&[32u8; 32]); + let mod_did = du_atproto::did::did_key_from_ed25519(&mods.verifying_key()); + du_db::research::add_member(&pool, project_id, &mod_did, du_db::research::Role::Moderator, &[], &owner_did) + .await + .unwrap(); + let state = crate::state::AppState { pool, key: tower_cookies::Key::generate(), oauth: None }; + + let post = |state: crate::state::AppState, uri: &'static str, body: serde_json::Value| async move { + crate::routes::app(state) + .oneshot(Request::builder().method("POST").uri(uri) + .header("content-type", "application/json").body(Body::from(body.to_string())).unwrap()) + .await + .unwrap() + }; + let scope = format!("PROJECT:{project_id}"); + + // Owner (ADMIN ⇒ WriteAssertions) records a HAPLOGROUP_IS → 200. + let m = du_db::research::messages::assert(&owner_did, &subject.to_string(), "HAPLOGROUP_IS", &scope); + let sig = STANDARD.encode(owner.sign(m.as_bytes()).to_bytes()); + let ok = post(state.clone(), "/api/v1/research/assertion", serde_json::json!({ + "author_did": owner_did, "subject_id": subject, "project_id": project_id, + "predicate": "HAPLOGROUP_IS", "value": {"haplogroup": "R-M269"}, "signature": sig, + })).await; + assert_eq!(ok.status(), StatusCode::OK); + + // A PII predicate (MDKA_IS) is rejected at the store boundary → 422. + let mp = du_db::research::messages::assert(&owner_did, &subject.to_string(), "MDKA_IS", &scope); + let psig = STANDARD.encode(owner.sign(mp.as_bytes()).to_bytes()); + let pii = post(state.clone(), "/api/v1/research/assertion", serde_json::json!({ + "author_did": owner_did, "subject_id": subject, "project_id": project_id, + "predicate": "MDKA_IS", "value": {"ancestor_name": "Jane"}, "signature": psig, + })).await; + assert_eq!(pii.status(), StatusCode::UNPROCESSABLE_ENTITY); + + // A MODERATOR (valid signature, no WriteAssertions) → 403. 
+ let mm = du_db::research::messages::assert(&mod_did, &subject.to_string(), "HAPLOGROUP_IS", &scope); + let msig = STANDARD.encode(mods.sign(mm.as_bytes()).to_bytes()); + let r403 = post(state.clone(), "/api/v1/research/assertion", serde_json::json!({ + "author_did": mod_did, "subject_id": subject, "project_id": project_id, + "predicate": "HAPLOGROUP_IS", "value": {"haplogroup": "R-L21"}, "signature": msig, + })).await; + assert_eq!(r403.status(), StatusCode::FORBIDDEN); + + // Resolve is ResolveDispute-gated: the MODERATOR is refused before any merge → 403. + let mr = du_db::research::messages::resolve(&mod_did, 1); + let rsig = STANDARD.encode(mods.sign(mr.as_bytes()).to_bytes()); + let rr = post(state, "/api/v1/research/assertion/resolve", serde_json::json!({ + "actor_did": mod_did, "assertion_id": 1, "project_id": project_id, "signature": rsig, + })).await; + assert_eq!(rr.status(), StatusCode::FORBIDDEN); + } +} diff --git a/rust/crates/du-web/src/routes/samples.rs b/rust/crates/du-web/src/routes/samples.rs new file mode 100644 index 00000000..5c0c64f4 --- /dev/null +++ b/rust/crates/du-web/src/routes/samples.rs @@ -0,0 +1,442 @@ +//! Public per-sample report (`/sample/:slug`) — an ExploreYourDNA-style page for +//! biosamples a curator has opted public. Identity + Y/mt haplogroup pathways + +//! origin map + sequencing/coverage + ancestry, assembled from the unified +//! biosample read path (`du_db::biosample::report`). A curator-only toggle flips +//! the `is_public` gate; curators may also preview private samples here. 
+ +use crate::auth::{Curator, MaybeUser, NavUser}; +use crate::error::AppError; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::{Path, State}; +use axum::response::Response; +use axum::routing::{get, post}; +use axum::{Form, Router}; +use du_db::biosample::{HaplogroupCall, SampleReport}; +use du_db::haplogroup::Pathway; +use serde::Deserialize; + +pub fn router() -> Router { + Router::new() + .route("/sample/:slug", get(report)) + .route("/curator/samples/:slug/public", post(toggle_public)) +} + +// ── view models (all display logic lives here; templates stay logic-free) ────── + +/// Distinct colors for ancestry components, cycled by index. Last is the +/// synthetic "unassigned" remainder (grey). +const ANCESTRY_PALETTE: [&str; 9] = + ["#4e79a7", "#f28e2b", "#e15759", "#76b7b2", "#59a14f", "#edc948", "#b07aa1", "#ff9da7", "#9c755f"]; +const UNASSIGNED_COLOR: &str = "#bab0ac"; + +struct OriginView { + lat: f64, + lon: f64, +} + +struct PubView { + title: String, + href: Option, + year: String, +} + +struct StepView { + name: String, + formed: String, + tmrca: String, + snps: Vec, + href: String, +} + +struct PathwayView { + /// The raw called name (None ⇒ no haplogroup call at all for this lineage). + call: Option, + /// True when the call resolved to tree nodes (we have steps to show). + placed: bool, + /// True when the call is the cross-technology reconciliation consensus. + reconciled: bool, + /// Consensus reliability for a reconciled call (formatted; "" when absent). 
+ confidence: String, + run_count: String, + concordance: String, + steps: Vec, +} + +struct SeqView { + platform: String, + instrument: String, + test_type: String, + layout: String, + reads: String, + read_length: String, +} + +struct CovView { + build: String, + aligner: String, + test_type: String, + mean: String, + pct_10x: String, + pct_20x: String, + pct_30x: String, + /// Advertised spec / cohort norm shown alongside the actual depth. + expected: String, + norm: String, + /// BELOW / AT / ABOVE (empty when nothing to compare against). + conformance: String, +} + +struct AncestryComp { + label: String, + /// Bar width as a bare number string (percent), e.g. "12.3". + width: String, + /// Display percentage, e.g. "12.3%". + pct_label: String, + color: String, +} + +struct SampleView { + display_name: String, + accession: Option, + alias: Option, + description: Option, + source: String, + center_name: Option, + sex: Option, + federated: bool, + origin: Option, + publications: Vec, + y: PathwayView, + mt: PathwayView, + sequencing: Vec, + coverage: Vec, + ancestry: Vec, + ancestry_method: Option, +} + +fn dash(v: Option) -> String { + v.filter(|s| !s.trim().is_empty()).unwrap_or_else(|| "—".to_string()) +} + +fn num_i64(v: Option) -> String { + v.map(|n| n.to_string()).unwrap_or_else(|| "—".to_string()) +} + +fn num_i32(v: Option) -> String { + v.map(|n| n.to_string()).unwrap_or_else(|| "—".to_string()) +} + +fn num_f64(v: Option, decimals: usize) -> String { + v.map(|n| format!("{n:.decimals$}")).unwrap_or_else(|| "—".to_string()) +} + +fn titlecase(s: &str) -> String { + let mut chars = s.chars(); + match chars.next() { + Some(c) => c.to_uppercase().collect::() + &chars.as_str().to_lowercase(), + None => String::new(), + } +} + +/// Pull `(label, value)` pairs out of an ancestry JSONB array, tolerating the +/// several key spellings the federated payload may use. 
+fn extract_components(v: &serde_json::Value) -> Vec<(String, f64)> { + let Some(arr) = v.as_array() else { return Vec::new() }; + let mut out = Vec::new(); + for e in arr { + let label = ["superPopulation", "population", "name", "label", "ancestry", "group"] + .iter() + .find_map(|k| e.get(*k).and_then(serde_json::Value::as_str)) + .map(str::to_string); + let value = ["percentage", "fraction", "value", "proportion", "percent"] + .iter() + .find_map(|k| e.get(*k).and_then(serde_json::Value::as_f64)); + if let (Some(l), Some(va)) = (label, value) { + if va > 0.0 { + out.push((l, va)); + } + } + } + out +} + +fn build_ancestry(rep: &SampleReport) -> (Vec, Option) { + let Some(anc) = &rep.ancestry else { return (Vec::new(), None) }; + // Prefer the continental rollup; fall back to sub-continental components. + let mut pairs = extract_components(&anc.super_populations); + if pairs.is_empty() { + pairs = extract_components(&anc.components); + } + if pairs.is_empty() { + return (Vec::new(), anc.analysis_method.clone()); + } + // Normalize to percentages: if everything looks like a 0..1 fraction, scale. + let max = pairs.iter().map(|(_, v)| *v).fold(0.0_f64, f64::max); + let scale = if max <= 1.0 { 100.0 } else { 1.0 }; + pairs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + + let mut comps: Vec = pairs + .iter() + .enumerate() + .map(|(i, (label, v))| { + let pct = v * scale; + AncestryComp { + label: label.clone(), + width: format!("{pct:.1}"), + pct_label: format!("{pct:.1}%"), + color: ANCESTRY_PALETTE[i % ANCESTRY_PALETTE.len()].to_string(), + } + }) + .collect(); + + // Synthetic remainder so the stacked bar is honest/full-width. 
+ let total: f64 = pairs.iter().map(|(_, v)| v * scale).sum(); + if total < 99.0 { + let rem = 100.0 - total; + comps.push(AncestryComp { + label: "Unassigned".to_string(), + width: format!("{rem:.1}"), + pct_label: format!("{rem:.1}%"), + color: UNASSIGNED_COLOR.to_string(), + }); + } + (comps, anc.analysis_method.clone()) +} + +/// Tree-view base path for re-rooting links, by lineage. +fn tree_base(call: &HaplogroupCall) -> &'static str { + match call.dna_type { + du_domain::enums::DnaType::YDna => "/ytree", + du_domain::enums::DnaType::MtDna => "/mtree", + } +} + +/// Best display name for a defining variant: canonical name, else first alias. +fn snp_label(v: &du_db::haplogroup::VariantInfo) -> Option { + if let Some(n) = v.canonical_name.as_deref().filter(|s| !s.is_empty()) { + return Some(n.to_string()); + } + v.aliases + .get("common_names") + .and_then(serde_json::Value::as_array) + .and_then(|a| a.first()) + .and_then(serde_json::Value::as_str) + .map(str::to_string) +} + +fn build_pathway(call: Option<&HaplogroupCall>, pathway: Option) -> PathwayView { + let Some(call) = call else { + return PathwayView { + call: None, + placed: false, + reconciled: false, + confidence: String::new(), + run_count: String::new(), + concordance: String::new(), + steps: Vec::new(), + }; + }; + let base = tree_base(call); + let steps = pathway + .map(|p| { + p.steps + .into_iter() + .map(|s| { + let encoded = utf8_percent_encode(&s.name); + StepView { + href: format!("{base}?root={encoded}"), + name: s.name, + formed: num_i32(s.formed_ybp), + tmrca: num_i32(s.tmrca_ybp), + snps: s.defining_snps.iter().filter_map(snp_label).collect(), + } + }) + .collect::>() + }) + .unwrap_or_default(); + let pct = |v: Option| v.map(|x| format!("{:.0}%", x * 100.0)).unwrap_or_default(); + PathwayView { + call: Some(call.name.clone()), + placed: !steps.is_empty(), + reconciled: call.origin == du_db::biosample::HaplogroupCallOrigin::Reconciled, + confidence: pct(call.confidence), + run_count: 
call.run_count.map(|n| n.to_string()).unwrap_or_default(), + concordance: pct(call.snp_concordance), + steps, + } +} + +/// Minimal percent-encoding for a clade name used in a `?root=` query value. +fn utf8_percent_encode(s: &str) -> String { + percent_encoding::utf8_percent_encode(s, percent_encoding::NON_ALPHANUMERIC).to_string() +} + +impl SampleView { + fn build(rep: SampleReport, y_path: Option, mt_path: Option) -> Self { + let id = &rep.identity; + let display_name = id + .accession + .clone() + .or_else(|| id.alias.clone()) + .unwrap_or_else(|| id.sample_guid.0.to_string()); + + let publications = rep + .publications + .iter() + .map(|p| { + let href = p + .url + .clone() + .or_else(|| p.doi.as_ref().map(|d| format!("https://doi.org/{d}"))); + PubView { + title: p.title.clone(), + href, + year: p.publication_date.map(|d| d.format("%Y").to_string()).unwrap_or_default(), + } + }) + .collect(); + + let sequencing = rep + .sequencing + .iter() + .map(|r| SeqView { + platform: dash(r.platform_name.clone()), + instrument: dash(r.instrument_model.clone()), + test_type: dash(r.test_type.clone()), + layout: dash(r.library_layout.clone()), + reads: num_i64(r.total_reads), + read_length: num_i32(r.read_length), + }) + .collect(); + + let coverage = rep + .coverage + .iter() + .map(|c| CovView { + build: dash(c.reference_build.clone()), + aligner: dash(c.aligner.clone()), + test_type: dash(c.test_type.clone()), + mean: num_f64(c.mean_coverage, 1), + pct_10x: num_f64(c.pct_10x, 1), + pct_20x: num_f64(c.pct_20x, 1), + pct_30x: num_f64(c.pct_30x, 1), + expected: num_f64(c.expected_min_depth, 0), + norm: num_f64(c.norm_median_depth, 1), + conformance: c.conformance.clone().unwrap_or_default(), + }) + .collect(); + + let (ancestry, ancestry_method) = build_ancestry(&rep); + + SampleView { + display_name, + accession: id.accession.clone(), + alias: id.alias.clone(), + description: id.description.clone(), + source: rep.identity.source.label().to_string(), + center_name: 
id.center_name.clone(), + sex: id.sex.as_deref().map(titlecase), + federated: id.is_federated, + origin: id.origin.map(|o| OriginView { lat: o.lat, lon: o.lon }), + publications, + y: build_pathway(rep.y.as_ref(), y_path), + mt: build_pathway(rep.mt.as_ref(), mt_path), + sequencing, + coverage, + ancestry, + ancestry_method, + } + } +} + +// ── templates ───────────────────────────────────────────────────────────────── + +#[derive(askama::Template)] +#[template(path = "samples/report.html")] +struct SampleReportTemplate { + t: T, + next: String, + user: Option, + /// Curator preview affordances (the visibility toggle). + is_curator: bool, + /// Identifier used in the URL (for the toggle's form action). + slug: String, + /// Current visibility (drives the toggle's checked state). + is_public: bool, + s: SampleView, +} + +#[derive(askama::Template)] +#[template(path = "samples/_public_toggle.html")] +struct PublicToggleFragment { + t: T, + slug: String, + is_public: bool, +} + +// ── handlers ────────────────────────────────────────────────────────────────── + +async fn report( + State(st): State, + locale: Locale, + user: MaybeUser, + Path(slug): Path, +) -> Result { + let rep = du_db::biosample::report(&st.pool, &slug) + .await? + .ok_or_else(|| AppError::NotFound(format!("sample {slug}")))?; + + // Gate: private samples 404 for the public (indistinguishable from missing); + // a signed-in curator may preview them. + let is_curator = user.0.as_ref().map(crate::auth::Session::is_curator).unwrap_or(false); + if !rep.identity.is_public && !is_curator { + return Err(AppError::NotFound(format!("sample {slug}"))); + } + let is_public = rep.identity.is_public; + + // Resolve each called haplogroup to its tree pathway (best-effort). 
+ let y_path = match &rep.y { + Some(c) => Some(du_db::haplogroup::pathway(&st.pool, &c.name, c.dna_type).await?), + None => None, + }; + let mt_path = match &rep.mt { + Some(c) => Some(du_db::haplogroup::pathway(&st.pool, &c.name, c.dna_type).await?), + None => None, + }; + + let view = SampleView::build(rep, y_path, mt_path); + Ok(html(&SampleReportTemplate { + t: locale.t, + next: locale.next, + user: user.nav(), + is_curator, + slug, + is_public, + s: view, + })) +} + +#[derive(Deserialize)] +struct ToggleForm { + /// A checkbox: present (`Some`) when checked, absent when not. + is_public: Option, +} + +/// Curator-only: flip the sample's public-visibility flag, return the swapped +/// toggle fragment. RBAC enforced by the `Curator` extractor. +async fn toggle_public( + Curator(_s): Curator, + State(st): State, + locale: Locale, + Path(slug): Path, + Form(f): Form, +) -> Result { + let guid = du_db::biosample::resolve_guid(&st.pool, &slug) + .await? + .ok_or_else(|| AppError::NotFound(format!("sample {slug}")))?; + let make_public = f.is_public.is_some(); + du_db::biosample::set_public(&st.pool, guid, make_public).await?; + Ok(html(&PublicToggleFragment { t: locale.t, slug, is_public: make_public })) +} diff --git a/rust/crates/du-web/src/routes/sequencer.rs b/rust/crates/du-web/src/routes/sequencer.rs new file mode 100644 index 00000000..46d09101 --- /dev/null +++ b/rust/crates/du-web/src/routes/sequencer.rs @@ -0,0 +1,422 @@ +//! Sequencer instrument→lab consensus review surfaces. Two faces over the same +//! `du_db::sequencer` proposal queue, both Curator-gated (session + Curator role): +//! a JSON API at `/manage/instrument-proposals/*` (Navigator/programmatic) and a +//! two-panel HTMX review UI at `/curator/instrument-proposals`. Accepting a +//! proposal sets `sequencer_instrument.lab_id` (what the public +//! `/api/v1/sequencer/lab` lookup resolves); accept/reject are audited to +//! `ident.audit_log`. Neither is part of the public OpenAPI document. 
+ +use crate::auth::{Curator, NavUser}; +use crate::error::AppError; +use crate::htmx::HxHeaders; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::{Path, Query, State}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::{Form, Json, Router}; +use du_db::sequencer::{ObservationView, ProposalView}; +use serde::Deserialize; +use serde_json::{json, Value}; + +pub fn router() -> Router { + Router::new() + .route("/manage/instrument-proposals", get(list)) + .route("/manage/instrument-proposals/:id", get(detail)) + .route("/manage/instrument-proposals/:id/accept", post(accept)) + .route("/manage/instrument-proposals/:id/reject", post(reject)) + // Curator HTMX review UI (two-panel) over the same proposal queue. + .route("/curator/instrument-proposals", get(ui_page)) + .route("/curator/instrument-proposals/fragment", get(ui_list)) + .route("/curator/instrument-proposals/:id/panel", get(ui_panel)) + .route("/curator/instrument-proposals/:id/accept", post(ui_accept)) + .route("/curator/instrument-proposals/:id/reject", post(ui_reject)) +} + +fn proposal_json(p: &ProposalView) -> Value { + json!({ + "id": p.id, + "instrument_id": p.instrument_id, + "proposed_lab_name": p.proposed_lab_name, + "proposed_model": p.proposed_model, + "observation_count": p.observation_count, + "distinct_citizen_count": p.distinct_citizen_count, + "confidence_score": p.confidence_score, + "status": p.status, + }) +} + +fn observation_json(o: &ObservationView) -> Value { + json!({ + "lab_name": o.lab_name, + "biosample_ref": o.biosample_ref, + "platform": o.platform, + "instrument_model": o.instrument_model, + "repo_did": o.repo_did, + "confidence": o.confidence, + }) +} + +#[derive(Deserialize)] +struct ListQuery { + status: Option, + page: Option, + page_size: Option, +} + +async fn list(_cur: Curator, State(st): State, Query(q): Query) -> Result, AppError> { + let page = 
du_db::sequencer::list_proposals( + &st.pool, + q.status.as_deref().filter(|s| !s.is_empty()), + q.page.unwrap_or(1), + q.page_size.unwrap_or(50), + ) + .await?; + Ok(Json(json!({ + "items": page.items.iter().map(proposal_json).collect::>(), + "total": page.total, + "page": page.page, + "page_size": page.page_size, + }))) +} + +async fn detail(_cur: Curator, State(st): State, Path(id): Path) -> Result, AppError> { + let (p, obs) = du_db::sequencer::proposal_detail(&st.pool, id) + .await? + .ok_or_else(|| AppError::NotFound(format!("proposal {id}")))?; + Ok(Json(json!({ + "proposal": proposal_json(&p), + "observations": obs.iter().map(observation_json).collect::>(), + }))) +} + +#[derive(Deserialize)] +struct AcceptBody { + /// The lab to associate (may differ from the proposed name). + lab_name: String, + manufacturer: Option, + model: Option, + is_d2c: Option, +} + +async fn accept(cur: Curator, State(st): State, Path(id): Path, Json(b): Json) -> Result, AppError> { + let lab_name = b.lab_name.trim(); + if lab_name.is_empty() { + return Err(AppError::BadRequest("lab_name is required".into())); + } + let hit = du_db::sequencer::accept_proposal( + &st.pool, + id, + cur.0.user_id, + lab_name, + b.manufacturer.as_deref(), + b.model.as_deref(), + b.is_d2c, + ) + .await?; + Ok(Json(json!({ + "instrument_id": hit.instrument_id, + "lab_name": hit.lab_name, + "is_d2c": hit.is_d2c, + "manufacturer": hit.manufacturer, + "model_name": hit.model_name, + "website_url": hit.website_url, + }))) +} + +#[derive(Deserialize, Default)] +struct RejectBody { + reason: Option, +} + +async fn reject(cur: Curator, State(st): State, Path(id): Path, body: Option>) -> Result, AppError> { + let reason = body.and_then(|b| b.0.reason); + let (instrument, _lab) = du_db::sequencer::reject_proposal(&st.pool, id, cur.0.user_id, reason.as_deref()) + .await? 
+ .ok_or_else(|| AppError::NotFound(format!("reviewable proposal {id}")))?; + Ok(Json(json!({ "id": id, "status": "REJECTED", "instrument_id": instrument }))) +} + +// ── curator HTMX review UI ────────────────────────────────────────────────── + +const PROPOSAL_CHANGED: &str = "proposal-changed"; + +fn status_class(status: &str) -> &'static str { + match status { + "READY_FOR_REVIEW" => "text-bg-success", + "ACCEPTED" => "text-bg-primary", + "REJECTED" => "text-bg-secondary", + _ => "text-bg-warning", // PENDING / conflict + } +} + +fn fmt_conf(score: Option) -> String { + match score { + Some(s) => format!("{:.0}%", (s * 100.0).round()), + None => "—".into(), + } +} + +struct ProposalRow { + id: i64, + instrument_id: String, + lab: String, + obs: i32, + citizens: i32, + confidence: String, + status: String, + status_class: String, +} + +struct ListView { + rows: Vec, + /// The active status filter (`ALL` or a concrete status) — drives the chips. + status: String, + page: i64, + total: i64, + total_pages: i64, +} + +#[derive(Deserialize)] +struct UiListQuery { + status: Option, + page: Option, +} + +fn proposal_row(p: ProposalView) -> ProposalRow { + ProposalRow { + id: p.id, + instrument_id: p.instrument_id, + lab: p.proposed_lab_name.unwrap_or_else(|| "—".into()), + obs: p.observation_count, + citizens: p.distinct_citizen_count, + confidence: fmt_conf(p.confidence_score), + status_class: status_class(&p.status).to_string(), + status: p.status, + } +} + +async fn load_ui_list(st: &AppState, q: &UiListQuery) -> Result { + let filter = q.status.as_deref().filter(|s| !s.is_empty() && *s != "ALL"); + let page = du_db::sequencer::list_proposals(&st.pool, filter, q.page.unwrap_or(1), 25).await?; + let (cur_page, total, total_pages) = (page.page, page.total, page.total_pages()); + Ok(ListView { + rows: page.items.into_iter().map(proposal_row).collect(), + status: q.status.clone().filter(|s| !s.is_empty()).unwrap_or_else(|| "ALL".into()), + page: cur_page, + total, + 
total_pages, + }) +} + +#[derive(askama::Template)] +#[template(path = "curator/instrument-proposals/page.html")] +struct PageTemplate { + t: T, + next: String, + user: Option, + list: ListView, +} +#[derive(askama::Template)] +#[template(path = "curator/instrument-proposals/list.html")] +struct ListTemplate { + t: T, + list: ListView, +} + +async fn ui_page(Curator(s): Curator, State(st): State, locale: Locale, Query(q): Query) -> Result { + let list = load_ui_list(&st, &q).await?; + Ok(html(&PageTemplate { + t: locale.t, + next: locale.next, + user: Some(NavUser { display_name: s.display_name, is_curator: true }), + list, + })) +} + +async fn ui_list(_c: Curator, State(st): State, locale: Locale, Query(q): Query) -> Result { + let list = load_ui_list(&st, &q).await?; + Ok(html(&ListTemplate { t: locale.t, list })) +} + +// ── detail / accept / reject panel ────────────────────────────────────────── + +struct ObsRow { + lab: String, + platform: String, + model: String, + citizen: String, +} + +struct DetailView { + id: i64, + instrument_id: String, + proposed_lab: String, + status: String, + status_class: String, + obs_count: i32, + citizen_count: i32, + confidence: String, + observations: Vec, + /// PENDING / READY_FOR_REVIEW → show the accept/reject forms. + actionable: bool, + notice: Option, + /// The proposal is gone / terminal — show only the resolved note. + resolved: bool, +} + +#[derive(askama::Template)] +#[template(path = "curator/instrument-proposals/detail.html")] +struct DetailTemplate { + t: T, + p: DetailView, +} + +/// Last DID path segment, for a compact citizen label. +fn short_did(did: &str) -> String { + did.rsplit(['/', ':']).next().unwrap_or(did).to_string() +} + +async fn build_detail(st: &AppState, id: i64, notice: Option) -> Result { + let Some((p, obs)) = du_db::sequencer::proposal_detail(&st.pool, id).await? 
else { + return Ok(DetailView { + id, + instrument_id: String::new(), + proposed_lab: String::new(), + status: String::new(), + status_class: String::new(), + obs_count: 0, + citizen_count: 0, + confidence: String::new(), + observations: vec![], + actionable: false, + notice, + resolved: true, + }); + }; + let actionable = matches!(p.status.as_str(), "PENDING" | "READY_FOR_REVIEW"); + let observations = obs + .into_iter() + .map(|o| ObsRow { + lab: o.lab_name.unwrap_or_else(|| "—".into()), + platform: o.platform.unwrap_or_default(), + model: o.instrument_model.unwrap_or_default(), + citizen: o.repo_did.as_deref().map(short_did).unwrap_or_default(), + }) + .collect(); + Ok(DetailView { + id: p.id, + instrument_id: p.instrument_id, + proposed_lab: p.proposed_lab_name.unwrap_or_default(), + status_class: status_class(&p.status).to_string(), + status: p.status, + obs_count: p.observation_count, + citizen_count: p.distinct_citizen_count, + confidence: fmt_conf(p.confidence_score), + observations, + actionable, + notice, + resolved: false, + }) +} + +async fn detail_response(st: &AppState, t: T, id: i64, notice: Option) -> Result { + Ok(html(&DetailTemplate { t, p: build_detail(st, id, notice).await? })) +} + +async fn changed_response(st: &AppState, t: T, id: i64, notice: Option) -> Result { + let body = detail_response(st, t, id, notice).await?; + Ok((HxHeaders::new().trigger(PROPOSAL_CHANGED), body).into_response()) +} + +async fn ui_panel(_c: Curator, State(st): State, locale: Locale, Path(id): Path) -> Result { + detail_response(&st, locale.t, id, None).await +} + +#[derive(Deserialize)] +struct UiAcceptForm { + lab_name: String, + manufacturer: Option, + model: Option, + /// Checkbox: present only when ticked. Absent ⇒ leave an existing lab's flag + /// untouched (the safe default — see `accept_proposal`). 
+ is_d2c: Option, +} + +async fn ui_accept( + Curator(s): Curator, + State(st): State, + locale: Locale, + Path(id): Path, + Form(f): Form, +) -> Result { + let lab_name = f.lab_name.trim(); + if lab_name.is_empty() { + return Err(AppError::BadRequest("lab_name is required".into())); + } + let clean = |o: Option| o.map(|s| s.trim().to_string()).filter(|s| !s.is_empty()); + let manufacturer = clean(f.manufacturer); + let model = clean(f.model); + let is_d2c = f.is_d2c.map(|_| true); // ticked ⇒ Some(true); absent ⇒ None + let hit = du_db::sequencer::accept_proposal( + &st.pool, + id, + s.user_id, + lab_name, + manufacturer.as_deref(), + model.as_deref(), + is_d2c, + ) + .await?; + let notice = format!("{} {}", locale.t.get("ip.notice.accepted"), hit.lab_name); + changed_response(&st, locale.t, id, Some(notice)).await +} + +#[derive(Deserialize)] +struct UiRejectForm { + reason: Option, +} + +async fn ui_reject( + Curator(s): Curator, + State(st): State, + locale: Locale, + Path(id): Path, + Form(f): Form, +) -> Result { + let reason = f.reason.map(|r| r.trim().to_string()).filter(|r| !r.is_empty()); + du_db::sequencer::reject_proposal(&st.pool, id, s.user_id, reason.as_deref()) + .await? + .ok_or_else(|| AppError::NotFound(format!("reviewable proposal {id}")))?; + let notice = locale.t.get("ip.notice.rejected").to_string(); + changed_response(&st, locale.t, id, Some(notice)).await +} + +#[cfg(test)] +mod tests { + use axum::body::Body; + use axum::http::{Request, StatusCode}; + use tower::ServiceExt; + + /// The proposal queue sits behind the Curator guard — an unauthenticated + /// request is redirected to /login, never served. 
+ #[tokio::test] + async fn proposals_require_curator() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping curator-guard test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let state = crate::state::AppState { pool: db.pool().clone(), key: tower_cookies::Key::generate(), oauth: None }; + // Both the JSON management API and the HTMX curator UI are guarded. + for uri in ["/manage/instrument-proposals", "/curator/instrument-proposals"] { + let app = crate::routes::app(state.clone()); + let r = app + .oneshot(Request::builder().uri(uri).body(Body::empty()).unwrap()) + .await + .unwrap(); + assert_eq!(r.status(), StatusCode::SEE_OTHER, "{uri} must redirect unauth"); + } + } +} diff --git a/rust/crates/du-web/src/routes/tree.rs b/rust/crates/du-web/src/routes/tree.rs new file mode 100644 index 00000000..ce8222f5 --- /dev/null +++ b/rust/crates/du-web/src/routes/tree.rs @@ -0,0 +1,583 @@ +//! Public Y/MT haplogroup tree views — two server-rendered SVG cladogram modes +//! (horizontal & vertical, [`crate::tree_layout`]) replacing the old one-level +//! list. One handler per lineage serves both the full page and the HTMX +//! `#tree-container` fragment (history-restore/boosted nav get the full page). +//! +//! A depth-bounded window (default `DEFAULT_DEPTH` levels below the current root, +//! overridable via a client-persisted `?depth=`) keeps any view renderable; +//! every node re-roots on click. Orientation is chosen by the `tree_orient` +//! cookie, flipped by an `?orient=` toggle that also persists the cookie. Search +//! resolves a haplogroup name *or* a defining variant. 
+ +use crate::error::AppError; +use crate::htmx::{HxHeaders, HxRequest}; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use crate::tree_layout::{self, InNode, Laid, Orientation}; +use crate::auth::Curator; +use axum::extract::{Path, Query, State}; +use axum::http::header::{HeaderMap, HeaderValue, COOKIE, SET_COOKIE}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use serde_json::{json, Value}; +use du_db::haplogroup::WindowNode; +use du_domain::enums::DnaType; +use serde::Deserialize; +use std::collections::HashMap; + +const TARGET: &str = "tree-container"; +/// Levels rendered below the current display root when none is requested. +const DEFAULT_DEPTH: i32 = 4; +/// Clamp range for a client-supplied `?depth=` (the selector persists the choice +/// in localStorage and injects it into every tree request — see page.html). +const MIN_DEPTH: i32 = 1; +const MAX_DEPTH: i32 = 8; +/// Depths offered in the selector. +const DEPTH_OPTIONS: [i32; 6] = [2, 3, 4, 5, 6, 7]; +const ORIENT_COOKIE: &str = "tree_orient"; + +pub fn router() -> Router { + Router::new() + .route("/ytree", get(ytree)) + .route("/mtree", get(mtree)) + .route("/ytree/snp/:name", get(ysnp)) + .route("/mtree/snp/:name", get(mtsnp)) + // Curator triage for sample leaves whose published call didn't resolve to a node. + .route("/manage/tree-sample/unplaced", get(unplaced)) + .route("/manage/tree-sample/place", post(place)) +} + +/// `Y_DNA` (default) / `MT_DNA` from a `type` query param. +fn dna_param(s: Option<&str>) -> DnaType { + match s { + Some("MT_DNA") => DnaType::MtDna, + _ => DnaType::YDna, + } +} + +#[derive(Deserialize)] +struct UnplacedQuery { + #[serde(rename = "type")] + dna_type: Option, +} + +/// Curator-gated: the queue of UNPLACED published calls (no node matched) for triage. 
+async fn unplaced(_cur: Curator, State(st): State, Query(q): Query) -> Result, AppError> { + let dna = dna_param(q.dna_type.as_deref()); + let items: Vec = du_db::tree_sample::unplaced(&st.pool, dna, 500) + .await? + .into_iter() + .map(|r| json!({ + "sample_guid": r.sample_guid, + "call_text": r.call_text, + "accession": r.accession, + "alias": r.alias, + })) + .collect(); + Ok(Json(json!({ "items": items }))) +} + +#[derive(Deserialize)] +struct PlaceBody { + sample_guid: uuid::Uuid, + #[serde(rename = "type")] + dna_type: Option, + /// The haplogroup name (or defining SNP) to pin this sample under. + haplogroup: String, +} + +/// Curator-gated: manually place an UNPLACED sample under a chosen node. +async fn place(_cur: Curator, State(st): State, Json(b): Json) -> Result, AppError> { + let dna = dna_param(b.dna_type.as_deref()); + if du_db::tree_sample::place_sample(&st.pool, b.sample_guid, dna, &b.haplogroup).await? { + Ok(Json(json!({ "sample_guid": b.sample_guid, "haplogroup": b.haplogroup, "status": "PLACED" }))) + } else { + Err(AppError::NotFound(format!("could not place {} under {}", b.sample_guid, b.haplogroup))) + } +} + +#[derive(Deserialize)] +struct TreeQuery { + /// Haplogroup name or defining-variant query to center on. + root: Option, + /// Orientation override ("h"/"v"); also persisted to the cookie. + orient: Option, + /// Levels to render below the root (clamped); the client persists it. + depth: Option, +} + +// ── View models ──────────────────────────────────────────────────────────── + +struct Crumb { + name: String, + href: String, +} + +#[derive(askama::Template)] +#[template(path = "tree/page.html")] +struct TreePageTemplate { + t: T, + next: String, + user: Option, + title: String, + base_path: &'static str, + root_name: String, + query: String, + orientation: Orientation, + /// (depth value, is-current) for the selector options. 
+ depth_options: Vec<(i32, bool)>, + crumbs: Vec, + laid: Option, +} + +#[derive(askama::Template)] +#[template(path = "tree/svg.html")] +struct SvgFragment { + t: T, + base_path: &'static str, + crumbs: Vec, + laid: Option, +} + +#[derive(askama::Template)] +#[template(path = "tree/snp_sidebar.html")] +struct SnpSidebar { + t: T, + name: String, + provenance: Option, + variants: Vec, + /// Placed non-D2C sample leaves at or below this node (capped for the sidebar). + samples: Vec, + /// How many more placed samples exist beyond the shown `samples` (0 ⇒ all shown). + samples_more: i64, +} + +/// One placed sample row in the sidebar (label + optional paper citation). +struct LeafRow { + label: String, + source: String, + citation: Option, +} + +/// Max leaf rows rendered in the sidebar before collapsing to an "+N more" note. +const SIDEBAR_SAMPLE_CAP: usize = 50; + +struct VariantRow { + name: String, + mutation_type: String, + aliases: Vec, + coordinates: Vec, + /// This branch's ancestral>derived transition (ASR), e.g. "T>G". + transition: Option, + /// SNP occurs on other branches too (homoplasy). + recurrent: bool, + /// This branch reverted to the ancestral state (derived == variant ancestral). + back_mutation: bool, +} + +/// Where a branch came from — increasingly important as multiple source trees +/// (ISOGG, decoding-us, ytree.net, …) fold into one node. +struct Provenance { + /// Originating source (the `tree.haplogroup.source` column). + source: String, + /// Cross-source alternate names (`provenance.aliases`) — e.g. the ISOGG path + /// name vs. a decoding-us `R1b-…` name for the same branch. + aliases: Vec, + /// When the source last updated this node (date only), if recorded. + updated: Option, + /// Curator-adopted backbone (a `provenance.backbone_source` marker). 
+ backbone: bool, + formed_ybp: Option, + tmrca_ybp: Option, +} + +// ── Handlers ──────────────────────────────────────────────────────────────── + +async fn ytree( + st: State, + hx: HxRequest, + locale: Locale, + user: crate::auth::MaybeUser, + headers: HeaderMap, + q: Query, +) -> Result { + render_tree(st, hx, locale, user, headers, q, DnaType::YDna, "/ytree", "Y", "tree.title.y").await +} + +async fn mtree( + st: State, + hx: HxRequest, + locale: Locale, + user: crate::auth::MaybeUser, + headers: HeaderMap, + q: Query, +) -> Result { + render_tree(st, hx, locale, user, headers, q, DnaType::MtDna, "/mtree", "L", "tree.title.mt").await +} + +async fn ysnp(st: State, locale: Locale, name: Path) -> Result { + snp_sidebar(st, locale, name, DnaType::YDna).await +} + +async fn mtsnp(st: State, locale: Locale, name: Path) -> Result { + snp_sidebar(st, locale, name, DnaType::MtDna).await +} + +#[allow(clippy::too_many_arguments)] +async fn render_tree( + State(st): State, + hx: HxRequest, + locale: Locale, + user: crate::auth::MaybeUser, + headers: HeaderMap, + Query(q): Query, + dna_type: DnaType, + base_path: &'static str, + default_root: &str, + title_key: &str, +) -> Result { + // Orientation: explicit ?orient= wins (and will be persisted), else cookie. + let orientation = match q.orient.as_deref() { + Some(o) => Orientation::parse(o), + None => cookie(&headers, ORIENT_COOKIE) + .map(|s| Orientation::parse(&s)) + .unwrap_or(Orientation::Horizontal), + }; + + // Resolve the display root: search query → name-or-variant, else the lineage + // default, else the first actual root (covers empty/renamed roots). + let query = q.root.as_deref().map(str::trim).filter(|s| !s.is_empty()); + let root_name = match query { + Some(qy) => du_db::haplogroup::resolve_name_or_variant(&st.pool, qy, dna_type) + .await? 
+ .ok_or_else(|| AppError::NotFound(format!("haplogroup or variant {qy}")))?, + None => default_root_name(&st.pool, dna_type, default_root).await?, + }; + + // Depth: client-supplied ?depth= (persisted in localStorage), clamped. + let depth = q.depth.unwrap_or(DEFAULT_DEPTH).clamp(MIN_DEPTH, MAX_DEPTH); + + // Window + nesting + layout. Each visible node's directly-placed samples hang off it as + // YFull-style leaf tips (capped per node). + let window = du_db::haplogroup::subtree_window(&st.pool, dna_type, &root_name, depth).await?; + let node_ids: Vec = window.iter().map(|n| n.id).collect(); + let mut samples_by_node: HashMap> = HashMap::new(); + for (id, label) in du_db::tree_sample::direct_labels(&st.pool, dna_type, &node_ids).await? { + samples_by_node.entry(id).or_default().push(label); + } + let laid = build_root(&window, &samples_by_node).and_then(|root| tree_layout::layout(Some(&root), orientation)); + + // Breadcrumbs: ancestors (root→parent) + the current node (no link). + let crumbs = build_crumbs(&st.pool, dna_type, base_path, &root_name).await?; + + // Persist orientation when it was toggled via the query param. 
+ let set_cookie = q.orient.is_some().then(|| orient_cookie(orientation)); + + let mut resp = if hx.wants_fragment_for(TARGET) { + let push = format!("{base_path}?root={root_name}"); + let frag = SvgFragment { t: locale.t, base_path, crumbs, laid }; + (HxHeaders::new().push_url(push), html(&frag)).into_response() + } else { + let page = TreePageTemplate { + t: locale.t, + next: locale.next, + user: user.nav(), + title: locale.t.get(title_key).to_string(), + base_path, + root_name, + query: query.unwrap_or_default().to_string(), + orientation, + depth_options: DEPTH_OPTIONS.iter().map(|&d| (d, d == depth)).collect(), + crumbs, + laid, + }; + html(&page) + }; + if let Some(c) = set_cookie { + if let Ok(hv) = HeaderValue::from_str(&c) { + resp.headers_mut().insert(SET_COOKIE, hv); + } + } + Ok(resp) +} + +async fn snp_sidebar( + State(st): State, + locale: Locale, + Path(name): Path, + dna_type: DnaType, +) -> Result { + let variants = du_db::haplogroup::variants_of(&st.pool, &name, dna_type) + .await? + .into_iter() + .map(|v| { + let transition = match (&v.link_ancestral, &v.link_derived) { + (Some(a), Some(d)) => Some(format!("{a}>{d}")), + _ => None, + }; + // Back-mutation: this branch's derived state is the SNP's ancestral allele. + let back_mutation = + matches!((&v.link_derived, coord_ancestral(&v.coordinates)), (Some(d), Some(a)) if *d == a); + let aliases = json_str_list(&v.aliases); + // UNNAMED variants (homoplasy collisions) fall back to an alias. + let name = v + .canonical_name + .or_else(|| aliases.first().cloned()) + .unwrap_or_else(|| "(unnamed)".into()); + VariantRow { + name, + mutation_type: v.mutation_type, + aliases: aliases.clone(), + coordinates: coord_list(&v.coordinates), + transition, + recurrent: v.recurrent, + back_mutation, + } + }) + .collect(); + + // Branch provenance: source, cross-source aliases, last-updated, backbone, age. 
+ let provenance = du_db::haplogroup::get_by_name(&st.pool, &name, dna_type).await?.map(|h| { + let p = &h.provenance; + Provenance { + source: h.source.unwrap_or_else(|| "—".into()), + aliases: p + .get("aliases") + .and_then(serde_json::Value::as_array) + .map(|a| a.iter().filter_map(|s| s.as_str().map(str::to_string)).collect()) + .unwrap_or_default(), + // Stored like "2025-05-24T13:58:48…[Etc/UTC]" — keep the date. + updated: p.get("source_updated").and_then(|v| v.as_str()).and_then(|s| s.split('T').next()).map(str::to_string), + backbone: p.get("backbone_source").is_some(), + formed_ybp: h.formed_ybp, + tmrca_ybp: h.tmrca_ybp, + } + }); + + // Placed non-D2C sample leaves at or below this node (capped for the sidebar). + let mut leaves = du_db::tree_sample::samples_under(&st.pool, &name, dna_type).await?; + let samples_more = (leaves.len() as i64 - SIDEBAR_SAMPLE_CAP as i64).max(0); + leaves.truncate(SIDEBAR_SAMPLE_CAP); + let samples: Vec = leaves + .into_iter() + .map(|s| { + let label = s.accession.or(s.alias).unwrap_or_else(|| s.sample_guid.to_string()); + let citation = s.pub_title.map(|t| match s.pub_doi { + Some(doi) => format!("{t} ({doi})"), + None => t, + }); + LeafRow { label, source: s.source, citation } + }) + .collect(); + + Ok(html(&SnpSidebar { t: locale.t, name, provenance, variants, samples, samples_more })) +} + +// ── Helpers ────────────────────────────────────────────────────────────────── + +/// Read a cookie value from the request `Cookie` header. +fn cookie(headers: &HeaderMap, key: &str) -> Option { + headers + .get(COOKIE)? + .to_str() + .ok()? 
+ .split(';') + .filter_map(|kv| kv.trim().split_once('=')) + .find(|(k, _)| *k == key) + .map(|(_, v)| v.to_string()) +} + +fn orient_cookie(o: Orientation) -> String { + format!("{ORIENT_COOKIE}={}; Path=/; Max-Age=31536000; SameSite=Lax", o.code()) +} + +/// The lineage's display root: the configured default if present, else the +/// first real root (so the page still renders for renamed/empty lineages). +async fn default_root_name(pool: &du_db::PgPool, dna_type: DnaType, default_root: &str) -> Result { + if du_db::haplogroup::get_by_name(pool, default_root, dna_type).await?.is_some() { + return Ok(default_root.to_string()); + } + let roots = du_db::haplogroup::roots(pool, dna_type).await?; + roots + .into_iter() + .next() + .map(|h| h.name) + .ok_or_else(|| AppError::NotFound(format!("no {default_root} tree loaded"))) +} + +/// Max sample tips drawn directly under one node before collapsing to a "+N" overflow tip. +const NODE_TIP_CAP: usize = 8; + +/// Nest the flat window into an `InNode` tree rooted at the depth-0 node. `samples` maps a node +/// id to its directly-placed sample labels (rendered as leaf tips). +fn build_root(window: &[WindowNode], samples: &HashMap>) -> Option { + let mut children_of: HashMap> = HashMap::new(); + let mut root: Option<&WindowNode> = None; + for n in window { + match n.parent_id { + Some(p) => children_of.entry(p).or_default().push(n), + None => root = Some(n), + } + } + // Window root is the depth-0 node; fall back to min-depth if shapes differ. 
+ let root = root.or_else(|| window.iter().min_by_key(|n| n.depth))?; + Some(to_innode(root, &children_of, samples)) +} + +fn to_innode(n: &WindowNode, children_of: &HashMap>, samples: &HashMap>) -> InNode { + let children = children_of + .get(&n.id) + .map(|kids| kids.iter().map(|c| to_innode(c, children_of, samples)).collect()) + .unwrap_or_default(); + let all = samples.get(&n.id).map(Vec::as_slice).unwrap_or(&[]); + let tips: Vec = all.iter().take(NODE_TIP_CAP).cloned().collect(); + let sample_overflow = (all.len() as i64 - tips.len() as i64).max(0); + InNode { + name: n.name.clone(), + variant_count: n.variant_count, + samples: tips, + sample_overflow, + is_backbone: n.is_backbone, + is_recent: n.is_recent, + formed_ybp: n.formed_ybp, + tmrca_ybp: n.tmrca_ybp, + has_hidden: n.has_hidden, + children, + } +} + +async fn build_crumbs( + pool: &du_db::PgPool, + dna_type: DnaType, + base_path: &str, + root_name: &str, +) -> Result, AppError> { + let Some(cur) = du_db::haplogroup::get_by_name(pool, root_name, dna_type).await? else { + return Ok(vec![]); + }; + let mut crumbs: Vec = du_db::haplogroup::ancestors(pool, cur.id) + .await? + .into_iter() + .map(|(_, name)| Crumb { href: format!("{base_path}?root={name}"), name }) + .collect(); + // Current node closes the trail (no link). + crumbs.push(Crumb { name: root_name.to_string(), href: String::new() }); + Ok(crumbs) +} + +/// Flatten a `{key: [..strings..]}` aliases JSONB into a display list. +fn json_str_list(v: &serde_json::Value) -> Vec { + let Some(obj) = v.as_object() else { return vec![] }; + obj.values() + .filter_map(serde_json::Value::as_array) + .flatten() + .filter_map(|s| s.as_str().map(str::to_string)) + .collect() +} + +/// Render coordinates JSONB as `"contig:pos anc>der [b38]"` strings — the locus +/// plus the SNP's ancestral→derived states (the reference genome is not the +/// phylogenetic root, so these are `ancestral`/`derived`, not ref/alt). 
When no +/// alleles are recorded, shows just the locus. +fn coord_list(v: &serde_json::Value) -> Vec { + let allele = |c: &serde_json::Value, k: &str| { + c.get(k).and_then(|x| x.as_str()).unwrap_or("").to_string() + }; + let Some(obj) = v.as_object() else { return vec![] }; + obj.iter() + .filter_map(|(genome, c)| { + let contig = c.get("contig").and_then(|x| x.as_str()).unwrap_or("?"); + let pos = c.get("position").and_then(serde_json::Value::as_i64)?; + let anc = allele(c, "ancestral"); + let der = allele(c, "derived"); + let alleles = if anc.is_empty() && der.is_empty() { String::new() } else { format!(" {anc}>{der}") }; + Some(format!("{contig}:{pos}{alleles} [{}]", short_genome(genome))) + }) + .collect() +} + +/// The SNP's representative ancestral allele (prefer GRCh38, else any build), used +/// to flag back-mutations against a branch's derived state. +fn coord_ancestral(v: &serde_json::Value) -> Option { + let obj = v.as_object()?; + let pick = obj.get("GRCh38").or_else(|| obj.values().next())?; + pick.get("ancestral").and_then(|x| x.as_str()).map(str::to_string) +} + +fn short_genome(g: &str) -> &str { + if g.contains("GRCh37") || g.contains("hg19") { + "b37" + } else if g.contains("GRCh38") || g.contains("hg38") { + "b38" + } else if g.contains("T2T") || g.contains("CHM13") || g == "hs1" { + "hs1" + } else { + g + } +} + +#[cfg(test)] +mod tests { + use axum::body::{to_bytes, Body}; + use axum::http::{Request, StatusCode}; + use tower::ServiceExt; + + /// The cladogram shows a node's cumulative placed-sample count, and the SNP sidebar lists + /// the placed (non-D2C) samples with their citation. 
+ #[tokio::test] + async fn cladogram_shows_sample_count_and_sidebar_leaves() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping cladogram test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + sqlx::query("INSERT INTO tree.haplogroup (name, haplogroup_type) VALUES ('R-M269', 'Y_DNA'::core.dna_type)") + .execute(&pool) + .await + .unwrap(); + // One paper sample (placed) + one D2C sample (excluded), both calling R-M269. + for (src, acc) in [("EXTERNAL", "EX-1"), ("CITIZEN", "CIT-1")] { + sqlx::query( + "INSERT INTO core.biosample (source, accession, original_haplogroups) \ + VALUES ($1::core.biosample_source, $2, '[{\"y\":\"R-M269\"}]'::jsonb)", + ) + .bind(src) + .bind(acc) + .execute(&pool) + .await + .unwrap(); + } + du_db::tree_sample::recompute_placements(&pool, du_domain::enums::DnaType::YDna).await.unwrap(); + let state = crate::state::AppState { pool, key: tower_cookies::Key::generate(), oauth: None }; + + let body = |state: crate::state::AppState, uri: &'static str| async move { + let resp = crate::routes::app(state) + .oneshot(Request::builder().uri(uri).body(Body::empty()).unwrap()) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + String::from_utf8(to_bytes(resp.into_body(), usize::MAX).await.unwrap().to_vec()).unwrap() + }; + + // The placed sample hangs off the node as a leaf tip (label rendered in the SVG). + let svg = body(state.clone(), "/ytree?root=R-M269").await; + assert!(svg.contains("tree-tip"), "the cladogram renders sample leaf tips"); + assert!(svg.contains("EX-1"), "the placed sample's label appears as a tip"); + assert!(!svg.contains("CIT-1"), "the D2C sample is not a tip"); + + // The sidebar lists the paper sample (with source) and not the D2C one. 
+ let side = body(state.clone(), "/ytree/snp/R-M269").await; + assert!(side.contains("EX-1") && side.contains("EXTERNAL"), "sidebar lists the placed sample"); + assert!(!side.contains("CIT-1"), "D2C sample never surfaces"); + + // The curator triage queue is Curator-gated (unauth → redirect to login). + let guarded = crate::routes::app(state) + .oneshot(Request::builder().uri("/manage/tree-sample/unplaced").body(Body::empty()).unwrap()) + .await + .unwrap(); + assert_eq!(guarded.status(), StatusCode::SEE_OTHER, "unplaced triage requires Curator"); + } +} diff --git a/rust/crates/du-web/src/routes/variants.rs b/rust/crates/du-web/src/routes/variants.rs new file mode 100644 index 00000000..b1621bda --- /dev/null +++ b/rust/crates/du-web/src/routes/variants.rs @@ -0,0 +1,168 @@ +//! Public variant browser. The browser page embeds the first results page inline +//! (no load round-trip); search/pagination and the detail panel are HTMX +//! fragments targeting `#variants-table` / `#detail-panel`. + +use crate::error::AppError; +use crate::i18n::{Locale, T}; +use crate::render::html; +use crate::state::AppState; +use axum::extract::{Path, Query, State}; +use axum::response::Response; +use axum::routing::get; +use axum::Router; +use du_db::Page; +use du_domain::ids::VariantId; +use du_domain::variant::Variant; +use serde::Deserialize; + +pub fn router() -> Router { + Router::new() + .route("/variants", get(browser)) + .route("/variants/list", get(list)) + .route("/variants/detail/:id", get(detail)) +} + +#[derive(Deserialize)] +struct ListQuery { + query: Option, + page: Option, + page_size: Option, +} + +/// A flattened variant for the list table (templates stay logic-free). 
+struct RowView { + id: i64, + name: String, + mutation_type: String, + naming_status: String, + builds: String, +} + +impl RowView { + fn from(v: &Variant) -> Self { + let mut builds: Vec<&str> = v.coordinates.0.keys().map(String::as_str).collect(); + builds.sort_unstable(); + RowView { + id: v.id.0, + name: v.canonical_name.clone(), + mutation_type: v.mutation_type.label().to_string(), + naming_status: v.naming_status.label().to_string(), + builds: builds.join(", "), + } + } +} + +/// Shared list-fragment view data (also embedded by the browser page). +struct ListView { + query: String, + rows: Vec, + page: i64, + page_size: i64, + total: i64, + total_pages: i64, +} + +async fn load_list(st: &AppState, q: &ListQuery) -> Result { + let page_num = q.page.unwrap_or(1); + let page_size = q.page_size.unwrap_or(25); + let result: Page = + du_db::variant::search(&st.pool, q.query.as_deref(), page_num, page_size).await?; + Ok(ListView { + query: q.query.clone().unwrap_or_default(), + rows: result.items.iter().map(RowView::from).collect(), + page: result.page, + page_size: result.page_size, + total: result.total, + total_pages: result.total_pages(), + }) +} + +#[derive(askama::Template)] +#[template(path = "variants/browser.html")] +struct BrowserTemplate { + t: T, + next: String, + user: Option, + list: ListView, +} + +#[derive(askama::Template)] +#[template(path = "variants/list.html")] +struct ListTemplate { + t: T, + list: ListView, +} + +struct CoordView { + build: String, + contig: String, + position: i64, + change: Option, +} + +#[derive(askama::Template)] +#[template(path = "variants/detail.html")] +struct DetailTemplate { + t: T, + name: String, + mutation_type: String, + naming_status: String, + common_names: Vec, + rs_ids: Vec, + coords: Vec, +} + +async fn browser( + State(st): State, + locale: Locale, + user: crate::auth::MaybeUser, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&BrowserTemplate { t: locale.t, next: 
locale.next, user: user.nav(), list })) +} + +async fn list( + State(st): State, + locale: Locale, + Query(q): Query, +) -> Result { + let list = load_list(&st, &q).await?; + Ok(html(&ListTemplate { t: locale.t, list })) +} + +async fn detail( + State(st): State, + locale: Locale, + Path(id): Path, +) -> Result { + let v = du_db::variant::get_by_id(&st.pool, VariantId(id)) + .await? + .ok_or_else(|| AppError::NotFound(format!("variant {id}")))?; + + let mut coords: Vec = v + .coordinates + .0 + .iter() + .map(|(build, c)| CoordView { + build: build.clone(), + contig: c.contig.clone(), + position: c.position, + change: match (&c.ancestral, &c.derived) { + (Some(anc), Some(der)) => Some(format!("{anc}>{der}")), + _ => None, + }, + }) + .collect(); + coords.sort_by(|a, b| a.build.cmp(&b.build)); + + Ok(html(&DetailTemplate { + t: locale.t, + name: v.canonical_name, + mutation_type: v.mutation_type.label().to_string(), + naming_status: v.naming_status.label().to_string(), + common_names: v.aliases.common_names, + rs_ids: v.aliases.rs_ids, + coords, + })) +} diff --git a/rust/crates/du-web/src/routes/versioning.rs b/rust/crates/du-web/src/routes/versioning.rs new file mode 100644 index 00000000..b3e2ab4e --- /dev/null +++ b/rust/crates/du-web/src/routes/versioning.rs @@ -0,0 +1,239 @@ +//! Tree-versioning management API (`/manage/change-sets/*`). Curator- +//! gated (session + Curator role), JSON in/out. Backs the change-set review and +//! apply workflow over `du_db::change_set`. +//! +//! Auth note: the legacy app gated these with an X-API-Key; here they use the +//! same session/Curator guard as the rest of the curator surface. They are not +//! part of the public OpenAPI document (which describes the unauthenticated read +//! API only). 
+ +use crate::auth::Curator; +use crate::error::AppError; +use crate::state::AppState; +use axum::extract::{Path, Query, State}; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use du_domain::enums::DnaType; +use du_domain::merge::SourceNode; +use serde::Deserialize; +use serde_json::{json, Value}; + +pub fn router() -> Router { + Router::new() + .route("/manage/haplogroups/merge", post(merge_run)) + .route("/manage/haplogroups/merge/preview", post(merge_preview)) + .route("/manage/change-sets", get(list).post(create)) + .route("/manage/change-sets/:id", get(detail)) + .route("/manage/change-sets/:id/changes", post(add_change)) + .route("/manage/change-sets/:id/start-review", post(start_review)) + .route("/manage/change-sets/:id/apply", post(apply)) + .route("/manage/change-sets/:id/discard", post(discard)) + .route("/manage/change-sets/:id/comments", get(list_comments).post(add_comment)) + .route("/manage/change-sets/:id/approve-all", post(approve_all)) + .route("/manage/change-sets/:id/changes/:change_id/review", post(review_change)) + .route("/manage/change-sets/:id/diff", get(diff)) +} + +#[derive(Deserialize)] +struct ListQuery { + haplogroup_type: Option, + status: Option, + page: Option, + page_size: Option, +} + +async fn list( + _cur: Curator, + State(st): State, + Query(q): Query, +) -> Result, AppError> { + let page = du_db::change_set::list( + &st.pool, + q.haplogroup_type.as_deref(), + q.status.as_deref(), + q.page.unwrap_or(1), + q.page_size.unwrap_or(20), + ) + .await?; + let total_pages = page.total_pages(); + Ok(Json(json!({ + "items": page.items, "total": page.total, "page": page.page, + "page_size": page.page_size, "total_pages": total_pages + }))) +} + +#[derive(Deserialize)] +struct CreateBody { + source: String, + haplogroup_type: Option, + description: Option, +} + +async fn create( + cur: Curator, + State(st): State, + Json(b): Json, +) -> Result, AppError> { + let id = du_db::change_set::create( + &st.pool, + &b.source, + 
b.haplogroup_type.as_deref(), + b.description.as_deref(), + &cur.0.display_name, + ) + .await?; + Ok(Json(json!({ "id": id }))) +} + +async fn detail(_cur: Curator, State(st): State, Path(id): Path) -> Result, AppError> { + let d = du_db::change_set::get(&st.pool, id) + .await? + .ok_or_else(|| AppError::NotFound(format!("change set {id}")))?; + Ok(Json(json!({ "summary": d.summary, "changes": d.changes, "comments": d.comments }))) +} + +#[derive(Deserialize)] +struct AddChangeBody { + change_type: String, + haplogroup_id: Option, + old_values: Option, + new_values: Option, +} + +async fn add_change( + _cur: Curator, + State(st): State, + Path(id): Path, + Json(b): Json, +) -> Result, AppError> { + let change_id = du_db::change_set::add_change( + &st.pool, + id, + &b.change_type, + b.haplogroup_id, + b.old_values.as_ref(), + b.new_values.as_ref(), + ) + .await?; + Ok(Json(json!({ "id": change_id }))) +} + +async fn start_review(_cur: Curator, State(st): State, Path(id): Path) -> Result, AppError> { + if du_db::change_set::start_review(&st.pool, id).await? { + Ok(Json(json!({ "status": "UNDER_REVIEW" }))) + } else { + Err(AppError::BadRequest("change set is not in a reviewable state".into())) + } +} + +async fn apply(cur: Curator, State(st): State, Path(id): Path) -> Result, AppError> { + let result = du_db::change_set::apply(&st.pool, id, &cur.0.display_name).await?; + Ok(Json(json!({ "status": "APPLIED", "result": result }))) +} + +async fn discard(cur: Curator, State(st): State, Path(id): Path) -> Result, AppError> { + if du_db::change_set::discard(&st.pool, id, &cur.0.display_name).await? { + Ok(Json(json!({ "status": "DISCARDED" }))) + } else { + Err(AppError::BadRequest("change set cannot be discarded (already applied?)".into())) + } +} + +async fn list_comments(_cur: Curator, State(st): State, Path(id): Path) -> Result, AppError> { + let d = du_db::change_set::get(&st.pool, id) + .await? 
+ .ok_or_else(|| AppError::NotFound(format!("change set {id}")))?; + Ok(Json(json!(d.comments))) +} + +#[derive(Deserialize)] +struct CommentBody { + comment: String, +} + +async fn add_comment( + cur: Curator, + State(st): State, + Path(id): Path, + Json(b): Json, +) -> Result, AppError> { + let cid = du_db::change_set::add_comment(&st.pool, id, &cur.0.display_name, &b.comment).await?; + Ok(Json(json!({ "id": cid }))) +} + +async fn approve_all(_cur: Curator, State(st): State, Path(id): Path) -> Result, AppError> { + let n = du_db::change_set::approve_all(&st.pool, id).await?; + Ok(Json(json!({ "approved": n }))) +} + +#[derive(Deserialize)] +struct ReviewBody { + approve: bool, +} + +async fn review_change( + _cur: Curator, + State(st): State, + Path((_id, change_id)): Path<(i64, i64)>, + Json(b): Json, +) -> Result, AppError> { + if du_db::change_set::review_change(&st.pool, change_id, b.approve).await? { + Ok(Json(json!({ "status": if b.approve { "APPROVED" } else { "REJECTED" } }))) + } else { + Err(AppError::BadRequest("change cannot be reviewed (set already applied/discarded?)".into())) + } +} + +async fn diff(_cur: Curator, State(st): State, Path(id): Path) -> Result, AppError> { + let d = du_db::change_set::diff(&st.pool, id).await?; + Ok(Json(json!(d))) +} + +// ── merge (Identify-Match-Graft) ───────────────────────────────────────────── + +#[derive(Deserialize)] +struct MergeBody { + source_name: String, + haplogroup_type: String, + #[serde(default)] + roots: Vec, +} + +fn parse_dna(s: &str) -> Result { + match s { + "Y_DNA" => Ok(DnaType::YDna), + "MT_DNA" => Ok(DnaType::MtDna), + other => Err(AppError::BadRequest(format!("haplogroup_type must be Y_DNA or MT_DNA, got {other:?}"))), + } +} + +/// Dry-run: run the merge against the current production tree and return the +/// plan + ambiguities without persisting anything. 
+async fn merge_preview( + _cur: Curator, + State(st): State, + Json(b): Json, +) -> Result, AppError> { + let dna = parse_dna(&b.haplogroup_type)?; + let existing = du_db::haplogroup::existing_tree(&st.pool, dna).await?; + let plan = du_domain::merge::merge(&existing, &b.roots, &b.source_name); + Ok(Json(json!(plan))) +} + +/// Run the merge and materialize the plan into a READY_FOR_REVIEW change set. +async fn merge_run( + cur: Curator, + State(st): State, + Json(b): Json, +) -> Result, AppError> { + let dna = parse_dna(&b.haplogroup_type)?; + let existing = du_db::haplogroup::existing_tree(&st.pool, dna).await?; + let plan = du_domain::merge::merge(&existing, &b.roots, &b.source_name); + let m = du_db::merge::materialize(&st.pool, &plan, &b.source_name, &b.haplogroup_type, &cur.0.display_name).await?; + Ok(Json(json!({ + "change_set_id": m.change_set_id, + "change_count": m.change_count, + "stats": plan.stats, + "ambiguities": plan.ambiguities, + }))) +} diff --git a/rust/crates/du-web/src/sig.rs b/rust/crates/du-web/src/sig.rs new file mode 100644 index 00000000..fb3acc26 --- /dev/null +++ b/rust/crates/du-web/src/sig.rs @@ -0,0 +1,86 @@ +//! Ed25519 DID-signature authentication for the Edge endpoints (D1 exchange, D2/D5 +//! research, D4 assertions, IBD). Every Edge submission signs a canonical message with a +//! key it controls; the AppView verifies it (no OAuth/cookie per call). +//! +//! `did:key` is self-certifying (verified directly). For `did:plc`/`did:web`, the AppView +//! verifies against the caller's **registered device key(s)** — Ed25519 public keys the +//! client published as `com.decodingus.atmosphere.deviceKey` records in its own repo and +//! the AppView ingested (`du_db::fed::device_key`). The DID doc's `#atproto` signing key is +//! NOT used: it's PDS-custodied, so a desktop client can't sign with it and can't add its +//! own verificationMethod. Registering a device key (writing to your own repo = proof of +//! 
control over the DID) is the bootstrap; revocation is deleting that record. + +use crate::error::AppError; +use du_db::PgPool; + +/// Verify that `signature` (standard base64 Ed25519) over `message` was produced by a key +/// `did` controls. `did:key` self-certifies; otherwise the signature must match one of the +/// DID's registered device keys. A bad/absent key → 403. +pub async fn verify_signed(pool: &PgPool, did: &str, message: &str, signature: &str) -> Result<(), AppError> { + if did.starts_with("did:key:") { + return du_atproto::verify_did_key(did, message.as_bytes(), signature).map_err(|_| AppError::Forbidden); + } + // did:plc / did:web → match any registered device key (none yet ⇒ not yet bootstrapped). + let keys = du_db::fed::device_key::keys_for(pool, did).await?; + if keys.is_empty() { + return Err(AppError::Forbidden); + } + if keys.iter().any(|k| du_atproto::verify_did_key(k, message.as_bytes(), signature).is_ok()) { + Ok(()) + } else { + Err(AppError::Forbidden) + } +} + +#[cfg(test)] +mod tests { + use super::verify_signed; + use base64::engine::general_purpose::STANDARD; + use base64::Engine; + use ed25519_dalek::{Signer, SigningKey}; + + /// A did:plc caller is verified against its registered device key; an unregistered DID + /// and a wrong-key signature are both rejected; did:key still self-certifies (offline). 
+ #[tokio::test] + async fn device_key_registration_gates_did_plc() { + let Some(url) = std::env::var("DATABASE_URL").ok().filter(|s| !s.is_empty()) else { + eprintln!("DATABASE_URL unset — skipping device-key sig test"); + return; + }; + let db = du_db::testing::ephemeral_db(&url).await.expect("ephemeral db"); + let pool = db.pool().clone(); + + let device = SigningKey::from_bytes(&[51u8; 32]); + let device_did_key = du_atproto::did::did_key_from_ed25519(&device.verifying_key()); + let alice = "did:plc:alice"; + let msg = "ibd-poll\ndid:plc:alice\n1700000000"; + let sig = STANDARD.encode(device.sign(msg.as_bytes()).to_bytes()); + + // Before registration: a perfectly valid signature is still rejected (no key on file). + assert!(verify_signed(&pool, alice, msg, &sig).await.is_err(), "unregistered DID → 403"); + + // Register the device key as the ingest would (a deviceKey record in alice's repo). + sqlx::query( + "INSERT INTO fed.device_key (did, rkey, at_uri, public_key, time_us) VALUES ($1,$2,$3,$4,$5)", + ) + .bind(alice) + .bind("dk1") + .bind("at://did:plc:alice/com.decodingus.atmosphere.deviceKey/dk1") + .bind(&device_did_key) + .bind(1_i64) + .execute(&pool) + .await + .unwrap(); + + // Now the registered key verifies the signature. + assert!(verify_signed(&pool, alice, msg, &sig).await.is_ok(), "registered key verifies"); + + // A signature from a DIFFERENT key (not registered) is rejected. + let other = SigningKey::from_bytes(&[52u8; 32]); + let other_sig = STANDARD.encode(other.sign(msg.as_bytes()).to_bytes()); + assert!(verify_signed(&pool, alice, msg, &other_sig).await.is_err(), "wrong key → 403"); + + // did:key remains self-certifying (no DB lookup needed). 
+ assert!(verify_signed(&pool, &device_did_key, msg, &sig).await.is_ok(), "did:key self-certifies"); + } +} diff --git a/rust/crates/du-web/src/state.rs b/rust/crates/du-web/src/state.rs new file mode 100644 index 00000000..7d6cc3e4 --- /dev/null +++ b/rust/crates/du-web/src/state.rs @@ -0,0 +1,22 @@ +//! Shared application state injected into handlers via `State`. + +use crate::oauth::OauthClient; +use du_db::PgPool; +use std::sync::Arc; +use tower_cookies::Key; + +#[derive(Clone)] +pub struct AppState { + pub pool: PgPool, + /// Signing key for session cookies (derived from APP_SECRET). + pub key: Key, + /// AT Protocol OAuth client (None when OAuth isn't configured). + pub oauth: Option>, +} + +// Lets tower_cookies' SignedCookies pull the Key straight from AppState. +impl axum::extract::FromRef for Key { + fn from_ref(state: &AppState) -> Self { + state.key.clone() + } +} diff --git a/rust/crates/du-web/src/tree_layout.rs b/rust/crates/du-web/src/tree_layout.rs new file mode 100644 index 00000000..b8a40ea9 --- /dev/null +++ b/rust/crates/du-web/src/tree_layout.rs @@ -0,0 +1,380 @@ +//! Server-side phylogenetic-tree layout — the Rust port of the legacy Scala +//! `TreeLayoutService`. Given a (depth-windowed) nested haplogroup tree and an +//! orientation, it computes pixel-ready node boxes and right-angle SVG connector +//! paths so the templates can render an inline `` cladogram with no +//! client-side layout library. +//! +//! Two orientations mirror the two Scala render modes: +//! * **Horizontal** — depth runs left→right, breadth top→bottom. +//! * **Vertical** — depth runs top→bottom, breadth left→right. +//! +//! Unlike the Scala engine we do *not* collapse non-backbone branches: the +//! caller already bounds the tree to a fixed depth window, so we lay out exactly +//! what we are given. Backbone / recently-updated remain as node *coloring*. + +/// 1950 — the radiocarbon "before present" reference year, for ybp→calendar. 
+const PRESENT_YEAR: i32 = 1950; + +const NODE_WIDTH: f64 = 150.0; +const NODE_HEIGHT: f64 = 80.0; +const MARGIN_TOP: f64 = 50.0; +const MARGIN_LEFT: f64 = 120.0; +/// Sample-tip marker radius and label offset. +const TIP_R: f64 = 4.0; +const TIP_LABEL_DX: f64 = 8.0; +/// Approx. label width reserved past a tip marker (for canvas sizing). +const TIP_LABEL_W: f64 = 130.0; + +/// Tree render orientation; persisted in the `tree_orient` cookie. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum Orientation { + Horizontal, + Vertical, +} + +impl Orientation { + /// `"v"`/`"vertical"` → Vertical, anything else → Horizontal. + pub fn parse(s: &str) -> Orientation { + match s.trim().to_ascii_lowercase().as_str() { + "v" | "vertical" | "true" => Orientation::Vertical, + _ => Orientation::Horizontal, + } + } + pub fn code(self) -> &'static str { + match self { + Orientation::Horizontal => "h", + Orientation::Vertical => "v", + } + } + pub fn is_vertical(self) -> bool { + self == Orientation::Vertical + } + /// (depth spacing, breadth spacing) for this orientation. + fn spacing(self) -> (f64, f64) { + match self { + Orientation::Horizontal => (200.0, 90.0), + Orientation::Vertical => (130.0, 180.0), + } + } +} + +/// Input node fed to the layout engine (nested). +#[derive(Debug, Clone)] +pub struct InNode { + pub name: String, + pub variant_count: i64, + /// Placed-sample leaf labels to hang under this node (capped; YFull-style tips). + pub samples: Vec, + /// How many more placed samples beyond `samples` (renders a "+N" overflow tip). + pub sample_overflow: i64, + pub is_backbone: bool, + pub is_recent: bool, + pub formed_ybp: Option, + pub tmrca_ybp: Option, + /// Window-boundary node with clipped children — show a "+" affordance. + pub has_hidden: bool, + pub children: Vec, +} + +/// A laid-out node box, with pixel coordinates ready for the SVG template. 
+#[derive(Debug, Clone)] +pub struct LaidNode { + pub name: String, + pub variant_count: i64, + /// CSS class selecting the fill: backbone / recent / default. + pub fill_class: &'static str, + pub is_backbone: bool, + pub is_recent: bool, + pub has_hidden: bool, + /// Formatted calendar year (e.g. "2400 BC"), if an age is set. + pub formed: Option, + pub tmrca: Option, + /// Top-left of the 150×80 box. + pub rect_x: f64, + pub rect_y: f64, + /// Box center (text anchor X). + pub cx: f64, + pub cy: f64, + /// Pre-computed text-baseline Y positions (template stays arithmetic-free). + pub name_y: f64, + pub count_y: f64, + pub age_y: f64, +} + +/// A placed sample rendered as a minimal terminal tip hanging off its haplogroup node. +#[derive(Debug, Clone)] +pub struct LaidTip { + /// The sample label (accession/alias) or, for overflow, `+N`. + pub label: String, + /// Tip marker center. + pub cx: f64, + pub cy: f64, + /// Baseline for the label text (to the right of the marker). + pub label_x: f64, + /// When set, this is the `+N` overflow tip; opens the node's sidebar (the haplogroup name). + pub overflow_node: Option, +} + +/// An SVG connector path between a parent and one child. +#[derive(Debug, Clone)] +pub struct Link { + pub path: String, +} + +/// The full laid-out tree. +#[derive(Debug, Clone)] +pub struct Laid { + pub nodes: Vec, + /// Placed-sample leaf tips (rendered minimal + distinct from the haplogroup boxes). + pub tips: Vec, + pub links: Vec, + pub width: f64, + pub height: f64, +} + +/// ybp → " AD" / " BC" (mirrors the Scala `formatYbp`). 
+fn format_ybp(ybp: i32) -> String { + let year = PRESENT_YEAR - ybp; + if year < 0 { + format!("{} BC", -year) + } else { + format!("{year} AD") + } +} + +fn fill_class(is_backbone: bool, is_recent: bool) -> &'static str { + if is_backbone { + "node-backbone" + } else if is_recent { + "node-recent" + } else { + "node-default" + } +} + +/// Internal placement of a node: its breadth/depth position in the abstract +/// (depth, breadth) plane before mapping to SVG x/y. +struct Placement { + breadth: f64, + depth: f64, +} + +struct Builder { + orientation: Orientation, + depth_spacing: f64, + breadth_spacing: f64, + nodes: Vec, + tips: Vec, + links: Vec, +} + +impl Builder { + /// Map an abstract (depth, breadth) position to an SVG box center. + fn center(&self, depth: f64, breadth: f64) -> (f64, f64) { + match self.orientation { + // depth → X, breadth → Y + Orientation::Horizontal => (depth, breadth), + // breadth → X, depth → Y + Orientation::Vertical => (breadth, depth), + } + } + + /// Returns the node's placement; `breadth_cursor` is advanced past the + /// node's subtree. + fn place(&mut self, node: &InNode, depth: usize, breadth_cursor: &mut f64) -> Placement { + let depth_pos = depth as f64 * self.depth_spacing + + if self.orientation == Orientation::Horizontal { MARGIN_LEFT } else { MARGIN_TOP }; + + // Lay out children first so a parent can center over them. + let child_placements: Vec = node + .children + .iter() + .map(|c| self.place(c, depth + 1, breadth_cursor)) + .collect(); + + // Lay out the node's placed samples as leaf tips one depth-step out (after the subclade + // children in breadth). Each reserves a **full node slot** — sample tips are spaced like + // any other leaf so their labels never collide. A trailing "+N" tip stands in for the + // overflow and opens the node's sidebar. 
+ let tip_depth = (depth + 1) as f64 * self.depth_spacing + + if self.orientation == Orientation::Horizontal { MARGIN_LEFT } else { MARGIN_TOP }; + let mut tip_breadths: Vec = Vec::new(); + let mut tip_labels: Vec<(String, Option)> = + node.samples.iter().map(|s| (s.clone(), None)).collect(); + if node.sample_overflow > 0 { + tip_labels.push((format!("+{}", node.sample_overflow), Some(node.name.clone()))); + } + for (label, overflow_node) in tip_labels { + let b = *breadth_cursor; + *breadth_cursor += self.breadth_spacing; + let (cx, cy) = self.center(tip_depth, b); + self.tips.push(LaidTip { label, cx, cy, label_x: cx + TIP_LABEL_DX, overflow_node }); + tip_breadths.push(b); + } + + // Center the node over its children *and* its sample tips (tips are leaves too). + let mut spans: Vec = child_placements.iter().map(|p| p.breadth).collect(); + spans.extend(tip_breadths.iter().copied()); + let breadth_pos = match (spans.first(), spans.last()) { + (Some(first), Some(last)) => (first + last) / 2.0, + _ => { + // True leaf (no children, no samples): take the cursor, then advance it. + let b = *breadth_cursor; + *breadth_cursor += self.breadth_spacing; + b + } + }; + + let (cx, cy) = self.center(depth_pos, breadth_pos); + self.nodes.push(LaidNode { + name: node.name.clone(), + variant_count: node.variant_count, + fill_class: fill_class(node.is_backbone, node.is_recent), + is_backbone: node.is_backbone, + is_recent: node.is_recent, + has_hidden: node.has_hidden, + formed: node.formed_ybp.map(format_ybp), + tmrca: node.tmrca_ybp.map(format_ybp), + rect_x: cx - NODE_WIDTH / 2.0, + rect_y: cy - NODE_HEIGHT / 2.0, + cx, + cy, + name_y: cy - 16.0, + count_y: cy + 6.0, + age_y: cy + 26.0, + }); + + // Connectors from this node to each child and to each sample tip. 
+ for child in &child_placements { + self.links.push(Link { + path: self.link_path(depth_pos, breadth_pos, child.depth, child.breadth), + }); + } + for &tb in &tip_breadths { + self.links.push(Link { path: self.tip_link_path(depth_pos, breadth_pos, tip_depth, tb) }); + } + + Placement { breadth: breadth_pos, depth: depth_pos } + } + + /// Connector from a node to one of its sample tips. The vertical bus is placed at the + /// **same** position as the node's child connectors (midpoint to the box edge), then the + /// final stub continues out to the tip marker — so a node's subclade and sample links share + /// one clean bus instead of two misaligned bars. + fn tip_link_path(&self, p_depth: f64, p_breadth: f64, c_depth: f64, c_breadth: f64) -> String { + match self.orientation { + Orientation::Horizontal => { + let sx = p_depth + NODE_WIDTH / 2.0; + let bus = (sx + (c_depth - NODE_WIDTH / 2.0)) / 2.0; + let tx = c_depth - TIP_R; + format!("M {sx:.1} {p_breadth:.1} H {bus:.1} V {c_breadth:.1} H {tx:.1}") + } + Orientation::Vertical => { + let sy = p_depth + NODE_HEIGHT / 2.0; + let bus = (sy + (c_depth - NODE_HEIGHT / 2.0)) / 2.0; + let ty = c_depth - TIP_R; + format!("M {p_breadth:.1} {sy:.1} V {bus:.1} H {c_breadth:.1} V {ty:.1}") + } + } + } + + /// Right-angle connector from a parent (depth/breadth) to a child, in the + /// orientation's SVG coordinate space. 
+ fn link_path(&self, p_depth: f64, p_breadth: f64, c_depth: f64, c_breadth: f64) -> String { + match self.orientation { + Orientation::Horizontal => { + let sx = p_depth + NODE_WIDTH / 2.0; + let tx = c_depth - NODE_WIDTH / 2.0; + let mid = (sx + tx) / 2.0; + format!("M {sx:.1} {p_breadth:.1} H {mid:.1} V {c_breadth:.1} H {tx:.1}") + } + Orientation::Vertical => { + let sy = p_depth + NODE_HEIGHT / 2.0; + let ty = c_depth - NODE_HEIGHT / 2.0; + let mid = (sy + ty) / 2.0; + format!("M {p_breadth:.1} {sy:.1} V {mid:.1} H {c_breadth:.1} V {ty:.1}") + } + } + } +} + +/// Lay out the given tree for the orientation. Returns `None` if `root` is None. +pub fn layout(root: Option<&InNode>, orientation: Orientation) -> Option { + let root = root?; + let (depth_spacing, breadth_spacing) = orientation.spacing(); + let initial_breadth = if orientation == Orientation::Horizontal { MARGIN_TOP } else { MARGIN_LEFT }; + let mut b = Builder { + orientation, + depth_spacing, + breadth_spacing, + nodes: Vec::new(), + tips: Vec::new(), + links: Vec::new(), + }; + let mut cursor = initial_breadth; + b.place(root, 0, &mut cursor); + + let node_cx = b.nodes.iter().map(|n| n.cx).fold(0.0_f64, f64::max); + let tip_cx = b.tips.iter().map(|t| t.label_x + TIP_LABEL_W).fold(0.0_f64, f64::max); + let max_cx = node_cx.max(tip_cx); + let node_cy = b.nodes.iter().map(|n| n.cy).fold(0.0_f64, f64::max); + let tip_cy = b.tips.iter().map(|t| t.cy).fold(0.0_f64, f64::max); + let max_cy = node_cy.max(tip_cy); + let width = max_cx + NODE_WIDTH / 2.0 + MARGIN_LEFT; + let height = max_cy + NODE_HEIGHT / 2.0 + MARGIN_TOP; + + Some(Laid { nodes: b.nodes, tips: b.tips, links: b.links, width, height }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn leaf(name: &str) -> InNode { + InNode { + name: name.into(), + variant_count: 0, + samples: vec![], + sample_overflow: 0, + is_backbone: false, + is_recent: false, + formed_ybp: None, + tmrca_ybp: None, + has_hidden: false, + children: vec![], + } + } + + 
#[test] + fn parent_centers_over_children() { + let root = InNode { children: vec![leaf("A"), leaf("B")], ..leaf("R") }; + let laid = layout(Some(&root), Orientation::Horizontal).unwrap(); + assert_eq!(laid.nodes.len(), 3); + assert_eq!(laid.links.len(), 2); + // Root (pushed last, after its children) sits at the breadth midpoint. + let root_node = laid.nodes.iter().find(|n| n.name == "R").unwrap(); + let a = laid.nodes.iter().find(|n| n.name == "A").unwrap(); + let bb = laid.nodes.iter().find(|n| n.name == "B").unwrap(); + // Horizontal: breadth is the Y axis. + assert!((root_node.cy - (a.cy + bb.cy) / 2.0).abs() < 0.01); + // Children are one depth-step to the right of the root. + assert!(a.cx > root_node.cx && bb.cx > root_node.cx); + } + + #[test] + fn vertical_swaps_axes() { + let root = InNode { children: vec![leaf("A"), leaf("B")], ..leaf("R") }; + let laid = layout(Some(&root), Orientation::Vertical).unwrap(); + let root_node = laid.nodes.iter().find(|n| n.name == "R").unwrap(); + let a = laid.nodes.iter().find(|n| n.name == "A").unwrap(); + // Vertical: depth is the Y axis — children sit below the root. + assert!(a.cy > root_node.cy); + assert!((root_node.cx - a.cx).abs() < 200.0); + } + + #[test] + fn ybp_formatting() { + assert_eq!(format_ybp(2000), "50 BC"); // 1950 - 2000 = -50 + assert_eq!(format_ybp(1000), "950 AD"); + } +} diff --git a/rust/crates/du-web/templates/account/profile.html b/rust/crates/du-web/templates/account/profile.html new file mode 100644 index 00000000..f008808f --- /dev/null +++ b/rust/crates/du-web/templates/account/profile.html @@ -0,0 +1,22 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("profile.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+

{{ t.get("profile.title") }}

+{% if saved %}
{{ t.get("profile.saved") }}
{% endif %} +
+
{{ t.get("profile.roles") }}
{{ roles }}
+ {% if let Some(h) = handle %}
{{ t.get("profile.handle") }}
{{ h }}
{% endif %} + {% if let Some(d) = did %}
DID
{{ d }}
{% endif %} + {% if let Some(e) = email %}
{{ t.get("profile.email") }}
{{ e }}
{% endif %} +
{{ t.get("profile.member_since") }}
{{ member_since }}
+
+
+ +
+ + +
+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/auth/login.html b/rust/crates/du-web/templates/auth/login.html new file mode 100644 index 00000000..57e8e198 --- /dev/null +++ b/rust/crates/du-web/templates/auth/login.html @@ -0,0 +1,22 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("auth.login.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+
+

{{ t.get("auth.login.title") }}

+ {% if error %}
{{ t.get("auth.login.error") }}
{% endif %} + {# Full POST (not boosted) so the Set-Cookie + redirect are handled by the browser. #} +
+
+ + +
+
+ + +
+ +
+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/base.html b/rust/crates/du-web/templates/base.html new file mode 100644 index 00000000..ad97a52a --- /dev/null +++ b/rust/crates/du-web/templates/base.html @@ -0,0 +1,124 @@ + + + + + + {% block title %}{{ t.get("app.name") }}{% endblock %} + + + + + {% block head %}{% endblock %} + + + + +
+ {% block content %}{% endblock %} +
+ + + + {# GDPR cookie-consent banner — shown by JS only when no consent cookie is set. #} + + + + {% block scripts %}{% endblock %} + + diff --git a/rust/crates/du-web/templates/biosamples/map.html b/rust/crates/du-web/templates/biosamples/map.html new file mode 100644 index 00000000..a84aca1f --- /dev/null +++ b/rust/crates/du-web/templates/biosamples/map.html @@ -0,0 +1,15 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("map.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block head %} + + + + +{% endblock %} +{% block content %} +
+

{{ t.get("map.title") }}

+ {{ t.get("map.samples") }} +
+
{{ t.get("map.loading") }}
+{% endblock %} diff --git a/rust/crates/du-web/templates/coverage/benchmarks.html b/rust/crates/du-web/templates/coverage/benchmarks.html new file mode 100644 index 00000000..a0ee7661 --- /dev/null +++ b/rust/crates/du-web/templates/coverage/benchmarks.html @@ -0,0 +1,37 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("coverage.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+

{{ t.get("coverage.title") }}

+ {{ t.get("coverage.labs.byLab") }} +
+ + + + + + + + + + + + + {% for r in rows %} + + + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("coverage.col.lab") }}{{ t.get("coverage.col.testType") }}{{ t.get("coverage.col.libraries") }}{{ t.get("coverage.col.meanDepth") }}{{ t.get("coverage.col.cov10x") }}{{ t.get("coverage.col.expectedDepth") }}
{{ r.lab }}{{ r.test_type }}{{ r.libraries }} + {{ r.mean_depth }} + {% if r.meets %}{% else %}!{% endif %} + {{ r.cov_10x }}{{ r.expected }}
{{ t.get("coverage.none") }}
+{% endblock %} diff --git a/rust/crates/du-web/templates/coverage/lab_rows.html b/rust/crates/du-web/templates/coverage/lab_rows.html new file mode 100644 index 00000000..128743c2 --- /dev/null +++ b/rust/crates/du-web/templates/coverage/lab_rows.html @@ -0,0 +1,28 @@ +{# Fragment: one lab's coverage by test type. Target of #lab-detail. #} +
+
{{ lab }}
+
+ + + + + + + + + + {% for r in rows %} + + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("coverage.col.testType") }}{{ t.get("coverage.col.libraries") }}{{ t.get("coverage.col.meanDepth") }}{{ t.get("coverage.col.cov10x") }}{{ t.get("coverage.col.expectedDepth") }}
{{ r.test_type }}{{ r.libraries }}{{ r.mean_depth }} {% if r.meets %}{% else %}!{% endif %}{{ r.cov_10x }}{{ r.expected }}
{{ t.get("coverage.none") }}
+
+
diff --git a/rust/crates/du-web/templates/coverage/labs.html b/rust/crates/du-web/templates/coverage/labs.html new file mode 100644 index 00000000..5efc5d83 --- /dev/null +++ b/rust/crates/du-web/templates/coverage/labs.html @@ -0,0 +1,33 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("coverage.labs.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+

{{ t.get("coverage.labs.title") }}

+ {{ t.get("coverage.labs.allLabs") }} +
+
+
+ + + + + + + + {% for l in labs %} + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("coverage.col.lab") }}{{ t.get("coverage.col.libraries") }}{{ t.get("coverage.labs.testTypes") }}
{{ l.lab }}{{ l.libraries }}{{ l.test_types }}
{{ t.get("coverage.none") }}
+
+
+

{{ t.get("coverage.labs.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/change-sets/detail.html b/rust/crates/du-web/templates/curator/change-sets/detail.html new file mode 100644 index 00000000..22dfec5f --- /dev/null +++ b/rust/crates/du-web/templates/curator/change-sets/detail.html @@ -0,0 +1,111 @@ +{# Fragment: change-set review panel. Target of #change-set-detail. #} +
+
+ {{ cs.source }} + {{ cs.status }} +
+
+ {% if let Some(n) = cs.notice %} +
{{ n }}
+ {% endif %} + +
+
{{ t.get("cs.col.type") }}
{{ cs.dna_type }}
+
{{ t.get("cs.col.changes") }}
{{ cs.change_count }}
+
{{ t.get("cs.col.created") }}
{{ cs.created_by }} · {{ cs.created_at }}
+ {% if let Some(pb) = cs.promoted_by %} +
{{ t.get("cs.finalized") }}
+
{{ pb }}{% if let Some(pa) = cs.promoted_at %} · {{ pa }}{% endif %}
+ {% endif %} +
+ {% if cs.description != "" %}

{{ cs.description }}

{% endif %} + + {# diff summary #} +
+ +{{ cs.added }} {{ t.get("cs.diff.added") }} + -{{ cs.removed }} {{ t.get("cs.diff.removed") }} + ~{{ cs.modified }} {{ t.get("cs.diff.modified") }} + ⤳{{ cs.reparented }} {{ t.get("cs.diff.reparented") }} +
+ {% if !cs.diff.is_empty() %} +
+ {{ t.get("cs.diff.detail") }} +
    + {% for d in cs.diff %} +
  • + {{ d.diff_type }} {{ d.name }} +
    {{ d.detail }}
    +
  • + {% endfor %} +
+
+ {% endif %} + + {# per-change review #} +
{{ t.get("cs.changes") }}
+ + + {% for c in cs.changes %} + + + + + {% else %} + + {% endfor %} + +
+ {{ c.change_type }} + {{ c.name }} + {% if c.new_values != "" %} +
{{ t.get("cs.values") }}
{{ c.new_values }}
+ {% endif %} +
+ {{ c.status }} + {% if cs.can_review %} + + + + + {% endif %} +
{{ t.get("cs.changes.none") }}
+ + {# lifecycle actions #} +
+ {% if cs.can_start %} + + {% endif %} + {% if cs.can_review %} + + {% endif %} + {% if cs.can_apply %} + + {% endif %} + {% if cs.can_discard %} + + {% endif %} +
+ + {# comments #} +
{{ t.get("cs.comments") }}
+ {% for c in cs.comments %} +
{{ c.by }} {{ c.at }}
{{ c.comment }}
+ {% else %} +

{{ t.get("cs.comments.none") }}

+ {% endfor %} +
+ + +
+
+
diff --git a/rust/crates/du-web/templates/curator/change-sets/list.html b/rust/crates/du-web/templates/curator/change-sets/list.html new file mode 100644 index 00000000..a6c434d8 --- /dev/null +++ b/rust/crates/du-web/templates/curator/change-sets/list.html @@ -0,0 +1,34 @@ +{# Fragment: change-set rows. Target of #change-set-table. #} + + + + + + + + + + {% for r in list.rows %} + + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("cs.col.source") }}{{ t.get("cs.col.type") }}{{ t.get("cs.col.status") }}{{ t.get("cs.col.changes") }}{{ t.get("cs.col.created") }}
{{ r.source }}{{ r.dna_type }}{{ r.status }}{{ r.change_count }}{{ r.created_by }} · {{ r.created_at }}
{{ t.get("cs.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/change-sets/page.html b/rust/crates/du-web/templates/curator/change-sets/page.html new file mode 100644 index 00000000..4b47450e --- /dev/null +++ b/rust/crates/du-web/templates/curator/change-sets/page.html @@ -0,0 +1,27 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("cs.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("cs.title") }}

+
+
+ +
+ {% include "curator/change-sets/list.html" %} +
+
+
+

{{ t.get("cs.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/dashboard.html b/rust/crates/du-web/templates/curator/dashboard.html new file mode 100644 index 00000000..2f3fcdd9 --- /dev/null +++ b/rust/crates/du-web/templates/curator/dashboard.html @@ -0,0 +1,38 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("curator.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("curator.title") }}

+

{{ t.get("curator.welcome") }} {{ display_name }} · {{ t.get("curator.roles") }}: {{ roles }}

+ +{% endblock %} diff --git a/rust/crates/du-web/templates/curator/denovo-conflicts/list.html b/rust/crates/du-web/templates/curator/denovo-conflicts/list.html new file mode 100644 index 00000000..df545f1a --- /dev/null +++ b/rust/crates/du-web/templates/curator/denovo-conflicts/list.html @@ -0,0 +1,38 @@ +{# Fragment: de-novo conflict rows. Target of #dc-table. #} + + + + + + + + + + + + {% for r in list.rows %} + + + + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("dc.col.lineage") }}{{ t.get("dc.col.clade") }}{{ t.get("dc.col.tips") }}{{ t.get("dc.col.magnitude") }}{{ t.get("dc.col.home") }}{{ t.get("dc.col.foreign") }}{{ t.get("dc.col.away") }}
{{ r.lineage }}{{ r.clade }}{% if !r.label.is_empty() %} {{ r.label }}{% endif %}{{ r.n_tips }}{{ r.magnitude }}{{ r.home_node }}{% if r.foreign_in > 0 %}{{ r.foreign_in }}{% else %}—{% endif %}{% if r.members_away > 0 %}{{ r.members_away }}{% else %}—{% endif %}
{{ t.get("dc.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/denovo-conflicts/page.html b/rust/crates/du-web/templates/curator/denovo-conflicts/page.html new file mode 100644 index 00000000..49142b5e --- /dev/null +++ b/rust/crates/du-web/templates/curator/denovo-conflicts/page.html @@ -0,0 +1,17 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("dc.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("dc.title") }}

+

{{ t.get("dc.intro") }}

+ +
+ {% include "curator/denovo-conflicts/list.html" %} +
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/haplogroups/detail.html b/rust/crates/du-web/templates/curator/haplogroups/detail.html new file mode 100644 index 00000000..cd802570 --- /dev/null +++ b/rust/crates/du-web/templates/curator/haplogroups/detail.html @@ -0,0 +1,57 @@ +{# Fragment: haplogroup detail + actions. Target of #hg-detail. #} +
+
+ {{ hg.name }} + {{ hg.dna }} +
+
+ {% if let Some(err) = error %}
{{ err }}
{% endif %} +
+
{{ t.get("hg.field.lineage") }}
{{ hg.lineage }}
+
{{ t.get("hg.field.source") }}
{{ hg.source }}
+ {% if let Some(p) = hg.parent_name %}
{{ t.get("hg.parent") }}
{{ p }}
{% endif %} +
{{ t.get("hg.field.formed") }}
{{ hg.formed_ybp }}
+
{{ t.get("hg.field.tmrca") }}
{{ hg.tmrca_ybp }}
+
+
+ + {% if can_delete %} + + {% endif %} +
+ + {# ── structural ops ──────────────────────────────────────────────────── #} +
+ {{ t.get("hg.restructure") }} +
+ {# reparent #} +
+ + +
+ + {# merge into parent #} + {% if let Some(p) = hg.parent_name %} + + {% endif %} + + {# split: move some variants to a new child #} + {% if !hg.variants.is_empty() %} +
+
{{ t.get("hg.split.variants") }}: {% for v in hg.variants %}{{ v }}{% if !loop.last %}, {% endif %}{% endfor %}
+
+ + + +
+
+ {% endif %} +
+
+
+
diff --git a/rust/crates/du-web/templates/curator/haplogroups/empty.html b/rust/crates/du-web/templates/curator/haplogroups/empty.html new file mode 100644 index 00000000..29ca6a14 --- /dev/null +++ b/rust/crates/du-web/templates/curator/haplogroups/empty.html @@ -0,0 +1,2 @@ +{# Shown in #hg-detail after a deletion. #} +

{{ t.get("hg.select") }}

diff --git a/rust/crates/du-web/templates/curator/haplogroups/form.html b/rust/crates/du-web/templates/curator/haplogroups/form.html new file mode 100644 index 00000000..0965a3a0 --- /dev/null +++ b/rust/crates/du-web/templates/curator/haplogroups/form.html @@ -0,0 +1,48 @@ +{# Fragment: create/edit form. Posts to {action}, swaps #hg-detail with the saved + panel; the server's HX-Trigger then reloads the list. Target of #hg-detail. #} +
+
{% if is_edit %}{{ t.get("hg.edit") }}{% else %}{{ t.get("hg.new") }}{% endif %}
+
+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+ + +
+
+ + +
+
+
+ + {% if is_edit %} + + {% else %} + + {% endif %} +
+
+
+
diff --git a/rust/crates/du-web/templates/curator/haplogroups/list.html b/rust/crates/du-web/templates/curator/haplogroups/list.html new file mode 100644 index 00000000..09b7e947 --- /dev/null +++ b/rust/crates/du-web/templates/curator/haplogroups/list.html @@ -0,0 +1,30 @@ +{# Fragment: haplogroup rows + pager. Target of #hg-table. #} + + + + + + {% for r in list.rows %} + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("hg.col.name") }}{{ t.get("hg.col.type") }}{{ t.get("hg.col.lineage") }}
{{ r.name }}{{ r.dna }}{{ r.lineage }}
{{ t.get("hg.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/haplogroups/page.html b/rust/crates/du-web/templates/curator/haplogroups/page.html new file mode 100644 index 00000000..a1c83fd3 --- /dev/null +++ b/rust/crates/du-web/templates/curator/haplogroups/page.html @@ -0,0 +1,39 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("hg.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+

{{ t.get("hg.title") }}

+ +
+
+
+
+ + +
+ {# Reloads on load and whenever a mutation fires hg-changed on the body. #} +
+ {% include "curator/haplogroups/list.html" %} +
+
+
+
+

{{ t.get("hg.select") }}

+
+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/instrument-proposals/detail.html b/rust/crates/du-web/templates/curator/instrument-proposals/detail.html new file mode 100644 index 00000000..a0f0f5c4 --- /dev/null +++ b/rust/crates/du-web/templates/curator/instrument-proposals/detail.html @@ -0,0 +1,79 @@ +{# Fragment: proposal review panel. Target of #proposal-detail. #} +
+
+ {% if p.resolved %}{{ t.get("ip.resolved") }}{% else %}{{ p.instrument_id }}{% endif %} + {% if !p.resolved %}{{ p.status }}{% endif %} +
+
+ {% if let Some(n) = p.notice %}
{{ n }}
{% endif %} + + {% if p.resolved %} +

{{ t.get("ip.resolved.note") }}

+ {% else %} +
+
{{ t.get("ip.col.lab") }}
{{ p.proposed_lab }}
+
{{ t.get("ip.obs") }}
{{ p.obs_count }}
+
{{ t.get("ip.citizens") }}
{{ p.citizen_count }}
+
{{ t.get("ip.col.confidence") }}
{{ p.confidence }}
+
+ +

{{ t.get("ip.observations") }}

+ + + + + + + + + {% for o in p.observations %} + + + + + + + {% endfor %} + +
{{ t.get("ip.col.lab") }}{{ t.get("ip.col.platform") }}{{ t.get("ip.col.model") }}{{ t.get("ip.col.citizen") }}
{{ o.lab }}{{ o.platform }}{{ o.model }}{{ o.citizen }}
+ + {% if p.actionable %} +
+
+
+
+ + +
+
+
+ + +
+
+ + +
+
+
+ + +
+ + {{ t.get("ip.accept.hint") }} +
+
+
+
+ + + +
+
+
+ {% endif %} + {% endif %} +
+
diff --git a/rust/crates/du-web/templates/curator/instrument-proposals/list.html b/rust/crates/du-web/templates/curator/instrument-proposals/list.html new file mode 100644 index 00000000..bd503a5e --- /dev/null +++ b/rust/crates/du-web/templates/curator/instrument-proposals/list.html @@ -0,0 +1,46 @@ +{# Fragment: proposal rows + status filter. Target of #proposal-table. #} +
+ + + + + +
+ + + + + + + + + + {% for r in list.rows %} + + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("ip.col.instrument") }}{{ t.get("ip.col.lab") }}{{ t.get("ip.col.support") }}{{ t.get("ip.col.confidence") }}{{ t.get("ip.col.status") }}
{{ r.instrument_id }}{{ r.lab }}{{ r.obs }} / {{ r.citizens }}{{ r.confidence }}{{ r.status }}
{{ t.get("ip.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/instrument-proposals/page.html b/rust/crates/du-web/templates/curator/instrument-proposals/page.html new file mode 100644 index 00000000..ff2e0b6c --- /dev/null +++ b/rust/crates/du-web/templates/curator/instrument-proposals/page.html @@ -0,0 +1,18 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("ip.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("ip.title") }}

+

{{ t.get("ip.intro") }}

+
+
+
+ {% include "curator/instrument-proposals/list.html" %} +
+
+
+

{{ t.get("ip.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/naming/detail.html b/rust/crates/du-web/templates/curator/naming/detail.html new file mode 100644 index 00000000..8a2af870 --- /dev/null +++ b/rust/crates/du-web/templates/curator/naming/detail.html @@ -0,0 +1,47 @@ +{# Fragment: variant naming panel. Target of #naming-detail. #} +
+
+ {% if let Some(n) = v.name %}{{ n }}{% else %}{{ t.get("nm.unnamed") }}{% endif %} + {{ v.status }} +
+
+ {% if let Some(n) = v.notice %}
{{ n }}
{% endif %} + +
+
{{ t.get("nm.coord") }}
{{ v.coord }}
+
{{ t.get("nm.type") }}
{{ v.mutation_type }}
+ {% if let Some(d) = v.defining %}
{{ t.get("nm.defines") }}
{{ d }}
{% endif %} + {% if !v.aliases.is_empty() %} +
{{ t.get("nm.aliases") }}
+
{% for a in v.aliases %}{{ a }}{% if !loop.last %}, {% endif %}{% endfor %}
+ {% endif %} +
+ + {% if !v.dedup.is_empty() %} +
+ {{ t.get("nm.dedup.warn") }} + {% for c in v.dedup %}{{ c.name }}{% if !loop.last %}, {% endif %}{% endfor %} +
{{ t.get("nm.dedup.hint") }}
+
+ {% endif %} + + {% if v.can_assign %} +
+ + {% if v.status != "PENDING_REVIEW" %} + + {% else %} + + {% endif %} +
+ {% else %} +

{{ t.get("nm.named_note") }}

+ {% endif %} +
+
diff --git a/rust/crates/du-web/templates/curator/naming/list.html b/rust/crates/du-web/templates/curator/naming/list.html new file mode 100644 index 00000000..8f2de2c2 --- /dev/null +++ b/rust/crates/du-web/templates/curator/naming/list.html @@ -0,0 +1,32 @@ +{# Fragment: naming-queue rows. Target of #naming-table. #} + + + + + + + + + {% for r in list.rows %} + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("nm.col.name") }}{{ t.get("nm.col.coord") }}{{ t.get("nm.col.defines") }}{{ t.get("nm.col.status") }}
{% if r.name == "(unnamed)" %}{{ r.name }}{% else %}{{ r.name }}{% endif %}{{ r.coord }}{{ r.defining }}{{ r.status }}
{{ t.get("nm.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/naming/page.html b/rust/crates/du-web/templates/curator/naming/page.html new file mode 100644 index 00000000..44882970 --- /dev/null +++ b/rust/crates/du-web/templates/curator/naming/page.html @@ -0,0 +1,27 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("nm.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("nm.title") }}

+

{{ t.get("nm.intro") }}

+
+
+ +
+ {% include "curator/naming/list.html" %} +
+
+
+

{{ t.get("nm.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/proposals/detail.html b/rust/crates/du-web/templates/curator/proposals/detail.html new file mode 100644 index 00000000..2e3069c8 --- /dev/null +++ b/rust/crates/du-web/templates/curator/proposals/detail.html @@ -0,0 +1,53 @@ +{# Fragment: proposal detail + review actions. Target of #proposal-detail. #} +
+
+ {{ row.name }} + {{ row.status }} +
+
+ {% if split %} +
{{ t.get("prop.split.note") }}
+ {% endif %} +
+
{{ t.get("prop.col.parent") }}
{{ row.parent }}
+
{{ t.get("prop.col.dna") }}
{{ row.dna }}
+
{{ t.get("prop.col.evidence") }}
{{ row.evidence_count }}
+
{{ t.get("prop.col.submitters") }}
{{ row.submitter_count }}
+
{{ t.get("prop.col.confidence") }}
{{ row.confidence }}
+
+ + {% if !variants.is_empty() %} +
{{ t.get("prop.variants") }}
+ + + + {% for v in variants %} + + {% endfor %} + +
{{ t.get("prop.var.name") }}{{ t.get("prop.var.support") }}
{{ v.name }}{{ v.support }}
+ {% endif %} + +
{{ t.get("prop.evidence") }}
+ {% for e in evidence %} +
{{ e }}
+ {% endfor %} + + {% if open %} +
+ +
+ + + +
+
+ {% endif %} + {% if accepted %} + + {% endif %} +
+
diff --git a/rust/crates/du-web/templates/curator/proposals/list.html b/rust/crates/du-web/templates/curator/proposals/list.html new file mode 100644 index 00000000..e269c41c --- /dev/null +++ b/rust/crates/du-web/templates/curator/proposals/list.html @@ -0,0 +1,33 @@ +{# Fragment: proposal rows. Target of #proposal-table. #} + + + + + + + + + {% for r in list.rows %} + + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("prop.col.name") }}{{ t.get("prop.col.parent") }}{{ t.get("prop.col.status") }}E/S{{ t.get("prop.col.confidence") }}
{{ r.name }}{{ r.parent }}{{ r.status }}{{ r.evidence_count }}/{{ r.submitter_count }}{{ r.confidence }}
{{ t.get("prop.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/proposals/page.html b/rust/crates/du-web/templates/curator/proposals/page.html new file mode 100644 index 00000000..7c194439 --- /dev/null +++ b/rust/crates/du-web/templates/curator/proposals/page.html @@ -0,0 +1,26 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("prop.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("prop.title") }}

+
+
+ +
+ {% include "curator/proposals/list.html" %} +
+
+
+

{{ t.get("prop.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/publications/detail.html b/rust/crates/du-web/templates/curator/publications/detail.html new file mode 100644 index 00000000..37a661c3 --- /dev/null +++ b/rust/crates/du-web/templates/curator/publications/detail.html @@ -0,0 +1,47 @@ +{# Fragment: candidate review panel. Target of #candidate-detail. #} +
+
+ {{ c.title }} + {{ c.status }} +
+
+ {% if let Some(n) = c.notice %} +
{{ n }}
+ {% endif %} + +
+
{{ t.get("pc.journal") }}
{{ c.journal }}
+
{{ t.get("pc.date") }}
{{ c.date }}
+
{{ t.get("pc.relevance") }}
{{ c.relevance }}
+ {% if let Some(doi) = c.doi %} +
DOI
+
{% if let Some(url) = c.doi_url %}{{ doi }}{% else %}{{ doi }}{% endif %}
+ {% endif %} +
OpenAlex
{{ c.openalex_id }}
+
+ + {% if let Some(a) = c.abstract_text %} +
+ {{ t.get("pc.abstract") }} +

{{ a }}

+
+ {% endif %} + + {% if c.can_act %} +
+ + + +
+ {% else %} +

{{ t.get("pc.accepted_note") }}

+ {% endif %} +
+
diff --git a/rust/crates/du-web/templates/curator/publications/list.html b/rust/crates/du-web/templates/curator/publications/list.html new file mode 100644 index 00000000..e05677d7 --- /dev/null +++ b/rust/crates/du-web/templates/curator/publications/list.html @@ -0,0 +1,32 @@ +{# Fragment: candidate rows. Target of #candidate-table. #} + + + + + + + + + {% for r in list.rows %} + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("pc.col.title") }}{{ t.get("pc.col.date") }}{{ t.get("pc.col.status") }}{{ t.get("pc.col.relevance") }}
{{ r.title }}
{{ r.journal }}
{{ r.date }}{{ r.status }}{{ r.relevance }}
{{ t.get("pc.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/publications/page.html b/rust/crates/du-web/templates/curator/publications/page.html new file mode 100644 index 00000000..b9c805b3 --- /dev/null +++ b/rust/crates/du-web/templates/curator/publications/page.html @@ -0,0 +1,27 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("pc.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("pc.title") }}

+

{{ t.get("pc.intro") }}

+
+
+ +
+ {% include "curator/publications/list.html" %} +
+
+
+

{{ t.get("pc.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/reconcile-flags/detail.html b/rust/crates/du-web/templates/curator/reconcile-flags/detail.html new file mode 100644 index 00000000..766fc1e2 --- /dev/null +++ b/rust/crates/du-web/templates/curator/reconcile-flags/detail.html @@ -0,0 +1,36 @@ +{# Fragment: reconcile-flag merge panel. Target of #flag-detail. #} +
+
{% if f.resolved %}{{ t.get("rf.resolved") }}{% else %}{{ f.locus }}{% endif %}
+
+ {% if let Some(n) = f.notice %}
{{ n }}
{% endif %} + + {% if f.resolved %} +

{{ t.get("rf.resolved.note") }}

+ {% else %} +

{{ t.get("rf.synonyms") }}: {{ f.names }}

+

{{ t.get("rf.split_note") }}

+ +
+ + + + + + + + {% for v in f.variants %} + + + + + + {% endfor %} + +
{{ t.get("rf.keep") }}{{ t.get("rf.col.canonical") }}{{ t.get("rf.col.defines") }}
{{ v.canonical }}{{ v.defines }}
+ + {{ t.get("rf.merge.hint") }} +
+ {% endif %} +
+
diff --git a/rust/crates/du-web/templates/curator/reconcile-flags/list.html b/rust/crates/du-web/templates/curator/reconcile-flags/list.html new file mode 100644 index 00000000..55a3c390 --- /dev/null +++ b/rust/crates/du-web/templates/curator/reconcile-flags/list.html @@ -0,0 +1,30 @@ +{# Fragment: reconcile-flag rows. Target of #flag-table. #} + + + + + + + + {% for r in list.rows %} + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("rf.col.locus") }}{{ t.get("rf.col.names") }}{{ t.get("rf.col.variants") }}
{{ r.locus }}{{ r.names }}{{ r.variant_count }}
{{ t.get("rf.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/reconcile-flags/page.html b/rust/crates/du-web/templates/curator/reconcile-flags/page.html new file mode 100644 index 00000000..1afe2e3d --- /dev/null +++ b/rust/crates/du-web/templates/curator/reconcile-flags/page.html @@ -0,0 +1,18 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("rf.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("rf.title") }}

+

{{ t.get("rf.intro") }}

+
+
+
+ {% include "curator/reconcile-flags/list.html" %} +
+
+
+

{{ t.get("rf.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/regions/detail.html b/rust/crates/du-web/templates/curator/regions/detail.html new file mode 100644 index 00000000..fa752a13 --- /dev/null +++ b/rust/crates/du-web/templates/curator/regions/detail.html @@ -0,0 +1,18 @@ +{# Fragment: region detail + actions. Target of #region-detail. #} +
+
+ {{ name }} + {{ region_type }} +
+
+
{{ t.get("region.field.coordinates") }}
+
{{ coordinates }}
+
{{ t.get("region.field.properties") }}
+
{{ properties }}
+
+ + +
+
+
diff --git a/rust/crates/du-web/templates/curator/regions/empty.html b/rust/crates/du-web/templates/curator/regions/empty.html new file mode 100644 index 00000000..6401f8c6 --- /dev/null +++ b/rust/crates/du-web/templates/curator/regions/empty.html @@ -0,0 +1 @@ +

{{ t.get("region.select") }}

diff --git a/rust/crates/du-web/templates/curator/regions/form.html b/rust/crates/du-web/templates/curator/regions/form.html new file mode 100644 index 00000000..51da44ba --- /dev/null +++ b/rust/crates/du-web/templates/curator/regions/form.html @@ -0,0 +1,35 @@ +{# Fragment: region create/edit form with JSON textareas. Target of #region-detail. #} +
+
{% if is_edit %}{{ t.get("region.edit") }}{% else %}{{ t.get("region.new") }}{% endif %}
+
+ {% if let Some(err) = error %}
{{ err }}
{% endif %} +
+
+
+ + +
+
+ + +
+
+
+ + +
+
+ + +
+
+ + {% if is_edit %} + + {% else %} + + {% endif %} +
+
+
+
diff --git a/rust/crates/du-web/templates/curator/regions/list.html b/rust/crates/du-web/templates/curator/regions/list.html new file mode 100644 index 00000000..e6a30d61 --- /dev/null +++ b/rust/crates/du-web/templates/curator/regions/list.html @@ -0,0 +1,26 @@ +{# Fragment: region rows + pager. Target of #region-table. #} + + + + {% for r in list.rows %} + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("region.col.type") }}{{ t.get("region.col.name") }}{{ t.get("region.col.builds") }}
{{ r.region_type }}{{ r.name }}{{ r.builds }}
{{ t.get("region.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/regions/page.html b/rust/crates/du-web/templates/curator/regions/page.html new file mode 100644 index 00000000..903e19f3 --- /dev/null +++ b/rust/crates/du-web/templates/curator/regions/page.html @@ -0,0 +1,27 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("region.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+

{{ t.get("region.title") }}

+ +
+
+
+ +
+ {% include "curator/regions/list.html" %} +
+
+
+

{{ t.get("region.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/curator/variants/detail.html b/rust/crates/du-web/templates/curator/variants/detail.html new file mode 100644 index 00000000..1e27615c --- /dev/null +++ b/rust/crates/du-web/templates/curator/variants/detail.html @@ -0,0 +1,23 @@ +{# Fragment: variant detail + actions. Target of #variant-detail. #} +
+
+ {{ v.name }} + {{ v.mutation_type }} +
+
+ {% if let Some(err) = error %}
{{ err }}
{% endif %} +
+
{{ t.get("var.field.status") }}
{{ v.naming_status }}
+
{{ t.get("var.field.commonNames") }}
{{ v.common_names }}
+
{{ t.get("var.field.rsIds") }}
{{ v.rs_ids }}
+
{{ t.get("var.field.builds") }}
{{ v.builds }}
+
+
+ + {% if can_delete %} + + {% endif %} +
+
+
diff --git a/rust/crates/du-web/templates/curator/variants/empty.html b/rust/crates/du-web/templates/curator/variants/empty.html new file mode 100644 index 00000000..7e3fd927 --- /dev/null +++ b/rust/crates/du-web/templates/curator/variants/empty.html @@ -0,0 +1 @@ +

{{ t.get("var.select") }}

diff --git a/rust/crates/du-web/templates/curator/variants/form.html b/rust/crates/du-web/templates/curator/variants/form.html new file mode 100644 index 00000000..641325c6 --- /dev/null +++ b/rust/crates/du-web/templates/curator/variants/form.html @@ -0,0 +1,49 @@ +{# Fragment: variant create/edit form. Target of #variant-detail. #} +
+
{% if is_edit %}{{ t.get("var.edit") }}{% else %}{{ t.get("var.new") }}{% endif %}
+
+
+
+ + +
+
+
+ + +
+
+ + +
+
+
+ + +
+
+ + +
+
+ + {% if is_edit %} + + {% else %} + + {% endif %} +
+
+
+
diff --git a/rust/crates/du-web/templates/curator/variants/list.html b/rust/crates/du-web/templates/curator/variants/list.html new file mode 100644 index 00000000..333cc2a7 --- /dev/null +++ b/rust/crates/du-web/templates/curator/variants/list.html @@ -0,0 +1,26 @@ +{# Fragment: variant rows + pager. Target of #variant-table. #} + + + + {% for r in list.rows %} + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("var.col.name") }}{{ t.get("var.col.type") }}{{ t.get("var.col.status") }}
{{ r.name }}{{ r.mutation_type }}{{ r.naming_status }}
{{ t.get("var.none") }}
+{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/curator/variants/page.html b/rust/crates/du-web/templates/curator/variants/page.html new file mode 100644 index 00000000..99a9e8c0 --- /dev/null +++ b/rust/crates/du-web/templates/curator/variants/page.html @@ -0,0 +1,27 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("var.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+

{{ t.get("var.title") }}

+ +
+
+
+ +
+ {% include "curator/variants/list.html" %} +
+
+
+

{{ t.get("var.select") }}

+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/index.html b/rust/crates/du-web/templates/index.html new file mode 100644 index 00000000..73b37666 --- /dev/null +++ b/rust/crates/du-web/templates/index.html @@ -0,0 +1,12 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("home.title") }}{% endblock %} +{% block content %} +
+

{{ t.get("home.heading") }}

+

{{ t.get("home.lead") }}

+ +
+{% endblock %} diff --git a/rust/crates/du-web/templates/references/biosamples.html b/rust/crates/du-web/templates/references/biosamples.html new file mode 100644 index 00000000..b73d3ab7 --- /dev/null +++ b/rust/crates/du-web/templates/references/biosamples.html @@ -0,0 +1,48 @@ +{# Fragment: biosample report for one publication. Target of #reference-detail. #} +
+
+ {{ t.get("references.biosamples.title") }} +
{{ pub_title }}
+ {% if let Some(doi) = pub_doi %} + {{ t.get("references.viewDoi") }}: {{ doi }} + {% endif %} +
+
+ {% if rows.is_empty() %} +

{{ t.get("references.biosamples.none") }}

+ {% else %} + + + + + + + + {% for b in rows %} + + + + + + {% endfor %} + +
{{ t.get("references.col.accession") }}{{ t.get("references.col.source") }}{{ t.get("references.col.alias") }}
+ {% if b.accession.is_empty() %}{{ b.accession }} + {% else %}{{ b.accession }}{% endif %} + {{ b.source }}{{ b.alias }}
+ {% if total_pages > 1 %} + + {% else %} + {{ total }} {{ t.get("pagination.total") }} + {% endif %} + {% endif %} +
+
diff --git a/rust/crates/du-web/templates/references/list.html b/rust/crates/du-web/templates/references/list.html new file mode 100644 index 00000000..f757df75 --- /dev/null +++ b/rust/crates/du-web/templates/references/list.html @@ -0,0 +1,35 @@ +{# Fragment: publication list + pager. Target of #references-table. #} + + +{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/crates/du-web/templates/references/page.html b/rust/crates/du-web/templates/references/page.html new file mode 100644 index 00000000..c18c5715 --- /dev/null +++ b/rust/crates/du-web/templates/references/page.html @@ -0,0 +1,28 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("references.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+

{{ t.get("references.title") }}

+ {{ t.get("submit.cta") }} +
+
+
+ + +
+ {% include "references/list.html" %} +
+
+
+
+

{{ t.get("references.select") }}

+
+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/references/submit.html b/rust/crates/du-web/templates/references/submit.html new file mode 100644 index 00000000..5b8aec62 --- /dev/null +++ b/rust/crates/du-web/templates/references/submit.html @@ -0,0 +1,28 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("submit.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block head %}{% if let Some(_k) = site_key %}{% endif %}{% endblock %} +{% block content %} +
+

{{ t.get("submit.title") }}

+

{{ t.get("submit.intro") }}

+ +{% if let Some(title) = queued %} +
+ {{ t.get("submit.queued") }}
{{ title }} +
+{{ t.get("submit.back") }} +{% else %} +{% if let Some(err) = error %}
{{ err }}
{% endif %} +
+
+ + +
{{ t.get("submit.doi.help") }}
+
+ {% if let Some(k) = site_key %}
{% endif %} + + {{ t.get("submit.back") }} +
+{% endif %} +
+{% endblock %} diff --git a/rust/crates/du-web/templates/samples/_pathway.html b/rust/crates/du-web/templates/samples/_pathway.html new file mode 100644 index 00000000..642ce188 --- /dev/null +++ b/rust/crates/du-web/templates/samples/_pathway.html @@ -0,0 +1,42 @@ +{# Macro: one haplogroup pathway (root → tip). Imported by samples/report.html. + `t` is the translator, passed in since macros can't see template fields. #} +{% macro pathway(t, title, pv) %} +
+
+ {{ title }} + {% if let Some(call) = pv.call %}{{ call }}{% endif %} +
+ {% if pv.reconciled %} +
+ {{ t.get("sample.recon.consensus") }} + {% if pv.run_count != "" %}{{ pv.run_count }} {{ t.get("sample.recon.runs") }}{% endif %} + {% if pv.confidence != "" %}{{ t.get("sample.recon.confidence") }} {{ pv.confidence }}{% endif %} + {% if pv.concordance != "" %}{{ t.get("sample.recon.concordance") }} {{ pv.concordance }}{% endif %} +
+ {% endif %} + {% if pv.call.is_none() %} +
{{ t.get("sample.pathway.noCall") }}
+ {% else if !pv.placed %} +
{{ t.get("sample.pathway.unplaced") }}
+ {% else %} +
+
    + {% for st in pv.steps %} +
  1. +
    + {{ st.name }} + {% if !st.snps.is_empty() %} +
    {{ st.snps|join(", ") }}
    + {% endif %} +
    + + {% if st.formed != "—" %}{{ st.formed }}{% endif %} + {% if st.tmrca != "—" %}{{ st.tmrca }} {{ t.get("sample.ybp") }}{% endif %} + +
  2. + {% endfor %} +
+
+ {% endif %} +
+{% endmacro %} diff --git a/rust/crates/du-web/templates/samples/_public_toggle.html b/rust/crates/du-web/templates/samples/_public_toggle.html new file mode 100644 index 00000000..1087e521 --- /dev/null +++ b/rust/crates/du-web/templates/samples/_public_toggle.html @@ -0,0 +1,16 @@ +{# Curator visibility toggle. Target of its own HTMX swap. #} +
+
+ {{ t.get("sample.curator.visibility") }} +
+ + +
+
+
diff --git a/rust/crates/du-web/templates/samples/report.html b/rust/crates/du-web/templates/samples/report.html new file mode 100644 index 00000000..356e2e1b --- /dev/null +++ b/rust/crates/du-web/templates/samples/report.html @@ -0,0 +1,189 @@ +{% extends "base.html" %} +{% import "samples/_pathway.html" as paths %} +{% block title %}{{ s.display_name }} — {{ t.get("app.name") }}{% endblock %} +{% block head %} + + + +{% endblock %} + +{% block content %} +
+

{{ s.display_name }}

+ + {% if s.federated %}{{ t.get("sample.federated") }}{% endif %} + {{ s.source }} + +
+ +{% if is_curator %}{% include "samples/_public_toggle.html" %}{% endif %} + +{# 1. Identity #} +
+
{{ t.get("sample.identity.title") }}
+
+
+ {% if let Some(a) = s.accession %} +
{{ t.get("sample.field.accession") }}
+
{{ a }}
+ {% endif %} + {% if let Some(a) = s.alias %} +
{{ t.get("sample.field.alias") }}
+
{{ a }}
+ {% endif %} + {% if let Some(x) = s.sex %} +
{{ t.get("sample.field.sex") }}
+
{{ x }}
+ {% endif %} + {% if let Some(c) = s.center_name %} +
{{ t.get("sample.field.center") }}
+
{{ c }}
+ {% endif %} + {% if let Some(d) = s.description %} +
{{ t.get("sample.field.description") }}
+
{{ d }}
+ {% endif %} + {% if !s.publications.is_empty() %} +
{{ t.get("sample.field.publications") }}
+
+
    + {% for p in s.publications %} +
  • + {% if let Some(href) = p.href %} + {{ p.title }} + {% else %}{{ p.title }}{% endif %} + {% if !p.year.is_empty() %}({{ p.year }}){% endif %} +
  • + {% endfor %} +
+
+ {% endif %} +
+
+
+ +{# 2. Haplogroup pathways #} +
+
{% call paths::pathway(t, t.get("sample.ydna.title"), s.y) %}
+
{% call paths::pathway(t, t.get("sample.mtdna.title"), s.mt) %}
+
+ +{# 3. Origin map #} +
+
{{ t.get("sample.map.title") }}
+
+ {% if let Some(o) = s.origin %} +
+ {% else %} +

{{ t.get("sample.map.none") }}

+ {% endif %} +
+
+ +{# 4. Sequencing & coverage #} +
+
{{ t.get("sample.seq.title") }}
+
+ {% if s.sequencing.is_empty() && s.coverage.is_empty() %} +

{{ t.get("sample.seq.none") }}

+ {% else %} + {% if !s.sequencing.is_empty() %} + + + + + + + + + + + {% for r in s.sequencing %} + + + + + {% endfor %} + +
{{ t.get("sample.seq.platform") }}{{ t.get("sample.seq.instrument") }}{{ t.get("sample.seq.testType") }}{{ t.get("sample.seq.layout") }}{{ t.get("sample.seq.reads") }}{{ t.get("sample.seq.readLength") }}
{{ r.platform }}{{ r.instrument }}{{ r.test_type }}{{ r.layout }}{{ r.reads }}{{ r.read_length }}
+ {% endif %} + {% if !s.coverage.is_empty() %} + + + + + + + + + + + + + + {% for c in s.coverage %} + + + + + + + + {% endfor %} + +
{{ t.get("sample.cov.build") }}{{ t.get("sample.cov.aligner") }}{{ t.get("sample.cov.testtype") }}{{ t.get("sample.cov.mean") }}{{ t.get("sample.cov.expected") }}{{ t.get("sample.cov.10x") }}{{ t.get("sample.cov.20x") }}{{ t.get("sample.cov.30x") }}{{ t.get("sample.cov.conformance") }}
{{ c.build }}{{ c.aligner }}{{ c.test_type }}{{ c.mean }}{{ c.expected }}
{{ t.get("sample.cov.cohort") }} {{ c.norm }}
{{ c.pct_10x }}{{ c.pct_20x }}{{ c.pct_30x }} + {% if c.conformance == "BELOW" %}{{ t.get("sample.cov.below") }} + {% else if c.conformance == "AT" %}{{ t.get("sample.cov.at") }} + {% else if c.conformance == "ABOVE" %}{{ t.get("sample.cov.above") }} + {% endif %} +
+ {% endif %} + {% endif %} +
+
+ +{# 5. Ancestry breakdown #} +
+
+ {{ t.get("sample.ancestry.title") }} + {% if let Some(m) = s.ancestry_method %}{{ m }}{% endif %} +
+
+ {% if s.ancestry.is_empty() %} +

{{ t.get("sample.ancestry.none") }}

+ {% else %} +
+ {% for c in s.ancestry %} +
{{ c.pct_label }}
+ {% endfor %} +
+
    + {% for c in s.ancestry %} +
  • +   + {{ c.label }} — {{ c.pct_label }} +
  • + {% endfor %} +
+ {% endif %} +
+
+ +

{{ t.get("sample.future.note") }}

+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/rust/crates/du-web/templates/static/contact.html b/rust/crates/du-web/templates/static/contact.html new file mode 100644 index 00000000..81e03c1b --- /dev/null +++ b/rust/crates/du-web/templates/static/contact.html @@ -0,0 +1,22 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("contact.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block head %}{% if let Some(_k) = site_key %}{% endif %}{% endblock %} +{% block content %} +
+

{{ t.get("contact.title") }}

+{% if sent %} +
{{ t.get("contact.sent") }}
+{{ t.get("nav.about") }} +{% else %} +{% if let Some(err) = error %}
{{ err }}
{% endif %} +
+
+
+
+
+ {% if let Some(k) = site_key %}
{% endif %} + +
+{% endif %} +
+{% endblock %} diff --git a/rust/crates/du-web/templates/static/page.html b/rust/crates/du-web/templates/static/page.html new file mode 100644 index 00000000..4ad9feb2 --- /dev/null +++ b/rust/crates/du-web/templates/static/page.html @@ -0,0 +1,287 @@ +{% extends "base.html" %} +{% block title %}{{ title }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +
+

{{ title }}

+ +{% if page == "about" %} +

{{ t.get("app.name") }} is a citizen-science platform for phylogenetic + (Y-DNA and mitochondrial) and population genomics. It maintains a curated + known-variant catalog and haplogroup tree, and aggregates anonymized coverage + and ancestry summaries contributed across the federation.

+

Per-sample raw data never leaves the contributor's device or personal data + store; only computed summaries are shared. See the privacy + policy for details.

+ + +{% else if page == "faq" %} +
    +
  1. +
    +
    +

    Question 1

    +

    What is Decoding-Us.com?

    +
    +
    +

    Decoding-Us.com is a collaborative platform designed for the genetic genealogy and population research community. It serves two main purposes:

    +
      +
    • Public Resource: We analyze and visualize publicly available Whole Genome Sequencing (WGS) data from academic sources, providing alternative Y-DNA and Mitochondrial DNA trees, coverage benchmarks, and reference data.
    • +
    • Federated Collaboration: We are building a decentralized application using the AT Protocol (the same technology behind Bluesky) that allows researchers to securely share and compare genetic data without relying on a central server to hold their files.
    • +
    +

    Our goal is to empower citizen scientists with tools to explore human history while maintaining complete ownership and control over their sensitive genetic data.

    +
    +
    +
  2. + +
  3. +
    +
    +

    Question 2

    +

    How does the "Federated" model work?

    +
    +
    +

    In a traditional model (like AncestryDNA or 23andMe), you provide a biological sample (such as a buccal swab or blood) for sequencing. The service then processes your sample, sequences your genetic data, and often maintains ownership or significant control over that data on their servers. Decoding-Us.com offers a unique alternative: our platform is designed to bridge data from any genetic service—be it 23andMe, Ancestry, FTDNA, YSEQ, or direct-to-consumer Whole Genome Sequencing (WGS) providers. As long as you maintain ownership of and access to your sequencing data, our system empowers you to bring it into our federated network, uniting insights from across different providers.

    +

    In our Federated Model (Phase 3 of our roadmap), you don't upload your raw data to us. Instead:

    +
      +
    1. You run a "Personal Data Server" (PDS) application—software that lives on your own computer or private cloud.
    2. +
    3. This software analyzes your WGS data locally to extract non-sensitive summaries (like your haplogroup or anonymous matching tokens).
    4. +
    5. You choose to publish these summaries to the "Atmosphere"—our secure network.
    6. +
    7. Decoding-Us.com acts as an "AppView," indexing this public information so you can find matches and collaborate with other researchers.
    8. +
    +

    This approach, known as a "Decentralized AppView," ensures that your massive and sensitive raw data files (BAM/CRAM) never leave your control, while still allowing you to participate in community discoveries.

    +
    +
    +
  4. + +
  5. +
    +
    +

    Question 3

    +

    Can I upload my Big Y, WGS, or other DNA files?

    +
    +
    +

    No, we do not accept direct file uploads. To protect your privacy and reduce infrastructure costs, we do not host user data files.

    +

    Instead, we provide tools that allow you to process your data on your own machine. Currently, we offer an Alpha version of the Decoding-Us Navigator (also known as the Navigator Workbench). This edge-computing application runs locally on your PC (Windows/Linux/Mac) and empowers you to:

    +
      +
    • Analyze Locally: Process BAM/CRAM files directly on your machine to generate coverage metrics and haplogroup determinations without uploading massive files.
    • +
    • Integrate with Atmosphere: Future versions will allow you to publish anonymized summaries to your Personal Data Server, enabling you to share insights with the federated network while keeping your raw data private.
    • +
    +

    The Navigator is built on the JVM (Java/Scala) for performance and cross-platform compatibility. It represents the core of our privacy-first philosophy: bring the analysis to the data, not the data to the analysis.

    +
    +
    +
  6. + +
  7. +
    +
    +

    Question 4

    +

    What is the "Atmosphere" and the AT Protocol?

    +
    +
    +

    The AT Protocol is a new technology for decentralized social networking. It allows users to own their identity and data, moving freely between different services without losing their connections.

    +

    We call our implementation the Atmosphere. Just as the air connects us all, the Atmosphere connects individual researchers' Personal Data Servers. It uses "Lexicons"—standard dictionaries for genetic data—to ensure that a researcher in the UK can automatically compare notes with a researcher in the US, even if they use different software, provided they both speak the "Atmosphere" language.

    +
    +
    +
  8. + +
  9. +
    +
    +

    Question 5

    +

    Why the .com domain? Is this a commercial service?

    +
    +
    +

    Decoding-Us.com is currently a free resource. However, hosting high-performance databases and indexing the global federation costs money. We chose a .com domain to keep our options open for long-term sustainability.

    +

    Future sustainability models might include:

    +
      +
    • Patronage Donation System: A voluntary tiered donation model where community members can become "Patrons" to support hardware and hosting costs. Contributions are optional, but help ensure the platform remains free and open for everyone.
    • +
    • Managed PDS Hosting: For users who don't want to run their own server, we could offer a paid, secure hosting service for their Personal Data Server.
    • +
    • Sponsorships: Partnerships with sequencing labs or academic institutions, provided they align with our strict privacy and anti-tracking values.
    • +
    +

    Regardless of the model, our commitment to open-source code and user data sovereignty will remain unchanged.

    +
    +
    +
  10. + +
  11. +
    +
    +

    Question 6

    +

    How can I submit my haplogroup tree research for merging into DecodingUs?

    +
    +
    +

    DecodingUs welcomes contributions from researchers to help build a more comprehensive and accurate haplogroup tree. You can submit your data via our automated Tree Merge API.

    +

    For a detailed guide on how to format your tree data, the rules for provenance (how your research gets credited), and how conflicts are resolved, please refer to our API documentation: + Learn more about Submitting Tree Data +

    +
    +
    +
  12. +
+ +{% else if page == "terms" %} +

Acceptance of Terms

+

By accessing and using Decoding-Us.com (the "Website"), you agree to be bound by these Terms of Use.

+ +

Use of Content

+

The content provided on Decoding-Us.com, including the haplogroup trees and related information, is made available under the Creative Commons Attribution 4.0 International License (CC BY 4.0). This means you are free to:

+
    +
  • Share — copy and redistribute the material in any medium or format
  • +
  • Adapt — remix, transform, and build upon the material for any purpose, even commercially.
  • +
+

Under the following terms:

+
    +
  • Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
  • +
+

Specifically, when providing attribution, we request that you credit "Decoding-Us.com" and, where possible, include a link back to our Website: decoding-us.com.

+

The haplogroup trees and related information on this Website are generated and enhanced by integrating data from multiple sources, including aggregated, non-personally identifiable data obtained from a private federation of genetic genealogy researchers operating AT Protocol Personal Data Server (PDS) applications, and publicly available genetic data from repositories such as the European Nucleotide Archive and PGP: Harvard. This integration allows for broader population coverage and more comprehensive branch identification. The data from the private federation is made publicly available by the researchers within their PDS privacy controls and is crawled by Decoding-Us.com. Publicly available data from repositories is accessed and integrated according to their respective terms of use.

+ +

Features for Research Collaboration

+

Decoding-Us.com provides an Application View that allows researchers within the private federation using AT Protocol PDS applications to communicate with each other based on shared genealogical and genetic information. This messaging feature is facilitated to encourage collaboration and potential relative matching. Users of the PDS control whether they receive and engage with these messages.

+

The Website may also display anonymized ancestral population and Most Distant Known Ancestor (MDKA) information obtained from publicly shared data within the federation. This information is intended to provide genealogical context and facilitate connections among researchers. Please note that the MDKA's date of birth may be masked to protect potential privacy.

+ +

Responsibility for PDS User Interactions

+

The messaging functionality is a feature provided to facilitate communication between users of the AT Protocol PDS within the private federation. Decoding-Us.com acts as an interface and does not assume responsibility for the content, conduct, or outcomes of these direct interactions between PDS users. Users are solely responsible for their communications and interactions with others within the federated environment.

+ +

Use of Anonymized Genealogical Data

+

Any anonymized ancestral population and MDKA information displayed on Decoding-Us.com is derived from publicly shared data within the private research federation. While this information is intended to be helpful for genealogical research, Decoding-Us.com makes no warranties regarding its accuracy or completeness. Users should exercise their own judgment and conduct independent research to verify any genealogical information obtained through the Website.

+ +

No Warranties

+

THE CONTENT ON THIS WEBSITE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED. TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, DECODING-US.COM DISCLAIMS ALL WARRANTIES, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. WE DO NOT WARRANT THAT THE WEBSITE WILL BE UNINTERRUPTED OR ERROR-FREE, THAT DEFECTS WILL BE CORRECTED, OR THAT THE WEBSITE OR THE SERVERS THAT MAKE IT AVAILABLE ARE FREE OF VIRUSES OR OTHER HARMFUL COMPONENTS.

+ +

Limitation of Liability

+

TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL DECODING-US.COM BE LIABLE FOR ANY INDIRECT, INCIDENTAL, SPECIAL, CONSEQUENTIAL, OR PUNITIVE DAMAGES (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS, DATA, USE, GOODWILL, OR OTHER INTANGIBLE LOSSES) ARISING OUT OF OR RELATING TO YOUR ACCESS TO OR USE OF, OR YOUR INABILITY TO ACCESS OR USE, THE WEBSITE OR ITS CONTENT, WHETHER BASED ON WARRANTY, CONTRACT, TORT (INCLUDING NEGLIGENCE), STATUTE, OR ANY OTHER LEGAL THEORY, EVEN IF DECODING-US.COM HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

+ +

Governing Law

+

These Terms of Use shall be governed by and construed in accordance with the laws of the State of Wisconsin, USA, without regard to its conflict of law provisions.

+ +

Last Updated: 2025-05-13

+ +{% else if page == "privacy" %} +

This Privacy Statement explains how Decoding-Us.com (the "Website") handles the limited data processed during your interaction with our application, including the use of data from a federated platform.

+ +

1. Introduction

+

This statement outlines our practices regarding the processing of personal data in connection with Decoding-Us.com. As a HATEOAS (Hypermedia as the Engine of Application State) application focused on providing public data, we also integrate anonymized and public data from a federated platform to enhance our content.

+ +

2. Features Facilitating Research Collaboration

+

Decoding-Us.com provides an Application View that allows users of participating AT Protocol Personal Data Servers (PDS) within the private research federation to send messages to one another. This messaging functionality is intended to facilitate collaboration among researchers based on shared haplogroups in the genealogical era, ancestral populations, or existing matching networks. Users on the PDS have full control over whether to allow or ignore incoming messages.

+

To further enhance research and potential connections, Decoding-Us.com may also collect anonymized ancestral population information and Most Distant Known Ancestor (MDKA) data from publicly shared data on the federated PDS. This MDKA information may include geocoordinates, date of birth, date of death, and surname. To protect potential privacy, the MDKA's date of birth will be masked if it falls within the last 100 years.

+

It is crucial to understand that researchers control what information they make public on their PDS through their privacy settings. Decoding-Us.com only accesses and processes data that has been explicitly shared publicly by these researchers within the federated environment. We do not access or collect any private or personally identifiable genetic or genealogical data beyond what researchers choose to share publicly.

+

The purpose of these features is to empower researchers to connect, share insights, and collaboratively refine haplogroup understanding and genealogical connections. The anonymized MDKA information helps provide context without revealing recent personal details.

+ +

3. Data We Collect

+

Currently, this Website does not actively collect personal data such as names, email addresses, or account information directly from our viewing users. Our design does not require or facilitate user registration or login on Decoding-Us.com itself.

+

However, our server logs automatically record certain information when you access the Website, including:

+
    +
  • Your IP address
  • +
  • The date and time of your request
  • +
  • The type of browser and operating system you are using (user-agent string)
  • +
  • The specific endpoints (URLs) you access
  • +
+

Through our Application View functionality for the private research federation, we may also process the following types of publicly shared, anonymized data from participating PDS:

+
    +
  • Aggregated statistics on genetic tests and coverage.
  • +
  • Information about private SNPs beyond the terminal haplogroup branch.
  • +
  • Data on negative calls for the terminal branch and its siblings.
  • +
  • Anonymized ancestral population information.
  • +
  • Most Distant Known Ancestor (MDKA) data (geocoordinates, masked date of birth if within the last 100 years, date of death, and surname).
  • +
+

This data is processed to facilitate research collaboration and enhance the haplogroup information presented on Decoding-Us.com.

+ +

4. Communication Between PDS Users

+

The messaging feature is facilitated by the AT Protocol PDS infrastructure. Decoding-Us.com provides the interface for this communication, but the control over allowing or ignoring messages resides entirely with the individual PDS users. We do not monitor the content of these messages.

+ +

5. Data Sharing

+

Decoding-Us.com does not share the personal data of our website viewers with the federated researchers or their AT Protocol PDS applications. Our interaction with the federation involves crawling and analyzing aggregated, non-personally identifiable data that researchers have chosen to make public within their PDS privacy controls. We also facilitate direct messaging between PDS users within the federation, but we do not access or store the content of these messages. The anonymized ancestral population and MDKA information we collect is used to enhance the research capabilities and content on Decoding-Us.com and is not shared back with individual PDS users in a personally identifiable way.

+ +

6. Your Rights and International Data Protection Laws

+

Depending on your location and applicable data protection laws, such as the General Data Protection Regulation (GDPR) in the European Union, the California Consumer Privacy Act (CCPA) in California, and similar regulations elsewhere, you may have certain rights regarding your personal data. These rights can include the right to access, rectify, erase, restrict processing, and object to the processing of your personal data, as well as the right to data portability.

+

While the scope of personal data we currently process from our users is limited to server logs, we are committed to respecting these rights where applicable. If you have any inquiries regarding your rights under relevant data protection laws and how they might apply to the limited data we process, please contact us using the information below.

+ +

7. Updates to this Privacy Statement

+

We may update this Privacy Statement from time to time to reflect changes in our practices or legal requirements, including the integration with the federated platform. We will post any changes on this page, and we encourage you to review this statement periodically.

+ +

Last Updated: 2025-05-13

+ +{% else if page == "cookies" %} +

This Cookie Usage Policy explains how Decoding-Us.com (the "Website") uses cookies.

+ +

What are Cookies?

+

Cookies are small text files that are placed on your computer or mobile device when you visit a website. They are widely used to make websites work, or work more efficiently, as well as to provide information to the owners of the website.

+ +

Cookies We Use

+

This Website uses cookies exclusively for authentication and authorization purposes. Specifically:

+
    +
  • Session Cookies: To maintain your logged-in state as you navigate the Website. These cookies are essential for the authentication system to function and are deleted when you close your browser or log out.
  • +
  • Security Cookies: To help protect your account and prevent unauthorized access, including CSRF (Cross-Site Request Forgery) protection tokens.
  • +
+ +

What We Do NOT Use Cookies For

+

We want to be clear about what we do not do with cookies:

+
    +
  • No Tracking: We do not track your browsing activity on other websites.
  • +
  • No Analytics: We do not use cookies for analytics or to monitor user behavior patterns.
  • +
  • No Marketing: We do not use cookies for advertising or marketing purposes.
  • +
  • No Third-Party Sharing: We do not share cookie data with any third parties.
  • +
  • No Data Sales: We do not sell any information collected through cookies.
  • +
+ +

Third-Party Cookies

+

This Website does not utilize any third-party cookies. We do not embed third-party tracking scripts, advertising networks, or social media widgets that would set their own cookies.

+ +

Managing Cookies

+

Since our cookies are essential for authentication, disabling them will prevent you from logging in to the Website. You can manage cookies through your browser settings:

+
    +
  • Most browsers allow you to view, manage, and delete cookies in the privacy or security settings.
  • +
  • If you delete our session cookies, you will be logged out and need to sign in again.
  • +
+ +

Updates to this Cookie Policy

+

We may update this Cookie Usage Policy from time to time. Any changes will be posted on this page. Our commitment to using cookies only for authentication and authorization purposes will not change without clear notice to our users.

+ +

Last Updated: 2025-12-09

+ +{% else if page == "reputation" %} +

The Decoding Us Reputation System is designed to foster a high-quality, trustworthy community for genetic genealogy and research. It rewards positive contributions and protects the community from spam and abuse.

+ +

How it Works

+

Every user starts with a neutral reputation score. As you contribute to the platform, your score increases. Conversely, actions that harm the community may lower your score. Higher scores unlock advanced features, ensuring that powerful tools are used by trusted members.

+ +

Earning Reputation

+

You can earn reputation points through:

+
    +
  • Identity Verification: Verifying your email and identity (+10 points).
  • +
  • Data Contributions: Submitting sequencer metadata that is verified by the community (+5 points).
  • +
  • Community Engagement: Having your posts or comments upvoted by others (+1 point).
  • +
  • Research Participation: Accepting recruitment requests from researchers (+2 points).
  • +
  • Welcome Bonus: New users receive a starter bonus (+5 points).
  • +
+ +

Losing Reputation

+

Your reputation score decreases if:

+
    +
  • Spam Reports: Your content is marked as spam by moderators or community consensus (-50 points).
  • +
  • Negative Feedback: Your posts are consistently downvoted (-1 point).
  • +
+ +

Reputation Levels

+

Your score determines your access to certain features:

+
    +
  • Newcomer (0-9): Read-only access to public feeds.
  • +
  • Member (10+): Can post to the public feed and upvote content.
  • +
  • Trusted (20+): Can initiate Direct Messages (DMs) with other users.
  • +
  • Community Leader (50+): Can create new groups and has higher visibility.
  • +
+ +
+ Note: Reputation scores are calculated transparently based on these rules. Our goal is to create a safe environment where scientific collaboration can thrive. +
+{% endif %} + +
+{% endblock %} diff --git a/rust/crates/du-web/templates/tree/page.html b/rust/crates/du-web/templates/tree/page.html new file mode 100644 index 00000000..1c4b12a2 --- /dev/null +++ b/rust/crates/du-web/templates/tree/page.html @@ -0,0 +1,111 @@ +{% extends "base.html" %} +{% block title %}{{ title }} — {{ t.get("app.name") }}{% endblock %} +{# Full viewport width — the cladogram wants all the horizontal room it can get. #} +{% block main_container %}container-fluid{% endblock %} +{% block content %} +
+

{{ title }}

+ + {# Search: haplogroup name OR defining variant. Swaps just the tree. #} + + + {# Depth selector — persisted in localStorage and injected into every tree + request (see configRequest below), so it survives nav and sessions. #} +
+ + +
+ + {# Orientation toggle — full-page (boosted) nav so the shell + cookie update. #} + +
+ +
+ {{ t.get("tree.legend.backbone") }} + {{ t.get("tree.legend.recent") }} + {{ t.get("tree.legend.default") }} +
+ +{# Tree takes the full width; the SNP detail panel is an off-canvas that slides in + on a variant click, so nothing is reserved while it's empty. #} +
+ {% include "tree/svg.html" %} +
+ +
+
+
+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/rust/crates/du-web/templates/tree/snp_sidebar.html b/rust/crates/du-web/templates/tree/snp_sidebar.html new file mode 100644 index 00000000..3b546c96 --- /dev/null +++ b/rust/crates/du-web/templates/tree/snp_sidebar.html @@ -0,0 +1,88 @@ +{# SNP-detail sidebar for one haplogroup. Loaded into #snpSidebar by HTMX. #} +
+
+ {{ name }} + +
+ + {# Branch provenance — where this node came from (source, cross-source names, + when it was last updated, backbone, age). Grows in value as trees fold in. #} + {% if let Some(p) = provenance %} +
+
{{ t.get("tree.prov.title") }}
+
+
{{ t.get("tree.prov.source") }}
+
+ {{ p.source }} + {% if p.backbone %}{{ t.get("tree.legend.backbone") }}{% endif %} +
+ {% if !p.aliases.is_empty() %} +
{{ t.get("tree.prov.aka") }}
+
{% for a in p.aliases %}{{ a }}{% if !loop.last %}, {% endif %}{% endfor %}
+ {% endif %} + {% if let Some(u) = p.updated %} +
{{ t.get("tree.prov.updated") }}
+
{{ u }}
+ {% endif %} + {% if let Some(f) = p.formed_ybp %} +
{{ t.get("hg.field.formed") }}
+
{{ f }}
+ {% endif %} + {% if let Some(tm) = p.tmrca_ybp %} +
{{ t.get("hg.field.tmrca") }}
+
{{ tm }}
+ {% endif %} +
+
+ {% endif %} + + {% if variants.is_empty() %} +
+

{{ t.get("tree.snp.none") }}

+
+ {% else %} +
+
    + {% for v in variants %} +
  • + {{ v.name }} + {{ v.mutation_type }} + {% if v.back_mutation %}{{ t.get("tree.snp.backMutation") }} + {% else if v.recurrent %}{{ t.get("tree.snp.recurrent") }}{% endif %} + {% if let Some(tr) = v.transition %} + {{ tr }} + {% endif %} + {% if !v.aliases.is_empty() %} +
    {{ t.get("tree.snp.aliases") }}: {{ v.aliases|join(", ") }}
    + {% endif %} + {% for c in v.coordinates %} +
    {{ c }}
    + {% endfor %} +
  • + {% endfor %} +
+
+ {% endif %} + + {# Placed non-D2C sample leaves at or below this node (YFull-style). #} + {% if !samples.is_empty() %} +
+
{{ t.get("tree.samples.title") }}
+
    + {% for s in samples %} +
  • + {{ s.label }} + {{ s.source }} + {% if let Some(c) = s.citation %} +
    {{ c }}
    + {% endif %} +
  • + {% endfor %} +
+ {% if samples_more > 0 %} +
+{{ samples_more }} {{ t.get("tree.samples.more") }}
+ {% endif %} +
+ {% endif %} +
diff --git a/rust/crates/du-web/templates/tree/svg.html b/rust/crates/du-web/templates/tree/svg.html new file mode 100644 index 00000000..b81f680b --- /dev/null +++ b/rust/crates/du-web/templates/tree/svg.html @@ -0,0 +1,60 @@ +{# One tree window as an inline SVG cladogram. The node ``s re-root via + HTMX (swap #tree-container); variant labels open the SNP sidebar. + References: t, base_path, root_name, crumbs, laid (Option). #} + + +{% if let Some(l) = laid %} +
+ + {% for link in l.links %} + + {% endfor %} + {# Placed-sample leaf tips — minimal markers, visually distinct from haplogroup boxes. #} + {% for tip in l.tips %} + {% if let Some(node) = tip.overflow_node %} + + + {{ tip.label }} + + {% else %} + + + {{ tip.label }} + + {% endif %} + {% endfor %} + {% for n in l.nodes %} + + + {{ n.name }}{% if let Some(tm) = n.tmrca %} · TMRCA {{ tm }}{% endif %} + + {% if n.is_backbone %}✓ {% else if n.is_recent %}★ {% endif %}{{ n.name }}{% if n.has_hidden %} +{% endif %} + {{ n.variant_count }} {{ t.get("tree.variants") }} ▸ + {% if let Some(f) = n.formed %} + {{ t.get("tree.formed") }} {{ f }} + {% else if let Some(tm) = n.tmrca %} + TMRCA {{ tm }} + {% endif %} + + {% endfor %} + +
+{% else %} +

{{ t.get("tree.noChildren") }}

+{% endif %} diff --git a/rust/crates/du-web/templates/variants/browser.html b/rust/crates/du-web/templates/variants/browser.html new file mode 100644 index 00000000..42781d40 --- /dev/null +++ b/rust/crates/du-web/templates/variants/browser.html @@ -0,0 +1,26 @@ +{% extends "base.html" %} +{% block title %}{{ t.get("variants.title") }} — {{ t.get("app.name") }}{% endblock %} +{% block content %} +

{{ t.get("variants.title") }}

+
+
+ + + {# First page embedded inline (no load round-trip); search/pager swap it. #} +
+ {% include "variants/list.html" %} +
+
+
+
+

{{ t.get("variants.detail.select") }}

+
+
+
+{% endblock %} diff --git a/rust/crates/du-web/templates/variants/detail.html b/rust/crates/du-web/templates/variants/detail.html new file mode 100644 index 00000000..45c381c2 --- /dev/null +++ b/rust/crates/du-web/templates/variants/detail.html @@ -0,0 +1,45 @@ +{# Fragment: variant detail card. Target of #detail-panel. #} +
+
+ {{ name }} + {{ mutation_type }} +
+
+

{{ t.get("variants.detail.naming") }} {{ naming_status }}

+ + {% if !common_names.is_empty() %} +

{{ t.get("variants.detail.aka") }} + {% for n in common_names %}{{ n }} {% endfor %} +

+ {% endif %} + {% if !rs_ids.is_empty() %} +

{{ t.get("variants.detail.rsids") }} + {% for r in rs_ids %}{{ r }} {% endfor %} +

+ {% endif %} + +
{{ t.get("variants.detail.coordinates") }}
+ {% if coords.is_empty() %} +

{{ t.get("variants.detail.nocoords") }}

+ {% else %} + + + + + + + + + {% for c in coords %} + + + + + + + {% endfor %} + +
{{ t.get("variants.col.build") }}{{ t.get("variants.col.contig") }}{{ t.get("variants.col.position") }}{{ t.get("variants.col.change") }}
{{ c.build }}{{ c.contig }}{{ c.position }}{% if let Some(ch) = c.change %}{{ ch }}{% else %}—{% endif %}
+ {% endif %} +
+
diff --git a/rust/crates/du-web/templates/variants/list.html b/rust/crates/du-web/templates/variants/list.html new file mode 100644 index 00000000..c28adef3 --- /dev/null +++ b/rust/crates/du-web/templates/variants/list.html @@ -0,0 +1,44 @@ +{# Fragment: results table + pager. Target of #variants-table; also included by + the browser page. References `t` and `list`. #} + + + + + + + + + + + {% for r in list.rows %} + + + + + + + {% else %} + + {% endfor %} + +
{{ t.get("variants.col.name") }}{{ t.get("variants.col.type") }}{{ t.get("variants.col.status") }}{{ t.get("variants.col.builds") }}
{{ r.name }}{{ r.mutation_type }}{{ r.naming_status }}{{ r.builds }}
{{ t.get("variants.none") }}
+ +{% if list.total_pages > 1 %} + +{% else %} +{{ list.total }} {{ t.get("pagination.total") }} +{% endif %} diff --git a/rust/docs/atproto-edge-reply.md b/rust/docs/atproto-edge-reply.md new file mode 100644 index 00000000..ea8122f1 --- /dev/null +++ b/rust/docs/atproto-edge-reply.md @@ -0,0 +1,78 @@ +# AT Protocol federation — decodingus reply to Navigator/Edge + +**Re:** `DUNavigator/documents/atmosphere/12-OAuth-Edge-Reply.md` (and 08/11) +**From:** decodingus (AppView) team +**Date:** 2026-06-01 + +Agreed on the whole shape: two clients / two scopes, AppView re-scoped off the +mirror, standard relay/Jetstream stays for discovery, custom REST/Kafka relay +gone. Point-by-point below, then the decisions you asked us to make. + +## Answers to your asks + +1. **Public-client (PKCE-only) token exchange — done.** `du-atproto::oauth` now + exposes `par_form_public` and `token_form_public` (no `client_assertion`; + PKCE + DPoP only). Confirmed by test (`public_client_forms_omit_client_assertion`). + So Navigator reuses the same `Pkce`, `dpop_proof`, DID/handle resolution, and + PDS discovery; only the form builders differ from the confidential path. The + confidential pieces (`client_assertion`, served metadata/JWKS, cookie session) + stay decodingus-only, as you noted. + +2. **Hosting Navigator's client metadata — yes.** We'll serve your native + `client-metadata.json` at `https://decoding-us.com/navigator/client-metadata.json` + (static, alongside the web client's). Send us the JSON contents (or a PR) and + we'll wire the route. + +3. **Scopes.** Confirmed: AppView requests **no PDS read scope** for now — our + two surviving flows don't need it (variants via the curation submission API; + coverage from public summary records). We'll revisit a read scope only if/when + private match data uses notify-fetch (your #7, deferred). Our web OAuth is + effectively **user login/identity**. 
You own the `navigatorCore` write-set + lexicon; the collection NSIDs in 11 §3 match our expectations. + +4. **DPoP nonce.** We implement single-retry on `DPoP-Nonce` at PAR + token. We'll + share the auth server's actual nonce behavior once we have a test server. + +5. **AppView re-scope acknowledged.** We will **not** build the full-CRUD + `subscribeRepos` mirror or per-collection ingestion handlers/tables. Note: the + Rust rewrite never ported the legacy `FirehoseController`/`AtmosphereEventHandler` + mirror, so there's nothing to remove — we simply build to the new two-flow role. + +## Decisions you asked decodingus to make + +- **Shared-crate extraction (your ask #1): DECIDED — a dedicated `decodingus-shared` + git repo.** We'll extract `du-domain`, `du-atproto`, `du-bio` there; both + decodingus and DUNavigator git-dep on it (fixes flow both ways, clear ownership). + They're already cleanly separated with no server/web coupling, so the move is + mechanical. Coordinating repo creation + remote next; we'll send the repo URL. +- **Haploid variant caller (your ask #3): DECIDED — Navigator-only crate.** Keep + it in a Navigator-owned crate so any heavy/edge-only deps stay off the AppView; + `du-bio` stays I/O + liftover + callable. If the AppView later needs the same + calling logic we can promote a pure subset into `du-bio` then. + +## What we're building next (AppView side) + +1. **Curation submission API** — a Navigator-authenticated endpoint accepting + variant/branch proposals → pool/consensus → curator review → promote to the + catalog. Maps onto our existing `tree.proposed_branch` / `proposed_branch_variant` + / `proposed_branch_evidence` schema. We'll share the request shape for your + `navigator submit` path. +2. **Coverage mirror (revised, supersedes "on-demand aggregation").** We reversed + the doc 08 §3 / pointers-only plan: on-demand aggregation means an HTTP fan-out + to every PDS per query, which doesn't scale. 
Instead the AppView **mirrors the + public coverage *summaries*** — a Jetstream consumer subscribed to + `com.decodingus.atmosphere.alignment` writes each record's QC metrics (summary + only, never raw reads) into `fed.coverage_summary`; population views aggregate + that table with query-time SQL. This is **not** the old full-CRUD network + mirror — one collection, summary metrics only, no per-sample raw data, no + orphan/sync machinery. Please update 08-AppView-Lifecycle.md §3 to match (we're + reading published `alignment.metrics`, so the record shape is unchanged for + you). Cursor-resumed + reconnecting; the upsert is idempotent and ordered. + +## Still needed from you to test end-to-end + +- A **test PDS + account** (handle + DID) and its auth-server endpoints. +- Confirmation the auth server accepts a **public client (PKCE + DPoP + loopback)**. +- The **DID method** edge accounts use (`did:plc` via `plc.directory` — self-hosted? — or `did:web`). +- The `navigatorCore` set lexicon (NSIDs) once published, and the Navigator native + client-metadata JSON to host. diff --git a/rust/docs/atproto-oauth-findings.md b/rust/docs/atproto-oauth-findings.md new file mode 100644 index 00000000..668c0795 --- /dev/null +++ b/rust/docs/atproto-oauth-findings.md @@ -0,0 +1,267 @@ +# AT Protocol federation — initial findings (for the Edge team) + +Status of the DecodingUs (Rust) side of AT Protocol federation, and the open +points to settle jointly before the end-to-end handshake can be tested. + +## Design pivot + +We are **dropping the custom "private firehose."** Current atproto direction +resolves its purpose: + +- **Permissions / permission sets** (mature spec): apps declare granular OAuth + scopes as lexicon-published permission sets; the PDS enforces them. 
+ +- **Private data bypasses the firehose**: private/group-private records live in a + separate namespace (no MST/commit, not broadcast); a consumer gets a + **notification then fetches the record from the PDS over scoped OAuth**. + (Working group; group-private spec still maturing upstream.) + +So federation = **OAuth (permission-scoped) access to PDS records + notify/fetch**, +not a bespoke relay. + +## What's built on our side (this repo) + +- `du-atproto`: DID/AT-URI parsing, `did:key` Ed25519 verification, DID-doc/PDS + resolution; OAuth client crypto — PKCE (S256), ES256 JOSE, **DPoP** proofs, + `private_key_jwt` client assertion, client + authorization-server metadata, + PAR/authorize/token builders. All unit-tested (PKCE vs RFC 7636 vector, ES256 + sign/verify, DPoP shape). +- `du-web`: serves the two documents below and wires `/login/atproto` (resolve → + PAR → redirect) and `/oauth/callback` (token exchange → session). DPoP-nonce + single-retry implemented. Session is our existing signed cookie; users are + upserted by DID (`ident.users` + an `atproto` login_info row). + +### Concrete artifacts to review / register + +Served at the deployed base URL (e.g. `https://decoding-us.com`): + +- **`/oauth/client-metadata.json`** — `client_id` = that URL; confidential web + client; `token_endpoint_auth_method = private_key_jwt`, `alg = ES256`; + `dpop_bound_access_tokens = true`; `redirect_uris = [".../oauth/callback"]`; + `scope` from `OAUTH_SCOPE`. +- **`/oauth/jwks.json`** — the client's public P-256 JWK (no private material); + `kid` = JWK thumbprint. Private key is supplied via `OAUTH_EC_KEY` (base64url + of the 32-byte scalar); if unset, an ephemeral key is generated and logged. + +## Open points to settle with the Edge team + +1. **Client auth method.** We assume a *confidential* web client using + `private_key_jwt` (ES256) + DPoP. Confirm the PDS/authorization server the + edge accounts use supports this (vs requiring a public client). +2. 
**Hosting / registration.** `client_id` must be a publicly reachable HTTPS URL + serving `client-metadata.json`, and `redirect_uris` must match exactly. + Confirm the production base URL and that `/oauth/*` is deployed there. +3. **Scopes / permission sets.** We default to `atproto transition:generic`. + What does the app actually need? We expect a **permission set lexicon** + (e.g. under `app.decodingus.*`) granting read access to the genomic record + collections. Edge team to define those collections + the permission set the + PDS will grant. +4. **Signing key lifecycle.** Persist `OAUTH_EC_KEY`; agree on rotation (JWKS can + publish multiple keys; `kid` already set). +5. **DPoP nonce.** We retry once on a server `DPoP-Nonce`. Confirm the + authorization server's nonce behavior (PAR + token endpoints). +6. **Identity resolution.** Handle→DID uses the HTTPS well-known method only + (DNS `_atproto` TXT is a future add). DID→PDS uses `plc.directory` for + `did:plc` and well-known for `did:web`. Confirm whether edge accounts use + `did:plc` (and a self-hosted PLC, if any) or `did:web`. +7. **Private data / notify-fetch.** Once group-private data lands, the app will + fetch records from the PDS with the access token. Define: which collections, + and the notification mechanism (does Navigator push to us, or do we poll?). + +## What we need from Edge to test end-to-end + +A test PDS + test account (handle + DID) and its authorization-server endpoints, +so `/login/atproto?handle=` can complete the real PAR → redirect → token +flow against it. Everything up to the network handshake is implemented and +unit-tested; the live exchange is the joint step. + +## Local PDS handshake — validated (2026-06) + +Stood up the official PDS in a local container and validated the live +**discovery + PAR** path (the browser redirect + token exchange still needs HTTPS +identity infra — see "remaining" below). 
+ +> **Navigator team:** the shared, client-facing version of this runbook — incl. +> test-account creation and the public/loopback-client gotchas — lives in +> `DUNavigator/documents/atmosphere/13-Local-PDS-Testing.md`. + +### Runbook + +```sh +# 1. Pull + boot the PDS (Apple `container`; each gets its own IP, no port-map). +container image pull ghcr.io/bluesky-social/pds:latest +mkdir -p /tmp/pdsdata/blocks +container run -d --name pds -v /tmp/pdsdata:/pds \ + -e PDS_HOSTNAME=pds.test -e PDS_PORT=3000 \ + -e PDS_JWT_SECRET=$(openssl rand --hex 16) \ + -e PDS_ADMIN_PASSWORD=$(openssl rand --hex 16) \ + -e PDS_PLC_ROTATION_KEY_K256_PRIVATE_KEY_HEX=$(openssl rand --hex 32) \ + -e PDS_DATA_DIRECTORY=/pds -e PDS_BLOBSTORE_DISK_LOCATION=/pds/blocks \ + -e PDS_DID_PLC_URL=https://plc.directory -e PDS_INVITE_REQUIRED=false -e PDS_DEV_MODE=true \ + ghcr.io/bluesky-social/pds:latest +IP=$(container ls | awk '$1=="pds"{print $6}' | cut -d/ -f1) # e.g. 192.168.64.5 + +# 2. Run the gated live handshake test (decodingus-shared). +PDS_TEST_URL=http://$IP:3000 cargo test -p du-atproto --test live_pds -- --nocapture +``` + +Gotchas learned: +- PDS rejects `.local` hostnames; use a `.test` domain. It needs `/pds` to exist + (bind-mount). It serves HTTP on `:3000` and expects TLS termination in front; + the OAuth **issuer is `https://PDS_HOSTNAME`** regardless. +- **DPoP `htu` must be the server's canonical endpoint** (`https://pds.test/oauth/par` + from metadata), NOT the transport URL you connect over (`http://:3000/...`). + Signing the transport URL yields `invalid_dpop_proof: DPoP "htu" mismatch`. +- The PDS issues a **`use_dpop_nonce`** on the first PAR; our single-retry with the + `DPoP-Nonce` response header then returns `201` + a `request_uri`. 
✓ + +### What this validates (`crates/du-atproto/tests/live_pds.rs`) + +Authorization-server metadata fetch + parse, the **public (loopback) client** PAR +form, the DPoP proof, and the `use_dpop_nonce` retry — accepted by a real atproto +auth server (`201 Created`, `request_uri` returned). Confirms `token_endpoint_auth_ +methods = [none, private_key_jwt]` and `client_id_metadata_document_supported`. + +### Remaining for the full browser loop (deferred) + +The redirect → consent → `code` → token exchange needs the auth server reachable +over **HTTPS at its canonical host** with a cert our client trusts, because DPoP +`htu` + the issuer are https-canonical. Options: a TLS reverse proxy at +`https://pds.test` (hosts entry + dev CA trusted by reqwest) for a full local loop, +or an HTTPS tunnel to a real account for the confidential-client path. Identity +resolution (handle→DID→PDS) similarly wants HTTPS well-known / a PLC. + +## Full browser loop over TLS — wired + verified to consent (2026-06) + +Closed the gap from "discovery + PAR" to a **real HTTPS handshake** against the +local PDS, using a TLS proxy so the auth server is reachable at its canonical +`https://pds.test` (issuer + DPoP `htu` are https-canonical, so this removes the +http-transport `htu` workaround entirely). + +> **One command:** `make oauth-dev` (Postgres + PDS + Caddy + CA + test account, +> then runs du-web with the env). `make oauth-up` for the stack only, `oauth-down` +> to tear down. Runtime-agnostic (Apple `container` or Docker) — see +> `scripts/oauth-dev.sh`. The sections below are the underlying mechanics it +> automates (and the manual-browser consent step, §6.3-equivalent, still applies). 
+ +### Infra: TLS proxy (Caddy internal CA) + +```sh +PDS_IP=$(container ls | awk '$1=="pds"{print $6}' | cut -d/ -f1) +printf '{\n auto_https disable_redirects\n}\npds.test {\n tls internal\n reverse_proxy %s:3000\n}\n' "$PDS_IP" > /tmp/Caddyfile +container run -d --name caddy -v /tmp/Caddyfile:/etc/caddy/Caddyfile docker.io/library/caddy:2 +CADDY_IP=$(container ls | awk '$1=="caddy"{print $6}' | cut -d/ -f1) +container exec caddy cat /data/caddy/pki/authorities/local/root.crt > /tmp/caddy_ca.crt # the dev CA +``` + +### du-web dev OAuth (public / loopback client) + +`oauth.rs` gained a dev path that avoids the confidential-client's hosted-metadata ++ HTTPS requirement: a **public loopback client** (PKCE, no client assertion), +fixed-PDS (skips handle→DID→PDS resolution), and an HTTP client that trusts the +dev CA and pins `pds.test`→IP (no `/etc/hosts` needed server-side): + +```sh +DATABASE_URL=... APP_SECRET=... PORT=9000 \ +OAUTH_BASE_URL=http://127.0.0.1:9000 \ +DU_OAUTH_DEV_PDS=https://pds.test \ +DU_OAUTH_DEV_CA=/tmp/caddy_ca.crt \ +DU_OAUTH_DEV_RESOLVE=pds.test:$CADDY_IP \ +DU_OAUTH_LOOPBACK=http://127.0.0.1:9000 \ +cargo run -p du-web +``` + +Route: `GET /login/atproto/dev?handle=` → discover → PAR → 303 to +`https://pds.test/oauth/authorize`. The callback (`/oauth/callback`) does the +**public** token exchange (`token_form_public`) → session. + +### Verified (no browser) + +- Discovery + PAR + DPoP + `use_dpop_nonce` retry over **canonical `https://pds.test`** + (dev CA trusted, host pinned) → `request_uri`, then a 303 to the authorize page. +- The authorize page renders (`200`) with our **loopback client accepted** — + `__authorizeData.clientMetadata.redirect_uris = [http://127.0.0.1:9000/oauth/callback]`. + +### Manual browser step (the consent) + +The authorize → sign-in → consent → `code` step is intentionally browser-gated +(requires `Sec-Fetch-*` headers + a minified SPA with CSRF), so it's completed in +a real browser rather than scripted: + +1. 
Trust `/tmp/caddy_ca.crt` in the browser (or click through the warning) and add + a hosts entry `pds.test → $CADDY_IP` (the browser needs name resolution; the + server side uses `DU_OAUTH_DEV_RESOLVE` instead). +2. Open `http://127.0.0.1:9000/login/atproto/dev?handle=alice.pds.test`. +3. Sign in (e.g. `alice.pds.test` / the account password) and approve. +4. The PDS redirects to `http://127.0.0.1:9000/oauth/callback?code=…&state=…`; + du-web exchanges the code (public flow) and sets the session cookie. + +The token-exchange path is implemented and ready; only the human consent click is +out of band. (A headless completion would mean reproducing the oauth-provider's +sign-in/accept SPA calls — brittle + version-specific; not worth scripting.) + +## Confidential web-client — joint test plan (2026-06) + +The public/loopback client (above) is verified end-to-end against a local PDS up +to consent. The **confidential** web client (`private_key_jwt` + hosted metadata) +adds two things the public flow doesn't exercise: the auth server **fetching + +validating our `client-metadata.json`/`jwks.json`**, and accepting the +**`private_key_jwt`** client assertion. These need our OAuth documents reachable +over HTTPS at the `client_id` host — which a local PDS on Apple `container` can't +resolve (no `--add-host` to point a fake `client_id` host at our box), so this is +the **joint test with the Edge team against a real PDS** (real DNS/HTTPS). + +### Our side — VERIFIED (no joint step needed) + +- `/oauth/client-metadata.json` is spec-correct: `client_id` == the doc URL, + `redirect_uris` https under the same origin, `token_endpoint_auth_method = + private_key_jwt`, `token_endpoint_auth_signing_alg = ES256`, + `dpop_bound_access_tokens = true`, `grant_types = [authorization_code, + refresh_token]`, `response_types = [code]`, `application_type = web`, `jwks_uri` + correct. (Verified live against `OAUTH_BASE_URL=https://decoding-us.com`.) 
+- `/oauth/jwks.json` is a clean public P-256 JWK (`use=sig`, `alg=ES256`, `kid` = + JWK thumbprint, no private material). +- `client_assertion` JWT, DPoP proof, PKCE, client-metadata shape — all unit-tested + in `du-atproto` (`es256_jws_roundtrips_and_verifies`: `alg=ES256`, `kid` == + thumbprint, `iss`/`sub` == client_id, `aud` == issuer, signature verifies). +- The confidential PAR/token forms + `use_dpop_nonce` single-retry are wired in + `du-web/oauth.rs` (`/login/atproto`); the request mechanics (PAR/DPoP/nonce) are + the same ones already proven live via the public-client path — the only delta is + adding the (unit-tested) `client_assertion`. + +### Prereqs for the joint test + +- **decodingus:** deploy `/oauth/client-metadata.json` + `/oauth/jwks.json` at the + public `client_id` host (`OAUTH_BASE_URL`, e.g. `https://decoding-us.com`); set a + **persisted `OAUTH_EC_KEY`** (base64url 32-byte scalar) so the JWKS `kid` is + stable; `redirect_uris` must exactly match the deployed callback. +- **Edge:** a test PDS + account (handle + DID) and its issuer; confirm the auth + server accepts a confidential client (`token_endpoint_auth_methods` includes + `private_key_jwt` — the reference PDS does) and supports + `client_id_metadata_document` fetch. + +### Steps + +1. **Metadata fetch.** Confirm the PDS can GET our `client_id` doc + `jwks_uri` + over HTTPS (200, `application/json`, `client_id` matches the URL). Watch our + access logs for the fetch. +2. **PAR.** `GET /login/atproto?handle=` → resolve handle→DID→PDS→issuer → + PAR (`private_key_jwt` + DPoP). Expect `use_dpop_nonce` then `201` + `request_uri`. +3. **Authorize + consent.** Redirect to `/oauth/authorize`; sign in + approve in + the browser → redirect to our `/oauth/callback?code&state`. +4. **Token.** Exchange the code (`private_key_jwt` + DPoP + nonce) → access/refresh + tokens; `sub` = the DID. We upsert the user by DID + set the session cookie. +5. 
**Scopes.** Confirm the granted scope matches what we requested (`atproto …` / + the agreed permission set). + +### Confirm on each side + +| decodingus | Edge / PDS | +|:---|:---| +| metadata + JWKS served at client_id; `kid` stable (persisted key) | PDS fetches + validates the client-metadata document | +| `redirect_uris` exact-match the deployed callback | `private_key_jwt` (ES256) assertion accepted at PAR + token | +| DPoP-nonce single-retry on PAR + token | the auth server's actual `DPoP-Nonce` behavior | +| session established; user upserted by DID | scopes / permission set actually granted | + +See the "Open points to settle with the Edge team" section above for the +still-open decisions (scopes/permission set, key lifecycle, identity resolution). diff --git a/rust/docs/scala-vs-rust-diff.md b/rust/docs/scala-vs-rust-diff.md new file mode 100644 index 00000000..608732cf --- /dev/null +++ b/rust/docs/scala-vs-rust-diff.md @@ -0,0 +1,192 @@ +# Scala (legacy) ↔ Rust (rewrite) — functional difference catalog + +What the legacy Play/Scala app does vs. what the Rust AppView does, by domain. +Derived from a survey of the Scala surface (47 controllers, ~385 routes, ~490 +files, Slick model, Pekko/Quartz jobs) cross-referenced against the Rust +workspace (`rust/`, verified route + schema inventory, 2026-06). + +**Legend:** ✅ ported (equivalent) · 🔁 re-scoped/replaced by design · 🟡 partial +(core ported, sub-features missing) · ⬜ in scope, not yet built · ➖ dropped +(deliberately out of scope) · 🌐 built but externally-gated. + +## TL;DR — the shape of the difference + +The Rust app is the **AppView**: a curated catalog + public read surface + +federated aggregation, with a **narrowed, inverted federation model** and several +legacy subsystems deliberately gone. 
+ +- **Biggest re-scope — federation.** Scala ran a **credential-holding relay**: an + inbound `POST /api/firehose/event` ingesting 11 collection types, a PDS fleet + (register/heartbeat/submissions/config), and an IBD relay (WebSocket) — all + authenticated by holding PDS keys / app passwords. Rust **inverts and narrows** + this: an **outbound Jetstream consumer** mirrors anonymized *summary* records + into `fed.*` reporting tables, plus a single machine-auth **proposal-intake** + endpoint. No inbound firehose, no fleet, no relay. +- **Auth replaced/upgraded.** Scala = app-password login + a bespoke + PDS-signature credential-holding scheme. Rust = proper **AT Protocol OAuth** + (PKCE, DPoP, `private_key_jwt`), app passwords deprecated. +- **Manual sample ingestion dropped.** Scala had hand-entry biosample/donor/ + sequence/publication-link endpoints (standard/external/PGP). Rust drops these — + curators work in Navigator; the AppView keeps catalog **review + naming** only. +- **Subsystems still to build (placeholder tables, no logic yet):** IBD matching, + the social layer, and sequencer-lab inference are **in scope** as AppView + coordination roles: the AppView spots IBD **introduction candidates** across the + federation (coordinates + tracks match state; Edge does the encrypted comparison + + holds raw autosomal); the social layer (messaging/consent/notifications/feed/ + reputation/group projects) underpins that and stands alone; and instrument→lab + inference lets Edge nodes skip a data-entry step. +- **Whole subsystems dropped (out of scope):** patronage/billing only. (STR + profiles were brought back into scope and shipped.) +- **Heavy genomics confirmed edge-only on both sides:** neither app does BAM/CRAM + extraction or variant calling server-side (Navigator/edge does it). + +--- + +## 1. 
Public HTTP / UI surface + +| Capability (Scala) | Rust | Notes | +|:---|:--|:---| +| Home / nav | ✅ | `/` | +| FAQ, Terms, Privacy, App-password help | ✅ | `/faq /terms /privacy /help/app-password` | +| About | ✅ | Rust-only consolidation of "content" pages | +| Reputation static page | ⬜ | rides the social layer (in scope) — not yet built | +| How-to-submit-tree static page | ➖ | not ported | +| sitemap.xml, robots.txt, health | ✅ | | +| Login / logout | ✅ | session cookie | +| App-password auth | 🔁 | replaced by OAuth; app passwords deprecated | +| Cookie consent | 🟡 | Rust: `POST /cookie-consent` + JS banner. Scala also had `GET /cookies/check`; Rust checks the cookie client-side | +| Profile view + update | ✅ | `/profile` shows account fields; `POST /profile` updates the display name (`du_db::auth::update_display_name`). **Built 2026-06** | +| Language switch | ✅ | `/language/:lang` | +| Y/MT tree — two SVG cladogram render modes (horizontal + vertical) | ✅ | **Rewritten 2026-06** (`du-web/tree_layout.rs` ports `TreeLayoutService`): server-computed inline SVG, breadcrumb re-root, `?orient=h\|v` toggle persisted to `tree_orient` cookie, search-by-name-or-variant, backbone/recent node coloring + legend, fixed depth window (re-root descends) replacing the legacy backbone-collapse. `/ytree /mtree`, HTMX `#tree-container` fragment, `/api/v1/{y,mt}-tree` | +| SNP sidebar fragment | ✅ | `GET /{y,mt}tree/snp/:name` → HTMX `#snpSidebar`; lists defining variants (name/type/aliases/coords) | +| Variant browser + fragments + by-id + by-haplogroup API | ✅ | | +| Variant export | 🔁 | Scala: daily **gzipped JSONL** file artifact + metadata. 
Rust: **live CSV** stream `/api/v1/variants/export` + `/export/metadata` | +| References/publications list + API | ✅ | | +| Public "submit publication" DOI form | ✅ | **Built 2026-06** `GET/POST /references/submit` (`references.rs`): resolves the DOI via OpenAlex and queues a pending `publication_candidate` for curator review (never a published reference directly) — feeds `/curator/publications`. reCAPTCHA-guarded when configured | +| Biosample map (PostGIS) + geo-data + studies API | ✅ | | +| Coverage benchmarks UI + API | ✅ | | +| Coverage per-lab list + lab-benchmark fragments | ✅ | **Built 2026-06** `/coverage/labs` (two-panel: labs list + per-lab test-type fragment), alongside the flat `/coverage-benchmarks` | +| Genome-regions public API (builds + by-build) | ✅ | | +| Contact form | ✅ | `/contact` → `support.contact_message` (+ reCAPTCHA when configured) | +| My-messages (user threads + badge) | ⬜ | social layer is **in scope** (supports IBD coordination + standalone) — not yet built | +| Sequencer lab-by-instrument lookup API | ⬜ | **in scope** — lets Edge auto-resolve the lab (skip a data-entry step); not yet built | +| Inbound firehose event endpoint | 🔁 | see Federation — replaced by outbound Jetstream consume | +| PDS registration endpoint | ➖ | fleet model dropped | +| IBD discovery/introduction + consent endpoints | ⬜ | **in scope** (AppView coordinates candidates + dual-consent + match-state); the WebSocket *relay* stays Edge-to-Edge | +| Legacy project CRUD endpoints | ➖ | were already deprecated in Scala | +| OpenAPI / Swagger UI | ✅ | Rust documents the **public read API only**; mgmt/curation deliberately excluded | +| **Federated population reports** | ➕ | **Rust-new:** `/api/v1/reports/{coverage,ancestry,haplogroups}` over the `fed.*` mirror | + +## 2. 
Curator / admin surface + +| Capability (Scala) | Rust | Notes | +|:---|:--|:---| +| Change-set lifecycle (list/detail/start-review/apply/discard/approve-all/per-change review/comments/diff) | ✅ | JSON `/manage/change-sets/*` + HTMX UI `/curator/change-sets` | +| Change-set conflict resolution (resolve reparent/edit-variants/merge-existing/defer, deferred list, tree-preview, ambiguity report) | ✅ | **Built 2026-06** (`du-db/wip.rs` + `du-web/routes/reviews.rs`, `/curator/reviews`): SNP-graft Phase-4 flags + name-collisions + graft-blocked items are staged into the `tree.wip_*` tables (`tree-init --stage-review`); a two-panel HTMX screen shows each with SNP-scatter context + tree-preview and a resolution form (accept-anchor / reparent / merge-existing / defer); decisions (`wip_resolution`) are enacted by the change-set apply engine's WIP pass. Remaining: `EDIT_VARIANTS` resolution + cascading a graft-blocked *subtree* from one decision | +| Haplogroup merge (full + preview) | ✅ | `/manage/haplogroups/merge[/preview]` | +| Haplogroup merge — explicit subtree endpoint | 🟡 | Rust's merge algorithm is subtree-scoped by design; no separate `/merge/subtree` route | +| Haplogroup CRUD | ✅ | `/curator/haplogroups/*` | +| Haplogroup restructure (split / merge-into-parent / reparent as discrete ops) | ✅ | **Built 2026-06** (`du_db::haplogroup` reparent/merge_into_parent/split + `/curator/haplogroups/:id/{reparent,merge,split}`): direct temporal-model edits from the haplogroup detail panel, with cycle/name/root guards. (Bulk change-set authoring still available for batch work.) 
| +| Variant CRUD | ✅ | `/curator/variants/*` | +| Haplogroup↔variant associate/remove | ✅ | curator | +| Haplogroup↔variant association history | 🟡 | `ident.audit_log` exists (mig 0010); no per-association history route | +| Genome-region curation (CRUD UI) | ✅ | `/curator/regions/*` | +| Genome-region management API (+ bulk + bootstrap-from-CHM13) | ⬜ | Rust does region ingestion via jobs/ETL (du-bio), not a curator API | +| Genomics admin manual triggers (YBrowse/HipSTR/regions bootstrap) | 🔁 | Rust runs **YBrowse ingest as a scheduled job**; no manual admin trigger endpoints; HipSTR not ported | +| Curation/discovery proposals — intake → review → name → promote (proposed branches) | ✅ | `/manage/curation/proposals` (X-API-Key) → `/curator/proposals` review/promote → `tree.proposed_branch` → catalog | +| Publication-candidate review UI (accept/reject/defer/bulk) | ✅ | **Built 2026-06** (`du-db/publication.rs` candidate fns + `du-web/routes/publications.rs`, `/curator/publications`): status-filtered queue + review panel (title/journal/date/DOI/abstract/relevance) with Accept (promote → `pubs.publication`) / Reject / Defer. Single-item; **bulk** actions not yet built | +| Sequencing-lab admin CRUD | ⬜ | lab-inference **in scope** — not yet built | +| Instrument/sequencer proposals review | ⬜ | lab-inference **in scope** (consensus from observations → curator) | +| Support admin (message triage/reply/status) | ⬜ | rides the social/messaging layer (**in scope**) | +| Biosample original-haplogroup assignment (per-publication) | ➖ | manual-ingestion concern → Navigator | +| Curator dashboard | ✅ | `/curator` | + +## 3. 
Federation / AT Protocol / identity + +| Capability (Scala) | Rust | Notes | +|:---|:--|:---| +| Inbound firehose `POST /api/firehose/event` (11 collection handlers, credential-holding) | 🔁 | **Replaced** by an outbound **Jetstream consumer** mirroring published *summary* records into `fed.*` (alignment/biosample/sequencerun/project/workspace/genotype/populationBreakdown/haplogroupReconciliation) | +| Branch-discovery harvest of `privateVariants` from ingested biosamples | 🔁 | replaced by the **proposal-intake** API (Navigator submits → pool/consensus → curator) | +| instrumentObservation / matchConsent / matchRequest / groupProject / projectMembership | ⬜ | lab-inference / IBD / social are **in scope**; ingested via Jetstream/coordination (not the old inbound firehose) | +| PDS registration + fleet (heartbeat, submissions, config, node removal) | ➖ | the credential-holding fleet is the dropped network-mirror; `fed.pds_*` tables exist but are unused | +| IBD matching: discovery/suggestions, requests, consent, match-state | ⬜ | **in scope** — AppView coordinates + tracks match state (`ibd` schema 0007); Edge does the comparison + holds raw autosomal (no relay in AppView) | +| Auth: app-password login + PDS-signature (Ed25519/P-256) credential-holding verification | 🔁 | **Replaced** by AT Protocol **OAuth** — PKCE(S256), DPoP, `private_key_jwt` confidential client + public/loopback client; served `client-metadata.json`/`jwks.json` | +| AT Proto OAuth (auth-server/client metadata models only, endpoints unimplemented) | 🌐 | Rust **implements** the handshake; verified live to consent against a local PDS; confidential round-trip is the Edge joint test | +| DID/handle resolution (DNS+well-known, did:plc/did:web), PDS discovery | ✅ | `du-atproto` | +| Patronage / billing API (subscriptions, tiers, Stripe/PayPal) | ➖ | not in production (`billing` placeholder) | + +## 4. 
Sample ingestion / donors / sequencing / genomics + +| Capability (Scala) | Rust | Notes | +|:---|:--|:---| +| Biosample create/update — standard, external/citizen, PGP (manual) | ➖ | manual sample-entry APIs dropped — curators use Navigator | +| Sequence-data + file-metadata linking, publication linking, haplogroup assignment (manual) | ➖ | dropped (manual ingestion) | +| Specimen-donor merge (conflict strategies) | ➖ | manual ingestion concern | +| Sequencer↔lab association + proposals | ⬜ | lab-inference **in scope** | +| Projects (controller scaffolded/empty in Scala) | 🟡 | Rust mirrors `project` as a read-only `fed.*` reporting row; no project management | +| YBrowse Y-SNP ingest (GFF3 parse, normalize, **liftover** to GRCh38/GRCh37/hs1) | ✅ | **Reworked 2026-06 to ingest `snps_hg38.gff3`** — the central document that the authorities flow through. `du-jobs/ybrowse` streams the GFF3 (~3M lines), lifts GRCh38→GRCh37/hs1 (`du-bio` chains), and writes the **`source.ybrowse_snp` mirror** (verbatim); `du_db::ybrowse::reconcile` then *derives* `core.variant` — folding synonyms (~339k physical SNPs have ≥2 names) into one row each, capturing authority metadata into `evidence` (mig 0017/0018), **idempotently and without clobbering curator decisions** (full-snapshot source has no deltas). Configured via the `YBROWSE_GFF` env var. (Old GRCh38-VCF + direct-upsert paths retired.) | +| HipSTR STR ingest + liftover | ➖ | HipSTR ingest not ported; STR profiles instead arrive via the `fed.str_profile` Jetstream mirror (see §5) | +| Genome-region bootstrap from S3/CHM13 + liftover | 🟡 | du-bio has the liftover; the S3-bootstrap pipeline isn't ported (regions seeded via migrations/ETL) | +| BAM/CRAM extraction, coverage compute, variant calling | ➖ (both) | edge-only on both sides — Navigator does it; AppView aggregates summaries | +| ENA study-metadata client + enrichment | ✅ | `du-external::ena` + `ena-study-enrichment` job (Scala fetched ENA via the submit form) | +| NCBI/PubMed metadata client + enrichment | ✅ | `du-external::ncbi` + `publication-pubmed-update` job | + +## 5. 
Data model / schema + +Rust schema (migrations `0001–0012`): `core, tree, genomics, pubs, ident, fed, +ibd, social, billing` + audit + coverage-mirror + fed-reporting. + +| Scala entity area | Rust | Notes | +|:---|:--|:---| +| Variants, haplogroups, relationships, anchors, change-sets, tree_change, wip_*, proposed-branch | ✅ | full catalog + versioning + merge | +| Biosamples, donors, callable-loci, variant-calls | ✅ | catalog side (ETL-loaded; no manual create) | +| Genomics: sequencing, alignment, coverage, test-types, populations | ✅ | | +| **Pangenome** (graph/node/path/variant-link/coverage) | 🟡 | tables + ETL present (mig 0004 / `du-migrate`); **no surfaced API/UI** (same as Scala — modeled, thin surface) | +| **STR profiles + signatures + prediction + age** | ✅ | **Brought into scope 2026-06.** P1: `fed.str_profile` mirror (Jetstream), `du-db::ystr` modal aggregation → `tree.haplogroup_ancestral_str`, recompute job, `GET …/str-signature`. P2: `ystr::predict` (genetic distance) at `POST /api/v1/str/predict` + STR→WGS nudge. **STR age** (`StrAgeService` analog, McDonald 2021): `ystr::compute_str_age` → `tree.haplogroup_age_estimate` (`STR_VARIANCE`), `GET …/age` — a contributing factor, not authoritative `tmrca_ybp`. `genomics.str_mutation_rate` table present (ships empty; default rate until imported). **Combined-age framework DONE** (`du-db::age`): inverse-variance Gaussian combine of STR + **SNP-Poisson** (`t=Σm/(µ·Σb)`) + **genealogical/aDNA anchor** terms → `COMBINED` estimate, gap-fills `tmrca_ybp` (curated values preserved). SNP/anchor terms data-gated. 
Remaining: `formed_ybp` + aDNA-calibration refinement ⬜ | +| Publications, studies, candidates, search configs | ✅ | | +| ident: users, roles, permissions, login-info, pds-info, cookie-consent | ✅ | + `audit_log` | +| federation: `pds_node/heartbeat/fleet_config/submission` | 🟡 | tables exist (mig 0008) but **unused** (fleet dropped); `fed.coverage_summary` + `fed.*` reporting tables are the live federation store | +| social: messages, conversations, feed, blocks, reputation | ⬜ | **in scope** — placeholder tables (mig 0009); logic/endpoints to build | +| group projects + membership/policies | ⬜ | **in scope** (part of the social layer) — to build | +| billing: patron subscriptions | ➖ | placeholder; no logic | +| IBD: suggestions, discovery-index, attestations, overlap scores | ⬜ | **in scope** — placeholder (mig 0007); logic to build | +| support: contact messages | ✅ | `support.contact_message` + `du-db::support` | + +## 6. Scheduled jobs + +| Scala job | Rust | Notes | +|:---|:--|:---| +| PublicationUpdater (OpenAlex, bi-weekly) | ✅ | `publication-update` | +| PublicationDiscovery (OpenAlex, weekly) | ✅ | `publication-discovery` (creates candidates; review UI at `/curator/publications`) | +| YBrowseVariantUpdate (weekly) | ✅ | `ybrowse-variant-ingest` | +| VariantExport (daily gzipped JSONL) | 🔁 | replaced by the live CSV endpoint; no file-artifact job | +| MatchDiscovery (daily IBD candidate scan) | ⬜ | **in scope** — IBD introduction-candidate discovery job, to build | +| — | ➕ | **Rust-new:** `ena-study-enrichment`, `publication-pubmed-update`, `db-heartbeat`, the **Jetstream coverage/reporting-mirror consumer** | + +## Net summary + +- **Equivalent or improved:** catalog (variants/haplogroups/tree) + versioning + + merge, public read surface + JSON API, coverage/maps/references, OpenAlex/ENA/ + NCBI enrichment, OAuth (upgraded), curation proposal flow, curator change-set + review UI. 
+- **Re-scoped by design:** federation (inbound relay → outbound summary mirror + + proposal intake), auth (app-password/PDS-signature → OAuth), variant export + (file → live CSV), genomics ingest triggers (manual → scheduled). +- **Dropped (out of scope):** manual sample ingestion, patronage/billing, PDS + fleet, AppView→PDS backfeed, HipSTR ingest, server-side BAM/CRAM. (STR profiles + were **brought back into scope** 2026-06 — Phase 1/2 shipped.) +- **In scope, not yet built — coordination subsystems (placeholder schemas):** + **IBD matching** (AppView discovers introduction candidates + dual-consent + + match-state tracking; Edge does the comparison), the **social layer** + (messaging/consent/notifications/feed/reputation/group projects — underpins IBD + coordination + standalone), **sequencer-lab inference** (instrument→lab lookup + + consensus discovery, so Edge skips a data-entry step), **haplogroup-discovery + automation**, and **multi-test-type completion**. Plus the **region management + API + bootstrap-from-CHM13** (region CRUD UI already exists). (Built 2026-06: + change-set conflict-resolution UI + `wip_*` staging — §2 `/curator/reviews`; + publication-candidate review UI — §2 `/curator/publications`; public DOI-submit + form — §1 `/references/submit`; profile update — §1 `POST /profile`; haplogroup + restructure ops — §2 `/curator/haplogroups`; per-lab coverage drill-down — §1 + `/coverage/labs`.) +- **Externally gated:** confidential-OAuth Edge joint test; current-schema dump + for ETL cutover (see STATUS "Cutover blocker"). diff --git a/rust/locales/en.txt b/rust/locales/en.txt new file mode 100644 index 00000000..0c28cfc7 --- /dev/null +++ b/rust/locales/en.txt @@ -0,0 +1,491 @@ +# English message catalog. key=value, '#' comments. Ported subset (slice scope). 
+app.name=Decoding Us +nav.home=Home +nav.ytree=Y-DNA Tree +nav.mtree=mtDNA Tree +nav.variants=Variants +nav.references=References +nav.map=Map +nav.coverage=Coverage +nav.curator=Curator +nav.tools=Tools +nav.profile=Profile +nav.api=API +nav.login=Login +nav.logout=Logout +footer.copyright=© DecodingUs +lang.label=Language +lang.en=English +lang.es=Español +lang.fr=Français + +home.title=Decoding Us — genetic genealogy & population research +home.heading=Decoding Us +home.lead=A collaborative platform for genetic genealogy and population research — Y/mtDNA haplogroup trees, a public variant browser, and privacy-preserving relative discovery. +home.cta.tree=Browse the Y-DNA tree +home.cta.variants=Search variants + +variants.title=Variant Browser +variants.search.placeholder=Search by name or alias (e.g. M269, rs9786153)… +variants.col.name=Name +variants.col.type=Type +variants.col.status=Status +variants.col.builds=Builds +variants.none=No variants match. +variants.detail.select=Select a variant to see details. +variants.detail.naming=Naming status: +variants.detail.aka=Also known as: +variants.detail.rsids=rs IDs: +variants.detail.coordinates=Coordinates +variants.detail.nocoords=No mapped coordinates. +variants.col.build=Build +variants.col.contig=Contig +variants.col.position=Position +variants.col.change=Change + +pagination.previous=Previous +pagination.next=Next +pagination.page=Page +pagination.of=of +pagination.total=total + +tree.title.y=Y-DNA Tree +tree.title.mt=mtDNA Tree +tree.loading=Loading tree… +tree.allRoots=← All roots +tree.rootLineages=Root lineages +tree.noChildren=No child haplogroups. 
+tree.formed=formed +tree.search.placeholder=Haplogroup or variant… +tree.search.button=Go +tree.orientation=Orientation +tree.depth=Depth +tree.horizontal=Horizontal +tree.vertical=Vertical +tree.legend.backbone=Backbone +tree.legend.recent=Recently updated +tree.legend.default=Other +tree.variants=variants +tree.samples=samples +tree.samples.title=Placed samples +tree.samples.more=more +tree.snp.none=No defining variants recorded. +tree.snp.aliases=aka +tree.snp.recurrent=recurrent +tree.snp.recurrent.help=This SNP also occurs on other branches (homoplasy). +tree.snp.backMutation=back-mutation +tree.snp.backMutation.help=This branch reverted to the ancestral state. +tree.snp.close=Close +tree.prov.title=Provenance +tree.prov.source=Source +tree.prov.aka=Also known as +tree.prov.updated=Updated + +references.title=References +references.search.placeholder=Search by title, journal, or DOI… +references.col.title=Title +references.col.journal=Journal +references.col.year=Year +references.col.citations=Citations +references.none=No publications match. +references.select=Select a publication to see its samples. +references.viewDoi=DOI +references.biosamples.title=Samples in this study +references.biosamples.none=No samples linked to this publication. +references.col.accession=Accession +references.col.alias=Alias +references.col.source=Source +references.col.description=Description + +map.title=Biosample Map +map.samples=samples +map.loading=Loading map… + +coverage.title=Coverage Benchmarks +coverage.col.lab=Lab +coverage.col.testType=Test type +coverage.col.libraries=Libraries +coverage.col.meanDepth=Mean depth +coverage.col.cov10x=Coverage ≥10× +coverage.col.expectedDepth=Expected +coverage.none=No coverage data yet. + +auth.login.title=Sign in +auth.login.handle=Handle or email +auth.login.password=Password +auth.login.submit=Sign in +auth.login.error=Invalid handle or password. 
+ +curator.title=Curator Dashboard +curator.welcome=Signed in as +curator.roles=Roles +curator.tool.haplogroups=Haplogroups +curator.tool.variants=Variants +curator.tool.regions=Genome regions +curator.tool.proposals=Proposals + +prop.title=Curation Proposals +prop.filter.all=All statuses +prop.col.name=Proposed name +prop.col.parent=Parent +prop.col.status=Status +prop.col.evidence=Evidence +prop.col.submitters=Submitters +prop.col.confidence=Confidence +prop.none=No proposals match. +prop.select=Select a proposal to review. +prop.evidence=Evidence +prop.col.dna=DNA +prop.variants=Defining variants +prop.var.name=Variant +prop.var.support=Support +prop.split.note=The engine flagged a diverging submitter — this branch may need to be split. Review the supporting variants before deciding. +prop.notes=Notes (optional) +prop.approve=Approve +prop.reject=Reject +prop.defer=Defer +prop.promote=Promote to catalog + +hg.title=Haplogroups +hg.new=New haplogroup +hg.search=Search by name… +hg.filter.all=All lineages +hg.col.name=Name +hg.col.type=Lineage +hg.col.lineage=Path +hg.none=No haplogroups match. +hg.select=Select a haplogroup, or create one. +hg.field.name=Name +hg.field.type=Lineage +hg.field.lineage=Path +hg.field.source=Source +hg.field.formed=Formed (ybp) +hg.field.tmrca=TMRCA (ybp) +hg.save=Save +hg.cancel=Cancel +hg.edit=Edit +hg.delete=Delete +hg.delete.confirm=Delete this haplogroup? +hg.deleteBlocked=Cannot delete: it still has tree relationships. + +var.title=Variants +var.new=New variant +var.search=Search by name or alias… +var.col.name=Name +var.col.type=Type +var.col.status=Status +var.none=No variants match. +var.select=Select a variant, or create one. 
+var.field.name=Canonical name +var.field.type=Mutation type +var.field.status=Naming status +var.field.commonNames=Common names (comma-separated) +var.field.rsIds=rs IDs (comma-separated) +var.field.builds=Coordinate builds +var.save=Save +var.cancel=Cancel +var.edit=Edit +var.delete=Delete +var.delete.confirm=Delete this variant? +var.deleteBlocked=Cannot delete: it defines a haplogroup. + +region.title=Genome Regions +region.new=New region +region.search=Search by name or type… +region.col.type=Type +region.col.name=Name +region.col.builds=Builds +region.none=No regions match. +region.select=Select a region, or create one. +region.field.type=Region type +region.field.name=Name +region.field.coordinates=Coordinates (JSON) +region.field.properties=Properties (JSON) +region.save=Save +region.cancel=Cancel +region.edit=Edit +region.delete=Delete +region.delete.confirm=Delete this region? + +# Change-set / merge review +curator.tool.change_sets=Change sets +cs.title=Change-Set Review +cs.filter.all=All statuses +cs.col.source=Source +cs.col.type=Type +cs.col.status=Status +cs.col.changes=Changes +cs.col.created=Created +cs.none=No change sets match. +cs.select=Select a change set to review. +cs.finalized=Finalized by +cs.diff.added=added +cs.diff.removed=removed +cs.diff.modified=modified +cs.diff.reparented=reparented +cs.changes=Changes +cs.values=values +cs.changes.none=No changes. +cs.approve=Approve +cs.reject=Reject +cs.start_review=Start review +cs.approve_all=Approve all +cs.apply=Apply +cs.apply.confirm=Apply approved changes to the live tree? +cs.discard=Discard +cs.discard.confirm=Discard this change set? +cs.comments=Comments +cs.comments.none=No comments yet. +cs.comment.placeholder=Add a comment… +cs.comment.add=Add +cs.notice.no_transition=No state change (not allowed in the current status). +cs.notice.approved=changes approved. 
+cs.notice.applied=Applied +cs.diff.detail=Diff detail + +# Secondary pages + cookie consent +nav.about=About +page.about.title=About +page.faq.title=FAQ +page.terms.title=Terms of Service +page.privacy.title=Privacy Policy +page.cookies.title=Cookie Policy +page.reputation.title=Reputation +page.updated=Last updated June 2026. +consent.message=We use essential cookies to sign you in, and non-essential cookies only with your consent. See our +consent.accept=Accept +consent.decline=Decline + +# Profile + contact +profile.title=Your Profile +profile.name=Display name +profile.roles=Roles +profile.handle=Handle +profile.email=Email +profile.member_since=Member since +contact.title=Contact +contact.name=Name +contact.email=Email +contact.subject=Subject +contact.message=Message +contact.send=Send +contact.sent=Thanks — your message has been sent. +contact.error.empty=Please enter a message. +contact.error.captcha=Captcha verification failed. Please try again. + +# Curator merge-review (curator/reviews) + +# Curator publication-candidate review (curator/publications) +curator.tool.publications=Publication candidates +pc.title=Publication Candidates +pc.intro=Papers surfaced by the OpenAlex discovery job, awaiting editorial review. +pc.filter.pending=Pending +pc.filter.accepted=Accepted +pc.filter.rejected=Rejected +pc.filter.deferred=Deferred +pc.filter.all=All +pc.select=Select a candidate to review. +pc.none=No candidates match. +pc.col.title=Title / journal +pc.col.date=Date +pc.col.status=Status +pc.col.relevance=Score +pc.journal=Journal +pc.date=Published +pc.relevance=Relevance +pc.abstract=Abstract +pc.accept=Accept +pc.accept.confirm=Promote this candidate to a published reference? +pc.reject=Reject +pc.defer=Defer +pc.accepted_note=Accepted — promoted to the references catalog. +pc.notice.accepted=Accepted and promoted to references +pc.notice.rejected=Rejected. +pc.notice.deferred=Deferred for later. 
+ +# Public DOI-submit form (references/submit) +submit.cta=Suggest a paper +submit.title=Suggest a Publication +submit.intro=Know a relevant Y/mtDNA or population-genetics paper we're missing? Submit its DOI and a curator will review it. +submit.doi=DOI +submit.doi.help=Paste the DOI (or a doi.org link). We look it up in OpenAlex. +submit.button=Submit for review +submit.back=Back to references +submit.queued=Thanks — queued for curator review: +submit.error.empty=Please enter a DOI. +submit.error.captcha=Captcha verification failed. Please try again. +submit.error.exists=That paper is already in our references. +submit.error.notfound=We couldn't find that DOI in OpenAlex. Check it and try again. +submit.error.lookup=Lookup failed — please try again later. +profile.save=Save +profile.saved=Profile updated. + +# Curator haplogroup restructure ops +hg.parent=Parent +hg.restructure=Restructure (reparent / merge / split) +hg.reparent=Reparent +hg.reparent.placeholder=New parent name +hg.merge=Merge into parent +hg.merge.confirm=Merge this node into its parent? Its children move up and its variants fold in; the node is removed. +hg.split=Split off +hg.split.name=New child name +hg.split.move=Variants to move (comma-separated) +hg.split.variants=Defining variants +hg.op.unknown=Unknown node +hg.op.no_variants=None of those variant names are on this node. + +# Per-lab coverage drill-down +coverage.labs.byLab=View by lab +coverage.labs.title=Coverage by Lab +coverage.labs.allLabs=All labs (flat) +coverage.labs.testTypes=Test types +coverage.labs.select=Select a lab to see its coverage by test type. + +# Curator variant naming authority (curator/naming) +curator.tool.naming=Variant naming +nm.title=Variant Naming Authority +nm.intro=Assign official DU names to unnamed or flagged variants. Reuse an established name where one exists; otherwise mint the next DU identifier. 
+nm.mode.needs=Needs a name +nm.mode.pending=Pending review +nm.mode.named=Named (DU) +nm.mode.backlog=Unnamed backlog +nm.mode.all=All +nm.select=Select a variant to name. +nm.none=No variants match. +nm.unnamed=(unnamed) +nm.col.name=Name +nm.col.coord=GRCh38 +nm.col.defines=Defines +nm.col.status=Status +nm.coord=GRCh38 coordinate +nm.type=Mutation type +nm.defines=Defines branch +nm.aliases=Known as +nm.dedup.warn=A named variant already exists at this coordinate: +nm.dedup.hint=Consider reusing it (add as an alias on the branch) instead of minting a new DU name. +nm.assign=Assign DU name +nm.assign.confirm=Mint the next DU identifier for this variant and publish it as NAMED? +nm.flag=Flag for review +nm.unflag=Back to unnamed +nm.named_note=Already named — DU identifier assigned. +nm.notice.minted=Minted + +# Curator YBrowse reconcile-flag review (curator/reconcile-flags) +curator.tool.reconcile_flags=Reconcile flags +rf.title=Reconcile Flags +rf.intro=YBrowse synonym clusters whose names are split across multiple catalog variants — pick the one to keep and merge the rest. +rf.select=Select a flag to resolve. +rf.none=No reconcile flags — nothing to resolve. +rf.col.locus=Locus +rf.col.names=Synonyms +rf.col.variants=Variants +rf.synonyms=YBrowse synonyms +rf.split_note=These names are one physical SNP per YBrowse, but the catalog has them split across the variants below. Choose the canonical variant to keep; the others merge into it (links, aliases) and are removed. +rf.keep=Keep +rf.col.canonical=Canonical +rf.col.defines=Defines branches +rf.merge=Merge +rf.merge.confirm=Merge the other variants into the selected one? Their tree links and aliases move to it and they are deleted. +rf.merge.hint=The unselected variants are folded into the keeper. +rf.notice.merged=Merged variants: +rf.resolved=Resolved +rf.resolved.note=This flag has been resolved. 
+ +# Curator sequencer-lab proposal review (curator/instrument-proposals) +curator.tool.instrument_proposals=Sequencer-lab proposals +ip.title=Sequencer-Lab Proposals +ip.intro=Inferred instrument→lab associations from federated sequencing runs. Accept to set the lab the public lookup resolves, or reject to suppress it. +ip.select=Select a proposal to review. +ip.none=No proposals. +ip.col.instrument=Instrument +ip.col.lab=Proposed lab +ip.col.support=Obs / citizens +ip.col.confidence=Confidence +ip.col.status=Status +ip.col.platform=Platform +ip.col.model=Model +ip.col.citizen=Citizen +ip.filter.all=All +ip.filter.ready=Ready +ip.filter.pending=Pending +ip.filter.accepted=Accepted +ip.filter.rejected=Rejected +ip.obs=Observations +ip.citizens=Distinct citizens +ip.observations=Supporting observations +ip.lab_name=Lab name +ip.manufacturer=Manufacturer +ip.model=Model +ip.is_d2c=Direct-to-consumer (D2C) lab +ip.reason=Reason (optional) +ip.accept=Accept +ip.reject=Reject +ip.accept.confirm=Set this lab for the instrument? This resolves the public lookup. +ip.reject.confirm=Reject this association? The dominant lab will not be re-proposed for this instrument. +ip.accept.hint=Accepting sets the instrument's lab. +ip.notice.accepted=Accepted — lab set: +ip.notice.rejected=Proposal rejected. +ip.resolved=Resolved +ip.resolved.note=This proposal has been resolved. + +# Per-sample public report (samples/report.html) +sample.identity.title=Sample +sample.field.accession=Accession +sample.field.alias=Alias +sample.field.sex=Sex +sample.field.center=Center +sample.field.description=Description +sample.field.publications=Source publications +sample.ydna.title=Y-DNA pathway +sample.mtdna.title=mtDNA pathway +sample.pathway.noCall=No haplogroup call for this sample. +sample.pathway.unplaced=Called haplogroup is not yet placed in the tree. 
+sample.recon.consensus=Cross-technology consensus +sample.recon.runs=runs +sample.recon.confidence=confidence +sample.recon.concordance=concordance +sample.ybp=ybp +sample.map.title=Origin +sample.map.none=No geographic coordinate recorded. +sample.seq.title=Sequencing & coverage +sample.seq.platform=Platform +sample.seq.instrument=Instrument +sample.seq.testType=Test type +sample.seq.layout=Layout +sample.seq.reads=Reads +sample.seq.readLength=Read length +sample.seq.none=No sequencing data available. +sample.cov.build=Build +sample.cov.aligner=Aligner +sample.cov.testtype=Test type +sample.cov.mean=Mean depth +sample.cov.expected=Expected +sample.cov.cohort=cohort +sample.cov.10x=10x % +sample.cov.20x=20x % +sample.cov.30x=30x % +sample.cov.conformance=Vs. spec +sample.cov.below=Below spec +sample.cov.at=Meets spec +sample.cov.above=Above spec +sample.ancestry.title=Ancestry +sample.ancestry.none=No ancestry breakdown available. +sample.future.note=Predicted phenotype and genetic-distance metrics are not yet available. +sample.curator.visibility=Public visibility (curator) +sample.curator.public=Public +sample.curator.private=Private +sample.federated=Federated +curator.tool.denovo_conflicts=De-novo conflicts +dc.title=De-novo Conflicts +dc.intro=Reference clades (ISOGG / PhyloTree) whose de-novo placement disagrees — foreign tips inside the clade's home node, or clade members scattered elsewhere. Worst first. +dc.none=No conflicts. +dc.filter.all=All +dc.filter.y=Y-DNA +dc.filter.mt=mtDNA +dc.col.lineage=Lineage +dc.col.clade=Reference clade +dc.col.tips=Tips +dc.col.magnitude=Magnitude +dc.col.home=Home node +dc.col.foreign=Foreign in +dc.col.away=Members away diff --git a/rust/locales/es.txt b/rust/locales/es.txt new file mode 100644 index 00000000..f0ec1bc4 --- /dev/null +++ b/rust/locales/es.txt @@ -0,0 +1,479 @@ +# Spanish message catalog. 
+app.name=Decoding Us +nav.home=Inicio +nav.ytree=Árbol Y-ADN +nav.mtree=Árbol ADNmt +nav.variants=Variantes +nav.references=Referencias +nav.map=Mapa +nav.coverage=Cobertura +nav.curator=Curador +nav.tools=Herramientas +nav.profile=Perfil +nav.api=API +nav.login=Iniciar sesión +nav.logout=Cerrar sesión +footer.copyright=© DecodingUs +lang.label=Idioma +lang.en=English +lang.es=Español +lang.fr=Français + +home.title=Decoding Us — genealogía genética e investigación de poblaciones +home.heading=Decoding Us +home.lead=Una plataforma colaborativa para la genealogía genética y la investigación de poblaciones: árboles de haplogrupos Y/ADNmt, un explorador público de variantes y descubrimiento de parientes que preserva la privacidad. +home.cta.tree=Explorar el árbol Y-ADN +home.cta.variants=Buscar variantes + +variants.title=Explorador de variantes +variants.search.placeholder=Buscar por nombre o alias (p. ej. M269, rs9786153)… +variants.col.name=Nombre +variants.col.type=Tipo +variants.col.status=Estado +variants.col.builds=Ensamblajes +variants.none=No hay variantes coincidentes. +variants.detail.select=Seleccione una variante para ver los detalles. +variants.detail.naming=Estado de nomenclatura: +variants.detail.aka=También conocido como: +variants.detail.rsids=Identificadores rs: +variants.detail.coordinates=Coordenadas +variants.detail.nocoords=Sin coordenadas asignadas. +variants.col.build=Ensamblaje +variants.col.contig=Contig +variants.col.position=Posición +variants.col.change=Cambio + +pagination.previous=Anterior +pagination.next=Siguiente +pagination.page=Página +pagination.of=de +pagination.total=en total + +tree.title.y=Árbol Y-ADN +tree.title.mt=Árbol ADNmt +tree.loading=Cargando el árbol… +tree.allRoots=← Todas las raíces +tree.rootLineages=Linajes raíz +tree.noChildren=Sin haplogrupos descendientes. 
+tree.formed=formado +tree.search.placeholder=Haplogrupo o variante… +tree.search.button=Ir +tree.orientation=Orientación +tree.depth=Profundidad +tree.horizontal=Horizontal +tree.vertical=Vertical +tree.legend.backbone=Tronco principal +tree.legend.recent=Actualizado recientemente +tree.legend.default=Otro +tree.variants=variantes +tree.samples=muestras +tree.samples.title=Muestras ubicadas +tree.samples.more=más +tree.snp.none=No hay variantes definitorias registradas. +tree.snp.aliases=alias +tree.snp.recurrent=recurrente +tree.snp.recurrent.help=Este SNP también aparece en otras ramas (homoplasia). +tree.snp.backMutation=retromutación +tree.snp.backMutation.help=Esta rama revirtió al estado ancestral. +tree.snp.close=Cerrar +tree.prov.title=Procedencia +tree.prov.source=Origen +tree.prov.aka=También conocido como +tree.prov.updated=Actualizado + +references.title=Referencias +references.search.placeholder=Buscar por título, revista o DOI… +references.col.title=Título +references.col.journal=Revista +references.col.year=Año +references.col.citations=Citas +references.none=No hay publicaciones coincidentes. +references.select=Seleccione una publicación para ver sus muestras. +references.viewDoi=DOI +references.biosamples.title=Muestras de este estudio +references.biosamples.none=No hay muestras vinculadas a esta publicación. +references.col.accession=Número de acceso +references.col.alias=Alias +references.col.source=Origen +references.col.description=Descripción + +map.title=Mapa de muestras +map.samples=muestras +map.loading=Cargando el mapa… + +coverage.title=Referencias de cobertura +coverage.col.lab=Laboratorio +coverage.col.testType=Tipo de prueba +coverage.col.libraries=Bibliotecas +coverage.col.meanDepth=Profundidad media +coverage.col.cov10x=Cobertura ≥10× +coverage.col.expectedDepth=Esperada +coverage.none=Aún no hay datos de cobertura. 
+ +auth.login.title=Iniciar sesión +auth.login.handle=Usuario o correo +auth.login.password=Contraseña +auth.login.submit=Iniciar sesión +auth.login.error=Usuario o contraseña no válidos. + +curator.title=Panel del curador +curator.welcome=Sesión iniciada como +curator.roles=Roles +curator.tool.haplogroups=Haplogrupos +curator.tool.variants=Variantes +curator.tool.regions=Regiones genómicas +curator.tool.proposals=Propuestas + +prop.title=Propuestas de curación +prop.filter.all=Todos los estados +prop.col.name=Nombre propuesto +prop.col.parent=Padre +prop.col.status=Estado +prop.col.evidence=Evidencia +prop.col.submitters=Remitentes +prop.col.confidence=Confianza +prop.none=No hay propuestas coincidentes. +prop.select=Seleccione una propuesta para revisar. +prop.evidence=Evidencia +prop.col.dna=ADN +prop.variants=Variantes definitorias +prop.var.name=Variante +prop.var.support=Apoyo +prop.split.note=El motor detectó un remitente divergente — esta rama podría necesitar dividirse. Revise las variantes de apoyo antes de decidir. +prop.notes=Notas (opcional) +prop.approve=Aprobar +prop.reject=Rechazar +prop.defer=Aplazar +prop.promote=Promover al catálogo + +hg.title=Haplogrupos +hg.new=Nuevo haplogrupo +hg.search=Buscar por nombre… +hg.filter.all=Todos los linajes +hg.col.name=Nombre +hg.col.type=Linaje +hg.col.lineage=Ruta +hg.none=No hay haplogrupos coincidentes. +hg.select=Seleccione un haplogrupo o cree uno. +hg.field.name=Nombre +hg.field.type=Linaje +hg.field.lineage=Ruta +hg.field.source=Fuente +hg.field.formed=Formado (ybp) +hg.field.tmrca=TMRCA (ybp) +hg.save=Guardar +hg.cancel=Cancelar +hg.edit=Editar +hg.delete=Eliminar +hg.delete.confirm=¿Eliminar este haplogrupo? +hg.deleteBlocked=No se puede eliminar: aún tiene relaciones en el árbol. + +var.title=Variantes +var.new=Nueva variante +var.search=Buscar por nombre o alias… +var.col.name=Nombre +var.col.type=Tipo +var.col.status=Estado +var.none=No hay variantes coincidentes. 
+var.select=Seleccione una variante o cree una. +var.field.name=Nombre canónico +var.field.type=Tipo de mutación +var.field.status=Estado de nomenclatura +var.field.commonNames=Nombres comunes (separados por comas) +var.field.rsIds=Identificadores rs (separados por comas) +var.field.builds=Ensamblajes de coordenadas +var.save=Guardar +var.cancel=Cancelar +var.edit=Editar +var.delete=Eliminar +var.delete.confirm=¿Eliminar esta variante? +var.deleteBlocked=No se puede eliminar: define un haplogrupo. + +region.title=Regiones genómicas +region.new=Nueva región +region.search=Buscar por nombre o tipo… +region.col.type=Tipo +region.col.name=Nombre +region.col.builds=Ensamblajes +region.none=No hay regiones coincidentes. +region.select=Seleccione una región o cree una. +region.field.type=Tipo de región +region.field.name=Nombre +region.field.coordinates=Coordenadas (JSON) +region.field.properties=Propiedades (JSON) +region.save=Guardar +region.cancel=Cancelar +region.edit=Editar +region.delete=Eliminar +region.delete.confirm=¿Eliminar esta región? + +# Revisión de conjuntos de cambios +curator.tool.change_sets=Conjuntos de cambios +cs.title=Revisión de conjuntos de cambios +cs.filter.all=Todos los estados +cs.col.source=Origen +cs.col.type=Tipo +cs.col.status=Estado +cs.col.changes=Cambios +cs.col.created=Creado +cs.none=Ningún conjunto de cambios coincide. +cs.select=Selecciona un conjunto de cambios para revisar. +cs.finalized=Finalizado por +cs.diff.added=añadidos +cs.diff.removed=eliminados +cs.diff.modified=modificados +cs.diff.reparented=reubicados +cs.changes=Cambios +cs.values=valores +cs.changes.none=Sin cambios. +cs.approve=Aprobar +cs.reject=Rechazar +cs.start_review=Iniciar revisión +cs.approve_all=Aprobar todo +cs.apply=Aplicar +cs.apply.confirm=¿Aplicar los cambios aprobados al árbol en producción? +cs.discard=Descartar +cs.discard.confirm=¿Descartar este conjunto de cambios? +cs.comments=Comentarios +cs.comments.none=Aún no hay comentarios. 
+cs.comment.placeholder=Añadir un comentario… +cs.comment.add=Añadir +cs.notice.no_transition=Sin cambio de estado (no permitido en el estado actual). +cs.notice.approved=cambios aprobados. +cs.notice.applied=Aplicado +cs.diff.detail=Detalle de cambios + +# Páginas secundarias + consentimiento de cookies +nav.about=Acerca de +page.about.title=Acerca de +page.faq.title=Preguntas frecuentes +page.terms.title=Términos del servicio +page.privacy.title=Política de privacidad +page.cookies.title=Política de cookies +page.reputation.title=Reputación +page.updated=Última actualización junio de 2026. +consent.message=Usamos cookies esenciales para iniciar sesión y cookies no esenciales solo con tu consentimiento. Consulta nuestra +consent.accept=Aceptar +consent.decline=Rechazar + +# Perfil + contacto +profile.title=Tu perfil +profile.name=Nombre visible +profile.roles=Roles +profile.handle=Identificador +profile.email=Correo electrónico +profile.member_since=Miembro desde +contact.title=Contacto +contact.name=Nombre +contact.email=Correo electrónico +contact.subject=Asunto +contact.message=Mensaje +contact.send=Enviar +contact.sent=Gracias — tu mensaje ha sido enviado. +contact.error.empty=Por favor, escribe un mensaje. +contact.error.captcha=La verificación del captcha falló. Inténtalo de nuevo. + +# Backfill: curator/UI keys (matches en.txt) +curator.tool.publications=Candidatos de publicación +pc.title=Candidatos de publicación +pc.intro=Artículos detectados por el trabajo de descubrimiento de OpenAlex, en espera de revisión editorial. +pc.filter.pending=Pendientes +pc.filter.accepted=Aceptados +pc.filter.rejected=Rechazados +pc.filter.deferred=Aplazados +pc.filter.all=Todos +pc.select=Seleccione un candidato para revisar. +pc.none=Ningún candidato coincide. 
+pc.col.title=Título / revista +pc.col.date=Fecha +pc.col.status=Estado +pc.col.relevance=Puntuación +pc.journal=Revista +pc.date=Publicado +pc.relevance=Relevancia +pc.abstract=Resumen +pc.accept=Aceptar +pc.accept.confirm=¿Promover este candidato a una referencia publicada? +pc.reject=Rechazar +pc.defer=Aplazar +pc.accepted_note=Aceptado: promovido al catálogo de referencias. +pc.notice.accepted=Aceptado y promovido a referencias +pc.notice.rejected=Rechazado. +pc.notice.deferred=Aplazado para más tarde. +submit.cta=Sugerir un artículo +submit.title=Sugerir una publicación +submit.intro=¿Conoce un artículo relevante de Y/mtDNA o genética de poblaciones que nos falte? Envíe su DOI y un curador lo revisará. +submit.doi=DOI +submit.doi.help=Pegue el DOI (o un enlace doi.org). Lo buscamos en OpenAlex. +submit.button=Enviar para revisión +submit.back=Volver a referencias +submit.queued=Gracias: en cola para revisión de un curador: +submit.error.empty=Introduzca un DOI. +submit.error.captcha=Falló la verificación del captcha. Inténtelo de nuevo. +submit.error.exists=Ese artículo ya está en nuestras referencias. +submit.error.notfound=No encontramos ese DOI en OpenAlex. Verifíquelo e inténtelo de nuevo. +submit.error.lookup=La búsqueda falló: inténtelo más tarde. +profile.save=Guardar +profile.saved=Perfil actualizado. +hg.parent=Padre +hg.restructure=Reestructurar (reasignar / fusionar / dividir) +hg.reparent=Reasignar padre +hg.reparent.placeholder=Nombre del nuevo padre +hg.merge=Fusionar con el padre +hg.merge.confirm=¿Fusionar este nodo con su padre? Sus hijos suben y sus variantes se integran; el nodo se elimina. +hg.split=Separar +hg.split.name=Nombre del nuevo hijo +hg.split.move=Variantes a mover (separadas por comas) +hg.split.variants=Variantes definitorias +hg.op.unknown=Nodo desconocido +hg.op.no_variants=Ninguno de esos nombres de variante está en este nodo. 
+coverage.labs.byLab=Ver por laboratorio +coverage.labs.title=Cobertura por laboratorio +coverage.labs.allLabs=Todos los laboratorios (plano) +coverage.labs.testTypes=Tipos de prueba +coverage.labs.select=Seleccione un laboratorio para ver su cobertura por tipo de prueba. +curator.tool.naming=Nomenclatura de variantes +nm.title=Autoridad de nomenclatura de variantes +nm.intro=Asigne nombres oficiales DU a variantes sin nombre o marcadas. Reutilice un nombre establecido cuando exista; de lo contrario, acuñe el siguiente identificador DU. +nm.mode.needs=Necesita nombre +nm.mode.pending=Pendiente de revisión +nm.mode.named=Con nombre (DU) +nm.mode.backlog=Pendientes sin nombre +nm.mode.all=Todas +nm.select=Seleccione una variante para nombrar. +nm.none=Ninguna variante coincide. +nm.unnamed=(sin nombre) +nm.col.name=Nombre +nm.col.coord=GRCh38 +nm.col.defines=Define +nm.col.status=Estado +nm.coord=Coordenada GRCh38 +nm.type=Tipo de mutación +nm.defines=Define la rama +nm.aliases=Conocida como +nm.dedup.warn=Ya existe una variante con nombre en esta coordenada: +nm.dedup.hint=Considere reutilizarla (añadirla como alias en la rama) en lugar de acuñar un nuevo nombre DU. +nm.assign=Asignar nombre DU +nm.assign.confirm=¿Acuñar el siguiente identificador DU para esta variante y publicarla como NAMED? +nm.flag=Marcar para revisión +nm.unflag=Volver a sin nombre +nm.named_note=Ya tiene nombre: identificador DU asignado. +nm.notice.minted=Acuñado +curator.tool.reconcile_flags=Marcas de reconciliación +rf.title=Marcas de reconciliación +rf.intro=Grupos de sinónimos de YBrowse cuyos nombres están repartidos entre varias variantes del catálogo: elija la que conservar y fusione el resto. +rf.select=Seleccione una marca para resolver. +rf.none=No hay marcas de reconciliación: nada que resolver. 
+rf.col.locus=Locus +rf.col.names=Sinónimos +rf.col.variants=Variantes +rf.synonyms=Sinónimos de YBrowse +rf.split_note=Según YBrowse estos nombres son un único SNP físico, pero el catálogo los tiene repartidos entre las variantes de abajo. Elija la variante canónica a conservar; las demás se fusionan en ella (enlaces, alias) y se eliminan. +rf.keep=Conservar +rf.col.canonical=Canónica +rf.col.defines=Define ramas +rf.merge=Fusionar +rf.merge.confirm=¿Fusionar las otras variantes en la seleccionada? Sus enlaces del árbol y alias se trasladan a ella y se eliminan. +rf.merge.hint=Las variantes no seleccionadas se integran en la conservada. +rf.notice.merged=Variantes fusionadas: +rf.resolved=Resuelta +rf.resolved.note=Esta marca se ha resuelto. + +# Curator sequencer-lab proposal review (curator/instrument-proposals) +curator.tool.instrument_proposals=Propuestas de laboratorio-secuenciador +ip.title=Propuestas de laboratorio-secuenciador +ip.intro=Asociaciones instrumento→laboratorio inferidas a partir de las secuenciaciones federadas. Acepte para fijar el laboratorio que resuelve la búsqueda pública, o rechace para descartarla. +ip.select=Seleccione una propuesta para revisar. +ip.none=No hay propuestas. +ip.col.instrument=Instrumento +ip.col.lab=Laboratorio propuesto +ip.col.support=Obs. / ciudadanos +ip.col.confidence=Confianza +ip.col.status=Estado +ip.col.platform=Plataforma +ip.col.model=Modelo +ip.col.citizen=Ciudadano +ip.filter.all=Todas +ip.filter.ready=Listas +ip.filter.pending=Pendientes +ip.filter.accepted=Aceptadas +ip.filter.rejected=Rechazadas +ip.obs=Observaciones +ip.citizens=Ciudadanos distintos +ip.observations=Observaciones de respaldo +ip.lab_name=Nombre del laboratorio +ip.manufacturer=Fabricante +ip.model=Modelo +ip.is_d2c=Laboratorio directo al consumidor (D2C) +ip.reason=Motivo (opcional) +ip.accept=Aceptar +ip.reject=Rechazar +ip.accept.confirm=¿Fijar este laboratorio para el instrumento? Esto resuelve la búsqueda pública. 
+ip.reject.confirm=¿Rechazar esta asociación? El laboratorio dominante no se volverá a proponer para este instrumento. +ip.accept.hint=Aceptar fija el laboratorio del instrumento. +ip.notice.accepted=Aceptada — laboratorio fijado: +ip.notice.rejected=Propuesta rechazada. +ip.resolved=Resuelta +ip.resolved.note=Esta propuesta se ha resuelto. + +# Informe público por muestra (samples/report.html) +sample.identity.title=Muestra +sample.field.accession=Número de acceso +sample.field.alias=Alias +sample.field.sex=Sexo +sample.field.center=Centro +sample.field.description=Descripción +sample.field.publications=Publicaciones de origen +sample.ydna.title=Linaje Y-ADN +sample.mtdna.title=Linaje ADNmt +sample.pathway.noCall=No hay haplogrupo asignado para esta muestra. +sample.pathway.unplaced=El haplogrupo asignado aún no está ubicado en el árbol. +sample.recon.consensus=Consenso entre tecnologías +sample.recon.runs=ejecuciones +sample.recon.confidence=confianza +sample.recon.concordance=concordancia +sample.ybp=años AP +sample.map.title=Origen +sample.map.none=No se registró ninguna coordenada geográfica. +sample.seq.title=Secuenciación y cobertura +sample.seq.platform=Plataforma +sample.seq.instrument=Instrumento +sample.seq.testType=Tipo de prueba +sample.seq.layout=Disposición +sample.seq.reads=Lecturas +sample.seq.readLength=Longitud de lectura +sample.seq.none=No hay datos de secuenciación disponibles. +sample.cov.build=Ensamblaje +sample.cov.aligner=Alineador +sample.cov.testtype=Tipo de prueba +sample.cov.mean=Profundidad media +sample.cov.expected=Esperada +sample.cov.cohort=cohorte +sample.cov.10x=10x % +sample.cov.20x=20x % +sample.cov.30x=30x % +sample.cov.conformance=Vs. especificación +sample.cov.below=Bajo lo esperado +sample.cov.at=Cumple +sample.cov.above=Supera +sample.ancestry.title=Ascendencia +sample.ancestry.none=No hay desglose de ascendencia disponible.
+sample.future.note=El fenotipo predicho y las métricas de distancia genética aún no están disponibles. +sample.curator.visibility=Visibilidad pública (curador) +sample.curator.public=Público +sample.curator.private=Privado +sample.federated=Federado +curator.tool.denovo_conflicts=Conflictos de novo +dc.title=Conflictos de novo +dc.intro=Clados de referencia (ISOGG / PhyloTree) cuya ubicación de novo discrepa — muestras ajenas en el nodo del clado, o miembros dispersos. Peores primero. +dc.none=Sin conflictos. +dc.filter.all=Todos +dc.filter.y=Y-ADN +dc.filter.mt=ADNmt +dc.col.lineage=Linaje +dc.col.clade=Clado de referencia +dc.col.tips=Muestras +dc.col.magnitude=Magnitud +dc.col.home=Nodo +dc.col.foreign=Ajenas +dc.col.away=Dispersos diff --git a/rust/locales/fr.txt b/rust/locales/fr.txt new file mode 100644 index 00000000..9caae760 --- /dev/null +++ b/rust/locales/fr.txt @@ -0,0 +1,479 @@ +# French message catalog. +app.name=Decoding Us +nav.home=Accueil +nav.ytree=Arbre Y-ADN +nav.mtree=Arbre ADNmt +nav.variants=Variants +nav.references=Références +nav.map=Carte +nav.coverage=Couverture +nav.curator=Curateur +nav.tools=Outils +nav.profile=Profil +nav.api=API +nav.login=Connexion +nav.logout=Déconnexion +footer.copyright=© DecodingUs +lang.label=Langue +lang.en=English +lang.es=Español +lang.fr=Français + +home.title=Decoding Us — généalogie génétique et recherche sur les populations +home.heading=Decoding Us +home.lead=Une plateforme collaborative pour la généalogie génétique et la recherche sur les populations : arbres d’haplogroupes Y/ADNmt, un explorateur public de variants et une découverte de parents respectueuse de la vie privée. +home.cta.tree=Explorer l’arbre Y-ADN +home.cta.variants=Rechercher des variants + +variants.title=Explorateur de variants +variants.search.placeholder=Rechercher par nom ou alias (p. ex.
M269, rs9786153)… +variants.col.name=Nom +variants.col.type=Type +variants.col.status=Statut +variants.col.builds=Assemblages +variants.none=Aucun variant correspondant. +variants.detail.select=Sélectionnez un variant pour voir les détails. +variants.detail.naming=Statut de nomenclature : +variants.detail.aka=Aussi connu sous le nom de : +variants.detail.rsids=Identifiants rs : +variants.detail.coordinates=Coordonnées +variants.detail.nocoords=Aucune coordonnée cartographiée. +variants.col.build=Assemblage +variants.col.contig=Contig +variants.col.position=Position +variants.col.change=Changement + +pagination.previous=Précédent +pagination.next=Suivant +pagination.page=Page +pagination.of=sur +pagination.total=au total + +tree.title.y=Arbre Y-ADN +tree.title.mt=Arbre ADNmt +tree.loading=Chargement de l’arbre… +tree.allRoots=← Toutes les racines +tree.rootLineages=Lignées racines +tree.noChildren=Aucun haplogroupe descendant. +tree.formed=formé +tree.search.placeholder=Haplogroupe ou variant… +tree.search.button=Aller +tree.orientation=Orientation +tree.depth=Profondeur +tree.horizontal=Horizontal +tree.vertical=Vertical +tree.legend.backbone=Tronc principal +tree.legend.recent=Récemment mis à jour +tree.legend.default=Autre +tree.variants=variants +tree.samples=échantillons +tree.samples.title=Échantillons placés +tree.samples.more=de plus +tree.snp.none=Aucun variant déterminant enregistré. +tree.snp.aliases=alias +tree.snp.recurrent=récurrent +tree.snp.recurrent.help=Ce SNP apparaît aussi sur d'autres branches (homoplasie). +tree.snp.backMutation=rétromutation +tree.snp.backMutation.help=Cette branche est revenue à l'état ancestral. 
+tree.snp.close=Fermer +tree.prov.title=Provenance +tree.prov.source=Source +tree.prov.aka=Aussi connu sous +tree.prov.updated=Mis à jour + +references.title=Références +references.search.placeholder=Rechercher par titre, revue ou DOI… +references.col.title=Titre +references.col.journal=Revue +references.col.year=Année +references.col.citations=Citations +references.none=Aucune publication correspondante. +references.select=Sélectionnez une publication pour voir ses échantillons. +references.viewDoi=DOI +references.biosamples.title=Échantillons de cette étude +references.biosamples.none=Aucun échantillon lié à cette publication. +references.col.accession=Numéro d’accès +references.col.alias=Alias +references.col.source=Origine +references.col.description=Description + +map.title=Carte des échantillons +map.samples=échantillons +map.loading=Chargement de la carte… + +coverage.title=Références de couverture +coverage.col.lab=Laboratoire +coverage.col.testType=Type de test +coverage.col.libraries=Bibliothèques +coverage.col.meanDepth=Profondeur moyenne +coverage.col.cov10x=Couverture ≥10× +coverage.col.expectedDepth=Attendue +coverage.none=Aucune donnée de couverture pour le moment. + +auth.login.title=Connexion +auth.login.handle=Identifiant ou e-mail +auth.login.password=Mot de passe +auth.login.submit=Se connecter +auth.login.error=Identifiant ou mot de passe invalide. + +curator.title=Tableau de bord du curateur +curator.welcome=Connecté en tant que +curator.roles=Rôles +curator.tool.haplogroups=Haplogroupes +curator.tool.variants=Variants +curator.tool.regions=Régions génomiques +curator.tool.proposals=Propositions + +prop.title=Propositions de curation +prop.filter.all=Tous les statuts +prop.col.name=Nom proposé +prop.col.parent=Parent +prop.col.status=Statut +prop.col.evidence=Preuves +prop.col.submitters=Émetteurs +prop.col.confidence=Confiance +prop.none=Aucune proposition correspondante. +prop.select=Sélectionnez une proposition à examiner. 
+prop.evidence=Preuves +prop.col.dna=ADN +prop.variants=Variantes définissantes +prop.var.name=Variante +prop.var.support=Soutien +prop.split.note=Le moteur a signalé un contributeur divergent — cette branche pourrait devoir être scindée. Examinez les variantes à l'appui avant de décider. +prop.notes=Notes (facultatif) +prop.approve=Approuver +prop.reject=Rejeter +prop.defer=Différer +prop.promote=Promouvoir au catalogue + +hg.title=Haplogroupes +hg.new=Nouvel haplogroupe +hg.search=Rechercher par nom… +hg.filter.all=Toutes les lignées +hg.col.name=Nom +hg.col.type=Lignée +hg.col.lineage=Chemin +hg.none=Aucun haplogroupe correspondant. +hg.select=Sélectionnez un haplogroupe ou créez-en un. +hg.field.name=Nom +hg.field.type=Lignée +hg.field.lineage=Chemin +hg.field.source=Source +hg.field.formed=Formé (ybp) +hg.field.tmrca=TMRCA (ybp) +hg.save=Enregistrer +hg.cancel=Annuler +hg.edit=Modifier +hg.delete=Supprimer +hg.delete.confirm=Supprimer cet haplogroupe ? +hg.deleteBlocked=Suppression impossible : des relations d’arbre existent encore. + +var.title=Variants +var.new=Nouveau variant +var.search=Rechercher par nom ou alias… +var.col.name=Nom +var.col.type=Type +var.col.status=Statut +var.none=Aucun variant correspondant. +var.select=Sélectionnez un variant ou créez-en un. +var.field.name=Nom canonique +var.field.type=Type de mutation +var.field.status=Statut de nomenclature +var.field.commonNames=Noms communs (séparés par des virgules) +var.field.rsIds=Identifiants rs (séparés par des virgules) +var.field.builds=Assemblages de coordonnées +var.save=Enregistrer +var.cancel=Annuler +var.edit=Modifier +var.delete=Supprimer +var.delete.confirm=Supprimer ce variant ? +var.deleteBlocked=Suppression impossible : il définit un haplogroupe. + +region.title=Régions génomiques +region.new=Nouvelle région +region.search=Rechercher par nom ou type… +region.col.type=Type +region.col.name=Nom +region.col.builds=Assemblages +region.none=Aucune région correspondante. 
+region.select=Sélectionnez une région ou créez-en une. +region.field.type=Type de région +region.field.name=Nom +region.field.coordinates=Coordonnées (JSON) +region.field.properties=Propriétés (JSON) +region.save=Enregistrer +region.cancel=Annuler +region.edit=Modifier +region.delete=Supprimer +region.delete.confirm=Supprimer cette région ? + +# Revue des ensembles de modifications +curator.tool.change_sets=Ensembles de modifications +cs.title=Revue des ensembles de modifications +cs.filter.all=Tous les statuts +cs.col.source=Source +cs.col.type=Type +cs.col.status=Statut +cs.col.changes=Modifications +cs.col.created=Créé +cs.none=Aucun ensemble de modifications ne correspond. +cs.select=Sélectionnez un ensemble de modifications à examiner. +cs.finalized=Finalisé par +cs.diff.added=ajoutés +cs.diff.removed=supprimés +cs.diff.modified=modifiés +cs.diff.reparented=déplacés +cs.changes=Modifications +cs.values=valeurs +cs.changes.none=Aucune modification. +cs.approve=Approuver +cs.reject=Rejeter +cs.start_review=Démarrer la revue +cs.approve_all=Tout approuver +cs.apply=Appliquer +cs.apply.confirm=Appliquer les modifications approuvées à l'arbre en production ? +cs.discard=Abandonner +cs.discard.confirm=Abandonner cet ensemble de modifications ? +cs.comments=Commentaires +cs.comments.none=Aucun commentaire pour l'instant. +cs.comment.placeholder=Ajouter un commentaire… +cs.comment.add=Ajouter +cs.notice.no_transition=Aucun changement d'état (non autorisé dans le statut actuel). +cs.notice.approved=modifications approuvées. +cs.notice.applied=Appliqué +cs.diff.detail=Détail des différences + +# Pages secondaires + consentement aux cookies +nav.about=À propos +page.about.title=À propos +page.faq.title=FAQ +page.terms.title=Conditions d'utilisation +page.privacy.title=Politique de confidentialité +page.cookies.title=Politique relative aux cookies +page.reputation.title=Réputation +page.updated=Dernière mise à jour juin 2026. 
+consent.message=Nous utilisons des cookies essentiels pour vous connecter, et des cookies non essentiels uniquement avec votre consentement. Consultez notre +consent.accept=Accepter +consent.decline=Refuser + +# Profil + contact +profile.title=Votre profil +profile.name=Nom affiché +profile.roles=Rôles +profile.handle=Identifiant +profile.email=E-mail +profile.member_since=Membre depuis +contact.title=Contact +contact.name=Nom +contact.email=E-mail +contact.subject=Sujet +contact.message=Message +contact.send=Envoyer +contact.sent=Merci — votre message a été envoyé. +contact.error.empty=Veuillez saisir un message. +contact.error.captcha=Échec de la vérification du captcha. Veuillez réessayer. + +# Backfill: curator/UI keys (matches en.txt) +curator.tool.publications=Candidats de publication +pc.title=Candidats de publication +pc.intro=Articles repérés par la tâche de découverte OpenAlex, en attente de revue éditoriale. +pc.filter.pending=En attente +pc.filter.accepted=Acceptés +pc.filter.rejected=Rejetés +pc.filter.deferred=Reportés +pc.filter.all=Tous +pc.select=Sélectionnez un candidat à examiner. +pc.none=Aucun candidat ne correspond. +pc.col.title=Titre / revue +pc.col.date=Date +pc.col.status=Statut +pc.col.relevance=Score +pc.journal=Revue +pc.date=Publié +pc.relevance=Pertinence +pc.abstract=Résumé +pc.accept=Accepter +pc.accept.confirm=Promouvoir ce candidat en référence publiée ? +pc.reject=Rejeter +pc.defer=Reporter +pc.accepted_note=Accepté — promu au catalogue de références. +pc.notice.accepted=Accepté et promu aux références +pc.notice.rejected=Rejeté. +pc.notice.deferred=Reporté à plus tard. +submit.cta=Suggérer un article +submit.title=Suggérer une publication +submit.intro=Vous connaissez un article pertinent en Y/mtDNA ou génétique des populations qui nous manque ? Envoyez son DOI et un curateur l'examinera. +submit.doi=DOI +submit.doi.help=Collez le DOI (ou un lien doi.org). Nous le recherchons dans OpenAlex. 
+submit.button=Soumettre pour examen +submit.back=Retour aux références +submit.queued=Merci — en file pour examen par un curateur : +submit.error.empty=Veuillez saisir un DOI. +submit.error.captcha=Échec de la vérification du captcha. Veuillez réessayer. +submit.error.exists=Cet article est déjà dans nos références. +submit.error.notfound=DOI introuvable dans OpenAlex. Vérifiez-le et réessayez. +submit.error.lookup=La recherche a échoué — veuillez réessayer plus tard. +profile.save=Enregistrer +profile.saved=Profil mis à jour. +hg.parent=Parent +hg.restructure=Restructurer (reparenter / fusionner / scinder) +hg.reparent=Reparenter +hg.reparent.placeholder=Nom du nouveau parent +hg.merge=Fusionner dans le parent +hg.merge.confirm=Fusionner ce nœud dans son parent ? Ses enfants remontent et ses variantes y sont intégrées ; le nœud est supprimé. +hg.split=Scinder +hg.split.name=Nom du nouvel enfant +hg.split.move=Variantes à déplacer (séparées par des virgules) +hg.split.variants=Variantes définitoires +hg.op.unknown=Nœud inconnu +hg.op.no_variants=Aucun de ces noms de variante n'est sur ce nœud. +coverage.labs.byLab=Voir par laboratoire +coverage.labs.title=Couverture par laboratoire +coverage.labs.allLabs=Tous les laboratoires (à plat) +coverage.labs.testTypes=Types de test +coverage.labs.select=Sélectionnez un laboratoire pour voir sa couverture par type de test. +curator.tool.naming=Nommage des variantes +nm.title=Autorité de nommage des variantes +nm.intro=Attribuez des noms officiels DU aux variantes sans nom ou signalées. Réutilisez un nom établi s'il existe ; sinon, frappez le prochain identifiant DU. +nm.mode.needs=Besoin d'un nom +nm.mode.pending=En attente de revue +nm.mode.named=Nommées (DU) +nm.mode.backlog=Backlog sans nom +nm.mode.all=Toutes +nm.select=Sélectionnez une variante à nommer. +nm.none=Aucune variante ne correspond. 
+nm.unnamed=(sans nom) +nm.col.name=Nom +nm.col.coord=GRCh38 +nm.col.defines=Définit +nm.col.status=Statut +nm.coord=Coordonnée GRCh38 +nm.type=Type de mutation +nm.defines=Définit la branche +nm.aliases=Connue sous +nm.dedup.warn=Une variante nommée existe déjà à cette coordonnée : +nm.dedup.hint=Envisagez de la réutiliser (l'ajouter comme alias sur la branche) au lieu de frapper un nouveau nom DU. +nm.assign=Attribuer un nom DU +nm.assign.confirm=Frapper le prochain identifiant DU pour cette variante et la publier comme NAMED ? +nm.flag=Signaler pour revue +nm.unflag=Remettre sans nom +nm.named_note=Déjà nommée — identifiant DU attribué. +nm.notice.minted=Frappé +curator.tool.reconcile_flags=Marqueurs de réconciliation +rf.title=Marqueurs de réconciliation +rf.intro=Groupes de synonymes YBrowse dont les noms sont répartis sur plusieurs variantes du catalogue — choisissez celle à conserver et fusionnez les autres. +rf.select=Sélectionnez un marqueur à résoudre. +rf.none=Aucun marqueur de réconciliation — rien à résoudre. +rf.col.locus=Locus +rf.col.names=Synonymes +rf.col.variants=Variantes +rf.synonyms=Synonymes YBrowse +rf.split_note=Selon YBrowse ces noms sont un seul SNP physique, mais le catalogue les a répartis sur les variantes ci-dessous. Choisissez la variante canonique à conserver ; les autres y sont fusionnées (liens, alias) et supprimées. +rf.keep=Conserver +rf.col.canonical=Canonique +rf.col.defines=Définit des branches +rf.merge=Fusionner +rf.merge.confirm=Fusionner les autres variantes dans celle sélectionnée ? Leurs liens d'arbre et alias y sont déplacés et elles sont supprimées. +rf.merge.hint=Les variantes non sélectionnées sont intégrées dans celle conservée. +rf.notice.merged=Variantes fusionnées : +rf.resolved=Résolu +rf.resolved.note=Ce marqueur a été résolu. 
+ +# Curator sequencer-lab proposal review (curator/instrument-proposals) +curator.tool.instrument_proposals=Propositions laboratoire-séquenceur +ip.title=Propositions laboratoire-séquenceur +ip.intro=Associations instrument→laboratoire déduites des séquençages fédérés. Acceptez pour fixer le laboratoire que résout la recherche publique, ou rejetez pour l'écarter. +ip.select=Sélectionnez une proposition à examiner. +ip.none=Aucune proposition. +ip.col.instrument=Instrument +ip.col.lab=Laboratoire proposé +ip.col.support=Obs. / citoyens +ip.col.confidence=Confiance +ip.col.status=Statut +ip.col.platform=Plateforme +ip.col.model=Modèle +ip.col.citizen=Citoyen +ip.filter.all=Toutes +ip.filter.ready=Prêtes +ip.filter.pending=En attente +ip.filter.accepted=Acceptées +ip.filter.rejected=Rejetées +ip.obs=Observations +ip.citizens=Citoyens distincts +ip.observations=Observations à l'appui +ip.lab_name=Nom du laboratoire +ip.manufacturer=Fabricant +ip.model=Modèle +ip.is_d2c=Laboratoire direct au consommateur (D2C) +ip.reason=Motif (facultatif) +ip.accept=Accepter +ip.reject=Rejeter +ip.accept.confirm=Fixer ce laboratoire pour l'instrument ? Cela résout la recherche publique. +ip.reject.confirm=Rejeter cette association ? Le laboratoire dominant ne sera pas reproposé pour cet instrument. +ip.accept.hint=Accepter fixe le laboratoire de l'instrument. +ip.notice.accepted=Acceptée — laboratoire fixé : +ip.notice.rejected=Proposition rejetée. +ip.resolved=Résolu +ip.resolved.note=Cette proposition a été résolue. + +# Rapport public par échantillon (samples/report.html) +sample.identity.title=Échantillon +sample.field.accession=Numéro d'accès +sample.field.alias=Alias +sample.field.sex=Sexe +sample.field.center=Centre +sample.field.description=Description +sample.field.publications=Publications d'origine +sample.ydna.title=Lignée Y-DNA +sample.mtdna.title=Lignée mtDNA +sample.pathway.noCall=Aucun haplogroupe attribué pour cet échantillon. 
+sample.pathway.unplaced=L'haplogroupe attribué n'est pas encore placé dans l'arbre. +sample.recon.consensus=Consensus inter-technologies +sample.recon.runs=analyses +sample.recon.confidence=confiance +sample.recon.concordance=concordance +sample.ybp=ans AP +sample.map.title=Origine +sample.map.none=Aucune coordonnée géographique enregistrée. +sample.seq.title=Séquençage et couverture +sample.seq.platform=Plateforme +sample.seq.instrument=Instrument +sample.seq.testType=Type de test +sample.seq.layout=Disposition +sample.seq.reads=Lectures +sample.seq.readLength=Longueur de lecture +sample.seq.none=Aucune donnée de séquençage disponible. +sample.cov.build=Assemblage +sample.cov.aligner=Aligneur +sample.cov.testtype=Type de test +sample.cov.mean=Profondeur moyenne +sample.cov.expected=Attendue +sample.cov.cohort=cohorte +sample.cov.10x=10x % +sample.cov.20x=20x % +sample.cov.30x=30x % +sample.cov.conformance=Vs. spéc. +sample.cov.below=Sous la spéc. +sample.cov.at=Conforme +sample.cov.above=Au-dessus +sample.ancestry.title=Ascendance +sample.ancestry.none=Aucune répartition d'ascendance disponible. +sample.future.note=Le phénotype prédit et les mesures de distance génétique ne sont pas encore disponibles. +sample.curator.visibility=Visibilité publique (curateur) +sample.curator.public=Public +sample.curator.private=Privé +sample.federated=Fédéré +curator.tool.denovo_conflicts=Conflits de novo +dc.title=Conflits de novo +dc.intro=Clades de référence (ISOGG / PhyloTree) dont le placement de novo diverge — échantillons étrangers dans le nœud du clade, ou membres dispersés. Pires d'abord. +dc.none=Aucun conflit. 
+dc.filter.all=Tous +dc.filter.y=ADN-Y +dc.filter.mt=ADNmt +dc.col.lineage=Lignée +dc.col.clade=Clade de référence +dc.col.tips=Échantillons +dc.col.magnitude=Magnitude +dc.col.home=Nœud +dc.col.foreign=Étrangers +dc.col.away=Dispersés diff --git a/rust/migrations/0001_foundation.sql b/rust/migrations/0001_foundation.sql new file mode 100644 index 00000000..f07c2b67 --- /dev/null +++ b/rust/migrations/0001_foundation.sql @@ -0,0 +1,33 @@ +-- DecodingUs redesigned schema — foundation. +-- Namespaces, extensions, and native enums. See plan §2. +-- +-- The legacy app spread tables across public/auth/tree/social/support/genomics/ +-- billing + a second "metadata" database. We keep logical grouping via schemas +-- but in ONE database (the metadata DB collapses into `fed`). + +CREATE EXTENSION IF NOT EXISTS postgis; -- biosample/donor geometry(Point,4326) +CREATE EXTENSION IF NOT EXISTS citext; -- case-insensitive user email +CREATE EXTENSION IF NOT EXISTS pgcrypto; -- gen_random_uuid() + +CREATE SCHEMA IF NOT EXISTS core; -- samples, donors, variants, regions +CREATE SCHEMA IF NOT EXISTS tree; -- haplogroups + versioning +CREATE SCHEMA IF NOT EXISTS genomics; -- sequencing, coverage, callable loci +CREATE SCHEMA IF NOT EXISTS pubs; -- publications/studies (`pub` is reserved-ish; use pubs) +CREATE SCHEMA IF NOT EXISTS ident; -- users/auth/roles/atproto +CREATE SCHEMA IF NOT EXISTS ibd; -- match discovery +CREATE SCHEMA IF NOT EXISTS fed; -- PDS fleet/firehose (was the metadata DB) +CREATE SCHEMA IF NOT EXISTS social; -- reputation/messaging +CREATE SCHEMA IF NOT EXISTS support; -- contact messages +CREATE SCHEMA IF NOT EXISTS billing; -- subscriptions + +-- Native enums. Labels match du-domain serde forms (SCREAMING_SNAKE_CASE) so +-- Rust maps them directly via #[derive(sqlx::Type)] and JSONB round-trips align. 
+CREATE TYPE core.dna_type AS ENUM ('Y_DNA', 'MT_DNA'); +CREATE TYPE core.biological_sex AS ENUM ('MALE', 'FEMALE', 'INTERSEX'); +CREATE TYPE core.biosample_source AS ENUM ('STANDARD', 'CITIZEN', 'PGP', 'EXTERNAL', 'ANCIENT'); +CREATE TYPE core.data_generation_method AS ENUM ('SEQUENCING', 'GENOTYPING'); +CREATE TYPE core.target_type AS ENUM ('WHOLE_GENOME', 'Y_CHROMOSOME', 'MT_DNA', 'AUTOSOMAL', 'X_CHROMOSOME', 'MIXED'); +CREATE TYPE core.mutation_type AS ENUM ('SNP', 'INDEL', 'STR', 'DEL', 'INS', 'MNP'); +CREATE TYPE core.naming_status AS ENUM ('UNNAMED', 'PENDING_REVIEW', 'NAMED'); +CREATE TYPE tree.change_set_status AS ENUM ('DRAFT', 'READY_FOR_REVIEW', 'UNDER_REVIEW', 'APPLIED', 'DISCARDED'); +CREATE TYPE tree.tree_change_type AS ENUM ('CREATE', 'UPDATE', 'DELETE', 'REPARENT', 'VARIANT_EDIT'); diff --git a/rust/migrations/0002_core.sql b/rust/migrations/0002_core.sql new file mode 100644 index 00000000..470848df --- /dev/null +++ b/rust/migrations/0002_core.sql @@ -0,0 +1,80 @@ +-- core schema: variants, specimen donors, the UNIFIED biosample, genome regions. +-- This migration realizes the central de-sprawl moves from plan §2. + +-- ── Variants ────────────────────────────────────────────────────────────── +-- One row per variant. Replaces legacy `variant` + `variant_alias` + per-build +-- coordinate rows. JSONB payload shapes are defined in du-domain::variant. 
+CREATE TABLE core.variant ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + canonical_name TEXT NOT NULL, + mutation_type core.mutation_type NOT NULL, + naming_status core.naming_status NOT NULL DEFAULT 'UNNAMED', + aliases JSONB NOT NULL DEFAULT '{}'::jsonb, -- {common_names, rs_ids, sources} + coordinates JSONB NOT NULL DEFAULT '{}'::jsonb, -- {GRCh38:{...}, hs1:{...}, ...} + annotations JSONB NOT NULL DEFAULT '{}'::jsonb, -- {cytobands, str_overlaps} + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE UNIQUE INDEX variant_canonical_name_key ON core.variant (canonical_name); +-- GIN indexes on the queried JSONB paths (alias lookup, build-coordinate search). +CREATE INDEX variant_aliases_gin ON core.variant USING gin (aliases jsonb_path_ops); +CREATE INDEX variant_coordinates_gin ON core.variant USING gin (coordinates jsonb_path_ops); + +-- ── Specimen donors ─────────────────────────────────────────────────────── +-- Owns demographic data (consolidated from legacy biosample in evolutions 16/18). +CREATE TABLE core.specimen_donor ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + donor_identifier TEXT, + origin_biobank TEXT, + sex core.biological_sex, + donor_type core.biosample_source NOT NULL DEFAULT 'STANDARD', + geocoord geometry(Point, 4326), + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX specimen_donor_geocoord_gist ON core.specimen_donor USING gist (geocoord); + +-- ── Unified biosample ─────────────────────────────────────────────────────── +-- Replaces three legacy tables (biosample, citizen_biosample, pgp_biosample). +-- `source` discriminates; `source_attrs` JSONB holds source-specific fields +-- (at_uri/at_cid, pgp_participant_id, ena accession, …). `atproto` is the single +-- consistent federation reference replacing scattered at_uri/at_cid columns. 
+CREATE TABLE core.biosample ( + sample_guid UUID PRIMARY KEY DEFAULT gen_random_uuid(), + donor_id BIGINT REFERENCES core.specimen_donor(id), + source core.biosample_source NOT NULL, + accession TEXT, + alias TEXT, + description TEXT, + center_name TEXT, + locked BOOLEAN NOT NULL DEFAULT false, + deleted BOOLEAN NOT NULL DEFAULT false, + source_attrs JSONB NOT NULL DEFAULT '{}'::jsonb, + -- original haplogroup calls per publication (folded in from the dropped + -- biosample_original_haplogroup / citizen_* tables). + original_haplogroups JSONB NOT NULL DEFAULT '[]'::jsonb, + atproto JSONB, -- {uri, cid, repo_did} or NULL when not federated + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE UNIQUE INDEX biosample_accession_key ON core.biosample (accession) WHERE accession IS NOT NULL; +CREATE INDEX biosample_source_idx ON core.biosample (source); +CREATE INDEX biosample_source_attrs_gin ON core.biosample USING gin (source_attrs jsonb_path_ops); +-- Unique federation URI when present (citizen samples carry an at:// URI). +CREATE UNIQUE INDEX biosample_atproto_uri_key + ON core.biosample ((atproto->>'uri')) WHERE atproto IS NOT NULL; + +-- ── Genome regions ────────────────────────────────────────────────────────── +-- Multi-build structural regions (centromere/telomere/PAR/…). Coordinates as +-- JSONB keyed by build (legacy genome_region_v2 shape). 
+CREATE TABLE core.genome_region ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + region_type TEXT NOT NULL, + name TEXT NOT NULL, + coordinates JSONB NOT NULL DEFAULT '{}'::jsonb, -- {GRCh38:{contig,start,end}, hs1:{...}} + properties JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE UNIQUE INDEX genome_region_type_name_key ON core.genome_region (region_type, name); +CREATE INDEX genome_region_coordinates_gin ON core.genome_region USING gin (coordinates jsonb_path_ops); diff --git a/rust/migrations/0003_tree.sql b/rust/migrations/0003_tree.sql new file mode 100644 index 00000000..8be95425 --- /dev/null +++ b/rust/migrations/0003_tree.sql @@ -0,0 +1,214 @@ +-- tree schema: haplogroup phylogeny, temporal versioning, bulk-merge staging, +-- and the discovery pipeline. De-sprawl moves (plan §2): +-- * per-revision metadata tables (relationship_revision_metadata, +-- haplogroup_variant_metadata) fold into a `revision` JSONB column. +-- * discovery's polymorphic (sample_type, sample_id) collapses to one +-- core.biosample(sample_guid) FK. + +-- ── Phylogeny ──────────────────────────────────────────────────────────────── +CREATE TABLE tree.haplogroup ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + name TEXT NOT NULL, + haplogroup_type core.dna_type NOT NULL, + lineage TEXT, + source TEXT, + confidence_level TEXT, + formed_ybp INTEGER, + tmrca_ybp INTEGER, + -- multi-source attribution + age-estimate detail, formerly scattered columns. + provenance JSONB NOT NULL DEFAULT '{}'::jsonb, + valid_from TIMESTAMPTZ NOT NULL DEFAULT now(), + valid_until TIMESTAMPTZ +); +CREATE UNIQUE INDEX haplogroup_name_type_key ON tree.haplogroup (name, haplogroup_type); +CREATE INDEX haplogroup_provenance_gin ON tree.haplogroup USING gin (provenance jsonb_path_ops); + +-- Temporal parent/child edges. 
`revision` JSONB folds the old +-- relationship_revision_metadata (author/timestamp/comment/change_type). +CREATE TABLE tree.haplogroup_relationship ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + child_haplogroup_id BIGINT NOT NULL REFERENCES tree.haplogroup(id), + parent_haplogroup_id BIGINT REFERENCES tree.haplogroup(id), + revision_id INTEGER NOT NULL DEFAULT 1, + source TEXT, + revision JSONB NOT NULL DEFAULT '{}'::jsonb, + valid_from TIMESTAMPTZ NOT NULL DEFAULT now(), + valid_until TIMESTAMPTZ +); +CREATE INDEX haplogroup_rel_child_idx ON tree.haplogroup_relationship (child_haplogroup_id); +CREATE INDEX haplogroup_rel_parent_idx ON tree.haplogroup_relationship (parent_haplogroup_id); +-- One currently-valid parent per child. +CREATE UNIQUE INDEX haplogroup_rel_current_child_key + ON tree.haplogroup_relationship (child_haplogroup_id) WHERE valid_until IS NULL; + +-- Defining variants per haplogroup. `revision` JSONB folds haplogroup_variant_metadata. +CREATE TABLE tree.haplogroup_variant ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + haplogroup_id BIGINT NOT NULL REFERENCES tree.haplogroup(id), + variant_id BIGINT NOT NULL REFERENCES core.variant(id), + revision JSONB NOT NULL DEFAULT '{}'::jsonb, + valid_from TIMESTAMPTZ NOT NULL DEFAULT now(), + valid_until TIMESTAMPTZ +); +CREATE UNIQUE INDEX haplogroup_variant_current_key + ON tree.haplogroup_variant (haplogroup_id, variant_id) WHERE valid_until IS NULL; + +-- Historical/archaeological date constraints for age estimation. 
+CREATE TABLE tree.genealogical_anchor ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + haplogroup_id BIGINT NOT NULL REFERENCES tree.haplogroup(id), + anchor_type TEXT NOT NULL, -- KNOWN_MRCA / MDKA / ANCIENT_DNA + date_ce INTEGER, + carbon_date_bp INTEGER, + confidence NUMERIC(4,3), + details JSONB NOT NULL DEFAULT '{}'::jsonb +); +CREATE INDEX genealogical_anchor_hg_idx ON tree.genealogical_anchor (haplogroup_id); + +-- Modal STR haplotypes for STR-based age estimation. +CREATE TABLE tree.haplogroup_ancestral_str ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + haplogroup_id BIGINT NOT NULL REFERENCES tree.haplogroup(id), + marker_name TEXT NOT NULL, + ancestral_value INTEGER NOT NULL, + confidence NUMERIC(4,3), + method TEXT -- MODAL / PHYLOGENETIC / MANUAL +); +CREATE INDEX haplogroup_ancestral_str_hg_idx ON tree.haplogroup_ancestral_str (haplogroup_id); + +-- ── Versioning / bulk merge staging ────────────────────────────────────────── +CREATE TABLE tree.change_set ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + source TEXT NOT NULL, -- ISOGG, ytree.net, ... 
+ haplogroup_type core.dna_type, + status tree.change_set_status NOT NULL DEFAULT 'DRAFT', + description TEXT, + change_count INTEGER NOT NULL DEFAULT 0, + created_by TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + promoted_by TEXT, + promoted_at TIMESTAMPTZ +); +CREATE INDEX change_set_status_idx ON tree.change_set (status); + +CREATE TABLE tree.tree_change ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + change_set_id BIGINT NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, + change_type tree.tree_change_type NOT NULL, + haplogroup_id BIGINT REFERENCES tree.haplogroup(id), + old_values JSONB, + new_values JSONB, + status TEXT NOT NULL DEFAULT 'PENDING' -- PENDING/APPROVED/REJECTED +); +CREATE INDEX tree_change_set_idx ON tree.tree_change (change_set_id, status); + +CREATE TABLE tree.change_set_comment ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + change_set_id BIGINT NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, + commented_by TEXT NOT NULL, + comment TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- WIP shadow tables hold proposed structure before it is applied to production. 
+CREATE TABLE tree.wip_haplogroup ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + change_set_id BIGINT NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, + placeholder_id INTEGER NOT NULL, -- negative temp id within the change set + name TEXT NOT NULL, + source TEXT, + formed_ybp INTEGER, + provenance JSONB NOT NULL DEFAULT '{}'::jsonb +); +CREATE INDEX wip_haplogroup_cs_idx ON tree.wip_haplogroup (change_set_id); + +CREATE TABLE tree.wip_relationship ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + change_set_id BIGINT NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, + child_placeholder_id INTEGER NOT NULL, + parent_placeholder_id INTEGER, + parent_production_id BIGINT REFERENCES tree.haplogroup(id) +); + +CREATE TABLE tree.wip_haplogroup_variant ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + change_set_id BIGINT NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, + wip_haplogroup_id BIGINT NOT NULL REFERENCES tree.wip_haplogroup(id) ON DELETE CASCADE, + variant_id BIGINT NOT NULL REFERENCES core.variant(id) +); + +CREATE TABLE tree.wip_reparent ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + change_set_id BIGINT NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, + haplogroup_id BIGINT NOT NULL REFERENCES tree.haplogroup(id), + old_parent_id BIGINT REFERENCES tree.haplogroup(id), + new_parent_id BIGINT REFERENCES tree.haplogroup(id) +); + +CREATE TABLE tree.wip_resolution ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + change_set_id BIGINT NOT NULL REFERENCES tree.change_set(id) ON DELETE CASCADE, + wip_haplogroup_id BIGINT REFERENCES tree.wip_haplogroup(id) ON DELETE CASCADE, + wip_reparent_id BIGINT REFERENCES tree.wip_reparent(id) ON DELETE CASCADE, + resolution_type TEXT NOT NULL, -- REPARENT/EDIT_VARIANTS/MERGE_EXISTING/DEFER + new_parent_id BIGINT REFERENCES tree.haplogroup(id), + merge_target_id BIGINT REFERENCES tree.haplogroup(id), + details JSONB NOT NULL DEFAULT 
'{}'::jsonb +); + +-- ── Discovery pipeline ─────────────────────────────────────────────────────── +-- Private variants found in a sample. Was polymorphic (sample_type, sample_id); +-- now one core.biosample(sample_guid) FK. +CREATE TABLE tree.biosample_private_variant ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid), + variant_id BIGINT NOT NULL REFERENCES core.variant(id), + haplogroup_type core.dna_type NOT NULL, + terminal_haplogroup_id BIGINT REFERENCES tree.haplogroup(id), + status TEXT NOT NULL DEFAULT 'ACTIVE', -- ACTIVE/PROMOTED/INVALIDATED + discovered_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX bpv_sample_idx ON tree.biosample_private_variant (sample_guid); +CREATE INDEX bpv_variant_idx ON tree.biosample_private_variant (variant_id); + +CREATE TABLE tree.proposed_branch ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + proposed_name TEXT, + parent_haplogroup_id BIGINT REFERENCES tree.haplogroup(id), + discovery_sample_guids UUID[] NOT NULL DEFAULT '{}', + evidence_count INTEGER NOT NULL DEFAULT 0, + confidence NUMERIC(4,3), + proposed_by TEXT, + status TEXT NOT NULL DEFAULT 'PROPOSED' -- PROPOSED/UNDER_REVIEW/REJECTED/ACCEPTED +); + +CREATE TABLE tree.proposed_branch_variant ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + proposed_branch_id BIGINT NOT NULL REFERENCES tree.proposed_branch(id) ON DELETE CASCADE, + variant_id BIGINT NOT NULL REFERENCES core.variant(id), + supporting_sample_count INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE tree.proposed_branch_evidence ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + proposed_branch_id BIGINT NOT NULL REFERENCES tree.proposed_branch(id) ON DELETE CASCADE, + evidence_type TEXT NOT NULL, -- PRIVATE_VARIANT/SHARED_DERIVED/STRVAL_SIMILARITY/PUBLICATION + evidence_detail JSONB NOT NULL DEFAULT '{}'::jsonb, + confidence NUMERIC(4,3) +); + +CREATE TABLE tree.curator_action ( + id BIGINT GENERATED 
ALWAYS AS IDENTITY PRIMARY KEY, + proposed_branch_id BIGINT REFERENCES tree.proposed_branch(id) ON DELETE CASCADE, + action TEXT NOT NULL, -- APPROVE/DEFER/REJECT + notes TEXT, + action_by TEXT, + action_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE tree.discovery_config ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + config_key TEXT NOT NULL UNIQUE, + config_value JSONB NOT NULL DEFAULT '{}'::jsonb, + description TEXT +); diff --git a/rust/migrations/0004_genomics.sql b/rust/migrations/0004_genomics.sql new file mode 100644 index 00000000..5a173ff6 --- /dev/null +++ b/rust/migrations/0004_genomics.sql @@ -0,0 +1,220 @@ +-- genomics schema: sequencing runs/files, coverage, callable loci, labs & +-- instruments, pangenome. De-sprawl (plan §2): +-- * sequence_file_checksum / _http_location / _atp_location -> JSONB on sequence_file +-- * alignment_coverage / pangenome_alignment_coverage -> coverage JSONB on metadata +-- * biosample_callable_loci polymorphic (sample_type, sample_id) -> sample_guid FK +-- * scattered at_uri/at_cid -> single `atproto` JSONB + +-- ── Reference contigs ──────────────────────────────────────────────────────── +CREATE TABLE genomics.genbank_contig ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + accession TEXT NOT NULL UNIQUE, + common_name TEXT, + reference_genome TEXT NOT NULL, -- GRCh37/GRCh38/hs1 + seq_length BIGINT +); + +-- ── Labs & instruments ─────────────────────────────────────────────────────── +CREATE TABLE genomics.sequencing_lab ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + is_d2c BOOLEAN NOT NULL DEFAULT false, + website_url TEXT, + description_markdown TEXT +); + +CREATE TABLE genomics.sequencer_instrument ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + instrument_id TEXT NOT NULL UNIQUE, -- e.g. 
'A00123' + model_name TEXT, + manufacturer TEXT, + year_introduced INTEGER, + estimated_max_throughput BIGINT +); + +CREATE TABLE genomics.instrument_observation ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + instrument_id BIGINT REFERENCES genomics.sequencer_instrument(id), + lab_name TEXT, + biosample_ref TEXT, + platform TEXT, + instrument_model TEXT, + flowcell_id TEXT, + run_date DATE, + confidence TEXT, -- KNOWN/INFERRED/GUESSED + atproto JSONB -- {uri, cid, repo_did} +); +CREATE UNIQUE INDEX instrument_observation_atproto_uri_key + ON genomics.instrument_observation ((atproto->>'uri')) WHERE atproto IS NOT NULL; + +CREATE TABLE genomics.instrument_association_proposal ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + instrument_id BIGINT REFERENCES genomics.sequencer_instrument(id), + proposed_lab_name TEXT, + proposed_model TEXT, + observation_count INTEGER NOT NULL DEFAULT 0, + distinct_citizen_count INTEGER NOT NULL DEFAULT 0, + confidence_score NUMERIC(5,4), + status TEXT NOT NULL DEFAULT 'PENDING', + accepted_lab_id BIGINT REFERENCES genomics.sequencing_lab(id), + accepted_instrument_id BIGINT REFERENCES genomics.sequencer_instrument(id) +); + +-- ── Test types & coverage expectations ────────────────────────────────────── +CREATE TABLE genomics.test_type_definition ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + code TEXT NOT NULL UNIQUE, + display_name TEXT NOT NULL, + category core.data_generation_method NOT NULL, + vendor TEXT, + target_type core.target_type, + expected_min_depth DOUBLE PRECISION, + supports_haplogroup_y BOOLEAN NOT NULL DEFAULT false, + supports_haplogroup_mt BOOLEAN NOT NULL DEFAULT false, + supports_autosomal_ibd BOOLEAN NOT NULL DEFAULT false, + supports_ancestry BOOLEAN NOT NULL DEFAULT false, + typical_file_formats TEXT[] NOT NULL DEFAULT '{}', + description TEXT +); + +CREATE TABLE genomics.coverage_expectation_profile ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + test_type_id BIGINT 
NOT NULL REFERENCES genomics.test_type_definition(id) ON DELETE CASCADE, + contig_name TEXT, + variant_class TEXT, -- SNP/STR/INDEL + min_depth_high DOUBLE PRECISION, + min_depth_medium DOUBLE PRECISION, + min_depth_low DOUBLE PRECISION, + min_coverage_pct DOUBLE PRECISION, + min_mapping_quality DOUBLE PRECISION +); + +-- ── Pangenome ─────────────────────────────────────────────────────────────── +CREATE TABLE genomics.pangenome_graph ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + graph_name TEXT NOT NULL UNIQUE, + source_gfa_file TEXT, + description TEXT, + creation_date TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE genomics.pangenome_path ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + graph_id BIGINT NOT NULL REFERENCES genomics.pangenome_graph(id) ON DELETE CASCADE, + path_name TEXT NOT NULL, + is_reference BOOLEAN NOT NULL DEFAULT false, + length_bp BIGINT, + UNIQUE (graph_id, path_name) +); + +CREATE TABLE genomics.canonical_pangenome_variant ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + pangenome_graph_id BIGINT NOT NULL REFERENCES genomics.pangenome_graph(id) ON DELETE CASCADE, + variant_type TEXT, + variant_nodes INTEGER[] NOT NULL DEFAULT '{}', + variant_edges INTEGER[] NOT NULL DEFAULT '{}', + reference_path_id BIGINT REFERENCES genomics.pangenome_path(id), + reference_allele_sequence TEXT, + canonical_hash TEXT NOT NULL UNIQUE +); + +-- ── Sequencing runs & files ───────────────────────────────────────────────── +CREATE TABLE genomics.sequence_library ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid), + test_type_id BIGINT REFERENCES genomics.test_type_definition(id), + lab_id BIGINT REFERENCES genomics.sequencing_lab(id), + run_date DATE, + instrument TEXT, + reads BIGINT, + read_length INTEGER, + paired_end BOOLEAN, + insert_size INTEGER, + atproto JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL 
DEFAULT now() +); +CREATE INDEX sequence_library_sample_idx ON genomics.sequence_library (sample_guid); + +CREATE TABLE genomics.sequence_file ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + library_id BIGINT NOT NULL REFERENCES genomics.sequence_library(id) ON DELETE CASCADE, + file_name TEXT NOT NULL, + file_size_bytes BIGINT, + file_format TEXT, -- BAM/CRAM/VCF/... + aligner TEXT, + target_reference TEXT, + pangenome_graph_id BIGINT REFERENCES genomics.pangenome_graph(id), + checksums JSONB NOT NULL DEFAULT '[]'::jsonb, -- [{algorithm, checksum, verified_at}] + http_locations JSONB NOT NULL DEFAULT '[]'::jsonb, -- [{file_url, file_index_url}] + atp_location JSONB, -- {repo_did, record_cid, record_path} + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX sequence_file_library_idx ON genomics.sequence_file (library_id); + +-- Linear-reference alignment stats. coverage JSONB replaces alignment_coverage; +-- expression indexes target the hot aggregation paths. +CREATE TABLE genomics.alignment_metadata ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sequence_file_id BIGINT NOT NULL REFERENCES genomics.sequence_file(id) ON DELETE CASCADE, + genbank_contig_id BIGINT REFERENCES genomics.genbank_contig(id), + metric_level TEXT NOT NULL, -- CONTIG_OVERALL/REGION + region_name TEXT, + region_start_pos BIGINT, + region_end_pos BIGINT, + reference_build TEXT, + variant_caller TEXT, + coverage JSONB NOT NULL DEFAULT '{}'::jsonb -- {meanDepth, medianDepth, percent_coverage_at_*x} +); +CREATE INDEX alignment_metadata_file_idx ON genomics.alignment_metadata (sequence_file_id); +CREATE INDEX alignment_metadata_meandepth_idx + ON genomics.alignment_metadata (((coverage->>'meanDepth')::double precision)); + +CREATE TABLE genomics.pangenome_alignment_metadata ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sequence_file_id BIGINT NOT NULL REFERENCES genomics.sequence_file(id) ON DELETE CASCADE, + pangenome_graph_id BIGINT NOT NULL 
REFERENCES genomics.pangenome_graph(id), + metric_level TEXT NOT NULL, -- GRAPH_OVERALL/PATH/NODE/REGION + metadata JSONB NOT NULL DEFAULT '{}'::jsonb -- includes coverage +); + +CREATE TABLE genomics.reported_variant_pangenome ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid), + graph_id BIGINT NOT NULL REFERENCES genomics.pangenome_graph(id), + variant_type TEXT, + variant_nodes INTEGER[] NOT NULL DEFAULT '{}', + variant_edges INTEGER[] NOT NULL DEFAULT '{}', + allele_fraction DOUBLE PRECISION, + depth INTEGER, + zygosity TEXT, -- HOM_REF/HET/HOM_ALT + haplotype_information JSONB NOT NULL DEFAULT '{}'::jsonb +); +CREATE INDEX reported_variant_pangenome_sample_idx ON genomics.reported_variant_pangenome (sample_guid); + +-- ── Chip genotype data ─────────────────────────────────────────────────────── +CREATE TABLE genomics.genotype_data ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid), + test_type_id BIGINT REFERENCES genomics.test_type_definition(id), + provider TEXT, + metrics JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX genotype_data_sample_idx ON genomics.genotype_data (sample_guid); + +-- ── Callable loci (was polymorphic; now sample_guid FK) ────────────────────── +CREATE TABLE genomics.biosample_callable_loci ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid), + chromosome TEXT NOT NULL, + total_callable_bp BIGINT NOT NULL DEFAULT 0, + region_count INTEGER NOT NULL DEFAULT 0, + bed_file_hash TEXT, + source_test_type_id BIGINT REFERENCES genomics.test_type_definition(id), + y_xdegen_callable_bp BIGINT, + y_ampliconic_callable_bp BIGINT, + y_palindromic_callable_bp BIGINT, + computed_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX callable_loci_sample_idx ON 
genomics.biosample_callable_loci (sample_guid); diff --git a/rust/migrations/0005_ident.sql b/rust/migrations/0005_ident.sql new file mode 100644 index 00000000..7766d0c4 --- /dev/null +++ b/rust/migrations/0005_ident.sql @@ -0,0 +1,121 @@ +-- ident schema: users, RBAC, AT Protocol identity/OAuth, consent. + +CREATE TABLE ident.users ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + email CITEXT UNIQUE, -- case-insensitive + did TEXT UNIQUE, -- AT Protocol DID + handle TEXT UNIQUE, + display_name TEXT, + is_active BOOLEAN NOT NULL DEFAULT true, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- ── RBAC ───────────────────────────────────────────────────────────────────── +CREATE TABLE ident.roles ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name TEXT NOT NULL UNIQUE, + description TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE ident.permissions ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name TEXT NOT NULL UNIQUE, + description TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE ident.role_permissions ( + role_id UUID NOT NULL REFERENCES ident.roles(id) ON DELETE CASCADE, + permission_id UUID NOT NULL REFERENCES ident.permissions(id) ON DELETE CASCADE, + PRIMARY KEY (role_id, permission_id) +); + +CREATE TABLE ident.user_roles ( + user_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, + role_id UUID NOT NULL REFERENCES ident.roles(id) ON DELETE CASCADE, + PRIMARY KEY (user_id, role_id) +); + +-- Base roles the app expects to exist (Admin, Curator, TreeCurator). 
+INSERT INTO ident.roles (name, description) VALUES + ('Admin', 'Full administrative access'), + ('Curator', 'Content curation'), + ('TreeCurator', 'Haplogroup tree curation') +ON CONFLICT (name) DO NOTHING; + +-- ── AT Protocol identity / OAuth ──────────────────────────────────────────── +CREATE TABLE ident.user_pds_info ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL UNIQUE REFERENCES ident.users(id) ON DELETE CASCADE, -- UNIQUE: at most one PDS record per user + pds_url VARCHAR(512) NOT NULL, + did TEXT UNIQUE, + handle TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE ident.user_login_info ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, + provider_id TEXT NOT NULL, + provider_key TEXT NOT NULL, + -- bcrypt for legacy verification, argon2 for new hashes (NULL for OAuth-only). + password_hash TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + UNIQUE (provider_id, provider_key) -- a provider identity maps to at most one login row +); + +CREATE TABLE ident.user_oauth2_info ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + login_info_id UUID NOT NULL UNIQUE REFERENCES ident.user_login_info(id) ON DELETE CASCADE, -- UNIQUE: one token set per login + access_token TEXT NOT NULL, -- NOTE(review): plain TEXT at the schema level; confirm tokens are encrypted by the application before storage + token_type TEXT, + expires_in BIGINT, + refresh_token TEXT, + scope TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE ident.atprotocol_authorization_servers ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + issuer_url TEXT NOT NULL UNIQUE, + authorization_endpoint TEXT, + token_endpoint TEXT, + pushed_authorization_request_endpoint TEXT, + dpop_signing_alg_values_supported TEXT, + scopes_supported TEXT, + metadata_fetched_at TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE ident.atprotocol_client_metadata ( + id UUID PRIMARY KEY DEFAULT
gen_random_uuid(), + client_id_url TEXT NOT NULL UNIQUE, + client_name TEXT, + logo_uri TEXT, + tos_uri TEXT, + policy_uri TEXT, + redirect_uris TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- ── GDPR cookie consent (authenticated or anonymous) ───────────────────────── +CREATE TABLE ident.cookie_consents ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID REFERENCES ident.users(id) ON DELETE SET NULL, -- nullable (anonymous consent); also nulled when the user row is deleted + session_id TEXT, + ip_address_hash VARCHAR(64), + consent_given BOOLEAN NOT NULL DEFAULT false, + consent_timestamp TIMESTAMPTZ NOT NULL DEFAULT now(), + policy_version TEXT, + user_agent TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); diff --git a/rust/migrations/0006_pubs.sql b/rust/migrations/0006_pubs.sql new file mode 100644 index 00000000..ebd6a1de --- /dev/null +++ b/rust/migrations/0006_pubs.sql @@ -0,0 +1,81 @@ +-- pubs schema: publications, genomic studies, and their links to samples. +-- De-sprawl: publication_biosample + publication_citizen_biosample collapse into +-- one link table now that biosamples are unified under core.biosample.
+ +CREATE TYPE pubs.study_source AS ENUM ('ENA', 'NCBI_BIOPROJECT', 'NCBI_GENBANK'); + +CREATE TABLE pubs.genomic_study ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + accession TEXT NOT NULL UNIQUE, + title TEXT, + center_name TEXT, + study_name TEXT, + source pubs.study_source NOT NULL DEFAULT 'ENA', + bio_project_id TEXT, + molecule TEXT, + topology TEXT, + taxonomy_id INTEGER, + version TEXT, -- legacy genomic_studies.version is varchar + submission_date DATE, + details JSONB NOT NULL DEFAULT '{}'::jsonb +); + +CREATE TABLE pubs.publication ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + pubmed_id TEXT UNIQUE, + doi TEXT UNIQUE, + open_alex_id TEXT UNIQUE, -- the three external ids are all nullable; each is unique when present + title TEXT NOT NULL, + journal TEXT, + publication_date DATE, + url TEXT, + authors TEXT, + abstract_summary TEXT, + citation_normalized_percentile NUMERIC, + cited_by_count INTEGER, + open_access_status TEXT, + open_access_url TEXT, + primary_topic TEXT, + publication_type TEXT, + publisher TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Unified publication<->biosample link (both standard and citizen samples). +CREATE TABLE pubs.publication_biosample ( + publication_id BIGINT NOT NULL REFERENCES pubs.publication(id) ON DELETE CASCADE, + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + PRIMARY KEY (publication_id, sample_guid) -- NOTE(review): the only index leads with publication_id; add one on sample_guid if sample-first lookups are needed +); + +CREATE TABLE pubs.publication_study ( + publication_id BIGINT NOT NULL REFERENCES pubs.publication(id) ON DELETE CASCADE, + study_id BIGINT NOT NULL REFERENCES pubs.genomic_study(id) ON DELETE CASCADE, + PRIMARY KEY (publication_id, study_id) -- link rows are removed when either side is deleted +); + +-- Editorial review queue from OpenAlex discovery.
+CREATE TABLE pubs.publication_candidate ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + openalex_id TEXT NOT NULL UNIQUE, + doi TEXT, + title TEXT, + abstract TEXT, + publication_date DATE, + journal_name TEXT, + relevance_score NUMERIC, + status TEXT NOT NULL DEFAULT 'pending', -- pending/accepted/rejected/deferred + reviewed_by UUID REFERENCES ident.users(id) ON DELETE SET NULL, -- keep the review row when the reviewer is deleted (matches the other nullable user FKs) + raw_metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX publication_candidate_status_idx ON pubs.publication_candidate (status); + +CREATE TABLE pubs.publication_search_config ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + search_query TEXT, + concepts JSONB NOT NULL DEFAULT '[]'::jsonb, + enabled BOOLEAN NOT NULL DEFAULT true +); diff --git a/rust/migrations/0007_ibd.sql b/rust/migrations/0007_ibd.sql new file mode 100644 index 00000000..82cdab29 --- /dev/null +++ b/rust/migrations/0007_ibd.sql @@ -0,0 +1,126 @@ +-- ibd schema: population/ancestry analysis + privacy-preserving IBD matching, +-- attestations, suggestions, and match request/consent tracking. + +-- ── Population & ancestry ──────────────────────────────────────────────────── +CREATE TABLE ibd.population ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + population_name TEXT NOT NULL UNIQUE +); + +CREATE TABLE ibd.analysis_method ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + method_name TEXT NOT NULL UNIQUE +); + +CREATE TABLE ibd.ancestry_analysis ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + analysis_method_id BIGINT NOT NULL REFERENCES ibd.analysis_method(id), + population_id BIGINT NOT NULL REFERENCES ibd.population(id), + probability NUMERIC(5,4) NOT NULL, + UNIQUE (sample_guid, analysis_method_id, population_id) -- one probability per (sample, method, population) +); + +-- Full ADMIXTURE/PCA breakdown per sample (pca_coordinates as JSONB).
+CREATE TABLE ibd.population_breakdown ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + analysis_method TEXT, + panel_type TEXT, + pca_coordinates JSONB NOT NULL DEFAULT '{}'::jsonb, + analysis_date TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX population_breakdown_sample_idx ON ibd.population_breakdown (sample_guid); + +CREATE TABLE ibd.population_breakdown_cache ( + sample_guid UUID PRIMARY KEY REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + breakdown JSONB NOT NULL DEFAULT '{}'::jsonb, + breakdown_hash VARCHAR(64), + cached_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Cached O(N^2) pairwise overlap scores (order-independent pair key). +CREATE TABLE ibd.population_overlap_score ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid_1 UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + sample_guid_2 UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + score DOUBLE PRECISION NOT NULL, -- NOTE(review): no CHECK prevents a self-pair (sample_guid_1 = sample_guid_2) + computed_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE UNIQUE INDEX population_overlap_pair_key + ON ibd.population_overlap_score (LEAST(sample_guid_1, sample_guid_2), GREATEST(sample_guid_1, sample_guid_2)); -- (a,b) and (b,a) produce the same key, so only one may exist + +-- ── IBD matching ───────────────────────────────────────────────────────────── +CREATE TABLE ibd.validation_service ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + guid UUID NOT NULL UNIQUE DEFAULT gen_random_uuid(), + name TEXT NOT NULL UNIQUE, + description TEXT, + trust_level INTEGER NOT NULL DEFAULT 0 +); + +CREATE TABLE ibd.ibd_discovery_index ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + sample_guid_1 UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + sample_guid_2 UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + pangenome_graph_id BIGINT REFERENCES genomics.pangenome_graph(id), + match_region_type TEXT NOT NULL, --
AUTOSOMAL/X/Y/MT + total_shared_cm_approx DOUBLE PRECISION, + num_shared_segments_approx INTEGER, + is_publicly_discoverable BOOLEAN NOT NULL DEFAULT false, + consensus_status TEXT, + validation_service_id BIGINT REFERENCES ibd.validation_service(id), + indexed_date TIMESTAMPTZ NOT NULL DEFAULT now() +); +-- Order-independent pair uniqueness per region type. +CREATE UNIQUE INDEX ibd_discovery_pair_key + ON ibd.ibd_discovery_index (LEAST(sample_guid_1, sample_guid_2), GREATEST(sample_guid_1, sample_guid_2), match_region_type); -- same pair may appear once per match_region_type + +CREATE TABLE ibd.ibd_pds_attestation ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + ibd_discovery_index_id BIGINT NOT NULL REFERENCES ibd.ibd_discovery_index(id) ON DELETE CASCADE, + attesting_pds_guid UUID NOT NULL, -- plain UUID: no foreign key is declared for it here + attestation_timestamp TIMESTAMPTZ NOT NULL DEFAULT now(), + attestation_signature TEXT NOT NULL, + attestation_type TEXT NOT NULL, -- INITIAL_REPORT/CONFIRMATION/DISPUTE/REVOCATION + attestation_notes TEXT +); +CREATE INDEX ibd_attestation_index_idx ON ibd.ibd_pds_attestation (ibd_discovery_index_id); + +CREATE TABLE ibd.match_suggestion ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + target_sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + suggested_sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + suggestion_type TEXT NOT NULL, -- SHARED_MATCH/POPULATION_OVERLAP/HAPLOGROUP + score DOUBLE PRECISION, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + status TEXT NOT NULL DEFAULT 'ACTIVE', -- ACTIVE/DISMISSED/EXPIRED/CONVERTED + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + expires_at TIMESTAMPTZ +); +CREATE INDEX match_suggestion_target_idx ON ibd.match_suggestion (target_sample_guid, status); + +-- ── Match request & consent (AT Protocol records keyed by at:// URI) ───────── +CREATE TABLE ibd.match_request ( + request_uri TEXT PRIMARY KEY, + requester_did TEXT NOT NULL, + target_did TEXT NOT NULL, + requester_sample_guid UUID
REFERENCES core.biosample(sample_guid), + target_sample_guid UUID REFERENCES core.biosample(sample_guid), + status TEXT NOT NULL DEFAULT 'PENDING', -- PENDING/CANCELLED/CONSENTED/DECLINED/EXPIRED + details JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); -- NOTE(review): both *_sample_guid FKs have no ON DELETE action, so a referenced biosample cannot be deleted — confirm intended +CREATE INDEX match_request_target_did_idx ON ibd.match_request (target_did, status); +CREATE INDEX match_request_requester_did_idx ON ibd.match_request (requester_did, status); + +CREATE TABLE ibd.match_consent ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + request_uri TEXT NOT NULL REFERENCES ibd.match_request(request_uri) ON DELETE CASCADE, + consenting_did TEXT NOT NULL, + consent_given BOOLEAN NOT NULL, -- no default: every row must state an explicit yes/no + consent_uri TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX match_consent_request_idx ON ibd.match_consent (request_uri); diff --git a/rust/migrations/0008_fed.sql b/rust/migrations/0008_fed.sql new file mode 100644 index 00000000..f4e0e754 --- /dev/null +++ b/rust/migrations/0008_fed.sql @@ -0,0 +1,75 @@ +-- fed schema: PDS fleet + firehose. This collapses the legacy second "metadata" +-- database into the single DB (plan §2) — one database, one pool. + +-- Firehose cursor/lease tracking per registered PDS (distributed consumers).
+CREATE TABLE fed.pds_registration ( + did TEXT PRIMARY KEY, + pds_url TEXT NOT NULL, + handle TEXT, + last_commit_cid TEXT, + cursor BIGINT, + leased_by_instance_id TEXT, + lease_expires_at TIMESTAMPTZ, + processing_status TEXT NOT NULL DEFAULT 'IDLE', + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX pds_registration_lease_idx ON fed.pds_registration (lease_expires_at); + +CREATE TABLE fed.pds_node ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + did TEXT NOT NULL UNIQUE, + pds_url TEXT, + handle TEXT, + node_name TEXT, + software_version TEXT, + status TEXT NOT NULL DEFAULT 'UNKNOWN', -- ONLINE/OFFLINE/BUSY/ERROR/UNKNOWN + capabilities JSONB NOT NULL DEFAULT '{}'::jsonb, + last_heartbeat TIMESTAMPTZ, + last_commit_cid TEXT, + ip_address TEXT, -- NOTE(review): raw address here, while ident.cookie_consents and support.contact_message keep only ip_address_hash — confirm intended + os_info TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX pds_node_status_idx ON fed.pds_node (status); + +CREATE TABLE fed.pds_heartbeat_log ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + pds_node_id BIGINT NOT NULL REFERENCES fed.pds_node(id) ON DELETE CASCADE, -- log rows are deleted together with their node + status TEXT, + software_version TEXT, + load_metrics JSONB NOT NULL DEFAULT '{}'::jsonb, + processing_queue_size INTEGER, + error_message TEXT, + recorded_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX pds_heartbeat_node_time_idx ON fed.pds_heartbeat_log (pds_node_id, recorded_at DESC); -- ordered newest-first within each node + +CREATE TABLE fed.pds_fleet_config ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + config_key TEXT NOT NULL UNIQUE, + config_value TEXT, + description TEXT, + updated_by TEXT, -- free text, not a foreign key + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Distributed variant/haplogroup/STR proposals submitted by edge nodes.
+CREATE TABLE fed.pds_submission ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + pds_node_id BIGINT REFERENCES fed.pds_node(id) ON DELETE SET NULL, + submission_type TEXT NOT NULL, -- HAPLOGROUP_CALL/VARIANT_CALL/BRANCH_PROPOSAL/PRIVATE_VARIANT/STR_PROFILE + biosample_guid UUID REFERENCES core.biosample(sample_guid), -- NOTE(review): no ON DELETE action, so a referenced biosample cannot be deleted — confirm intended + proposed_value TEXT, + confidence_score NUMERIC(5,4), -- at most 4 decimal places, |value| < 10 + algorithm_version TEXT, + software_version TEXT, + payload JSONB NOT NULL DEFAULT '{}'::jsonb, + status TEXT NOT NULL DEFAULT 'PENDING', -- PENDING/ACCEPTED/REJECTED/SUPERSEDED + reviewed_by TEXT, -- free text, not a foreign key to ident.users + reviewed_at TIMESTAMPTZ, + atproto JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX pds_submission_status_type_idx ON fed.pds_submission (status, submission_type); diff --git a/rust/migrations/0009_social_support_billing.sql b/rust/migrations/0009_social_support_billing.sql new file mode 100644 index 00000000..061a78aa --- /dev/null +++ b/rust/migrations/0009_social_support_billing.sql @@ -0,0 +1,130 @@ +-- social / support / billing schemas.
+ +-- ── social: reputation ────────────────────────────────────────────────────── +CREATE TABLE social.reputation_event_type ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + name TEXT NOT NULL UNIQUE, + description TEXT, + default_points_change INTEGER NOT NULL DEFAULT 0, + is_positive BOOLEAN NOT NULL DEFAULT true, + is_system_generated BOOLEAN NOT NULL DEFAULT false, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE social.reputation_event ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, + event_type_id UUID NOT NULL REFERENCES social.reputation_event_type(id), + actual_points_change INTEGER NOT NULL, + source_user_id UUID REFERENCES ident.users(id) ON DELETE SET NULL, -- event survives deletion of the source user + related_entity_type TEXT, + related_entity_id UUID, + notes TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX reputation_event_user_idx ON social.reputation_event (user_id, created_at DESC); + +CREATE TABLE social.user_reputation_score ( + user_id UUID PRIMARY KEY REFERENCES ident.users(id) ON DELETE CASCADE, + score BIGINT NOT NULL DEFAULT 0, + last_calculated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- ── social: messaging & feed ───────────────────────────────────────────────── +CREATE TABLE social.user_block ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + blocker_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, + blocked_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, + reason TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + UNIQUE (blocker_id, blocked_id) -- one block row per ordered pair; NOTE(review): no CHECK stops blocker_id = blocked_id +); + +CREATE TABLE social.conversation ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + participant_ids UUID[] NOT NULL DEFAULT '{}', -- array column, so no foreign key to ident.users is enforced on members + subject TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + last_message_at TIMESTAMPTZ, + deleted_at TIMESTAMPTZ +); +CREATE INDEX conversation_participants_gin ON social.conversation
USING gin (participant_ids); + +CREATE TABLE social.message ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + conversation_id UUID NOT NULL REFERENCES social.conversation(id) ON DELETE CASCADE, + sender_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, + body TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + read_at TIMESTAMPTZ +); +CREATE INDEX message_conversation_idx ON social.message (conversation_id, created_at); + +CREATE TABLE social.feed_post ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + author_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, + content TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + deleted_at TIMESTAMPTZ +); + +-- ── social: group projects (legacy public.group_project) ───────────────────── +-- Rich access-control policies kept as TEXT (flexible) + atproto JSONB. +CREATE TABLE social.group_project ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + project_guid UUID NOT NULL UNIQUE DEFAULT gen_random_uuid(), + project_name TEXT NOT NULL, + project_type TEXT NOT NULL, -- HAPLOGROUP/SURNAME/GEOGRAPHIC/ETHNIC/RESEARCH/CUSTOM + target_haplogroup TEXT, + target_lineage TEXT, -- Y_DNA/MT_DNA/BOTH + description TEXT, + join_policy TEXT NOT NULL DEFAULT 'OPEN', + member_list_visibility TEXT NOT NULL DEFAULT 'MEMBERS_ONLY', + str_policy TEXT, + snp_policy TEXT, + public_tree_view BOOLEAN NOT NULL DEFAULT false, + succession_policy TEXT, + owner_did TEXT, + atproto JSONB, + deleted BOOLEAN NOT NULL DEFAULT false, -- boolean flag here, whereas social.conversation/feed_post use a deleted_at timestamp + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- ── support: contact messages (authenticated or anonymous) ─────────────────── +CREATE TABLE support.contact_message ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID REFERENCES ident.users(id) ON DELETE SET NULL, + sender_name TEXT, + sender_email TEXT, + subject TEXT, + message TEXT NOT NULL, +
status TEXT NOT NULL DEFAULT 'new', -- new/read/replied/closed + ip_address_hash VARCHAR(64), + user_last_viewed_at TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX contact_message_status_idx ON support.contact_message (status, created_at DESC); +CREATE INDEX contact_message_user_idx ON support.contact_message (user_id); + +-- ── billing: subscriptions ─────────────────────────────────────────────────── +CREATE TABLE billing.patron_subscription ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + user_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, + patron_tier TEXT NOT NULL, -- SUPPORTER/CONTRIBUTOR/SUSTAINER/FOUNDING_PATRON + status TEXT NOT NULL DEFAULT 'ACTIVE', -- ACTIVE/CANCELLED/PAST_DUE/EXPIRED + payment_provider TEXT, -- STRIPE/PAYPAL + provider_subscription_id TEXT, -- NOTE(review): not UNIQUE, so the same provider subscription can be stored twice + amount_cents INTEGER, + currency TEXT NOT NULL DEFAULT 'USD', + billing_interval TEXT, -- MONTHLY/YEARLY + current_period_start TIMESTAMPTZ, + current_period_end TIMESTAMPTZ, + cancelled_at TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX patron_subscription_user_idx ON billing.patron_subscription (user_id, status); diff --git a/rust/migrations/0010_audit.sql b/rust/migrations/0010_audit.sql new file mode 100644 index 00000000..c7313d62 --- /dev/null +++ b/rust/migrations/0010_audit.sql @@ -0,0 +1,16 @@ +-- Curator audit trail. Home for the legacy `curator.audit_log`: entity-level +-- change history (who changed what, with old/new JSONB snapshots). Distinct from +-- tree.curator_action, which records proposed-branch approval decisions only. +CREATE TABLE ident.audit_log ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL REFERENCES ident.users(id) ON DELETE CASCADE, -- NOTE(review): deleting a user also deletes their audit rows — confirm that is acceptable for an audit trail + entity_type TEXT NOT NULL, -- 'variant', 'haplogroup', ...
+ entity_id BIGINT NOT NULL, -- legacy stored an int; catalog ids are bigint + action TEXT NOT NULL, -- CREATE/UPDATE/DELETE + old_value JSONB, + new_value JSONB, + comment TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX audit_log_entity_idx ON ident.audit_log (entity_type, entity_id); +CREATE INDEX audit_log_user_idx ON ident.audit_log (user_id); diff --git a/rust/migrations/0011_coverage_mirror.sql b/rust/migrations/0011_coverage_mirror.sql new file mode 100644 index 00000000..bf51d214 --- /dev/null +++ b/rust/migrations/0011_coverage_mirror.sql @@ -0,0 +1,50 @@ +-- Federated coverage mirror (atmosphere doc 08 §3, REVISED 2026-06). +-- +-- Original re-scope said "no persistent mirror — aggregate coverage on demand." +-- That doesn't scale: a population coverage view would have to fan out an HTTP +-- fetch to every PDS at query time. So the AppView KEEPS a mirror of the public +-- coverage *summaries* (QC metrics only — never raw reads/files). A lightweight +-- Jetstream consumer (du-jobs) ingests com.decodingus.atmosphere.alignment +-- summary records; population views then aggregate this table with cheap local +-- SQL, exactly like the local genomics.alignment_metadata benchmark path. +-- +-- This is NOT the old full-CRUD network mirror: one collection, summary metrics +-- only, no per-sample raw data, no orphan/sync machinery. + +CREATE TABLE fed.coverage_summary ( + did TEXT NOT NULL, -- owning PDS / citizen DID + collection TEXT NOT NULL, -- lexicon NSID (com.decodingus.atmosphere.alignment) + rkey TEXT NOT NULL, -- record key (tid) + at_uri TEXT NOT NULL, -- at://{did}/{collection}/{rkey} + cid TEXT, -- commit cid (provenance / refetch) + biosample_ref TEXT, -- alignment.biosampleRef (denormalized cohort key) + sequence_run_ref TEXT, -- alignment.sequenceRunRef + reference_build TEXT, -- GRCh38 / GRCh37 / T2T-CHM13 / ... + aligner TEXT, + -- Extracted scalars for indexed aggregation; the authoritative copy (incl.
+ -- per-contig callable bases) stays in `metrics`. + mean_coverage DOUBLE PRECISION, + median_coverage DOUBLE PRECISION, + pct_10x DOUBLE PRECISION, + pct_20x DOUBLE PRECISION, + pct_30x DOUBLE PRECISION, + metrics JSONB NOT NULL DEFAULT '{}'::jsonb, -- full alignmentMetrics (incl. contigs[]) + record_created_at TIMESTAMPTZ, -- record meta.createdAt, if present + time_us BIGINT NOT NULL, -- Jetstream cursor of the event that wrote this row + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, collection, rkey) -- same three parts that make up at_uri +); + +CREATE INDEX coverage_summary_build_idx ON fed.coverage_summary (reference_build); +CREATE INDEX coverage_summary_mean_idx ON fed.coverage_summary (mean_coverage); +CREATE INDEX coverage_summary_biosample_idx + ON fed.coverage_summary (biosample_ref) WHERE biosample_ref IS NOT NULL; + +-- Singleton Jetstream cursor so the consumer resumes from where it left off after +-- a restart/reconnect (Jetstream replays from a `time_us` microsecond timestamp). +CREATE TABLE fed.jetstream_cursor ( + id BOOLEAN PRIMARY KEY DEFAULT true, + time_us BIGINT NOT NULL, + updated_at TIMESTAMPTZ NOT NULL DEFAULT now(), + CONSTRAINT jetstream_cursor_singleton CHECK (id) -- id is the PK and must be true, so the table holds at most one row +); diff --git a/rust/migrations/0012_fed_reporting.sql b/rust/migrations/0012_fed_reporting.sql new file mode 100644 index 00000000..a1e208d6 --- /dev/null +++ b/rust/migrations/0012_fed_reporting.sql @@ -0,0 +1,170 @@ +-- Federated reporting mirror (atmosphere "Record Status Summary": the legacy +-- AppView's `✅ AppView Complete` ingest set). The AppView does NOT analyze — it +-- aggregates and reports. Navigator computes anonymized per-sample SUMMARIES at +-- the edge and publishes them as public PDS records; a Jetstream consumer +-- (du-jobs) mirrors them here so reports aggregate with local SQL. +-- +-- This is the in-scope ingestion the v2.1 "scope reduction" wrongly dropped.
It +-- is NOT the legacy full-CRUD raw-data mirror: summaries only, no raw reads/files, +-- and donor PII is never stored. +-- +-- PRIVACY: PII-bearing records (biosample/sequencerun/project/workspace) are +-- reduced to typed, non-identifying columns — NO raw record JSONB is kept, so +-- donorIdentifier / sampleAccession / description / file paths can never leak. +-- Pure-analytics records (genotype/populationBreakdown/haplogroupReconciliation) +-- keep the computed payload as JSONB (with `files` stripped on ingest). +-- +-- Every table is keyed (did, rkey) — one collection per table — for idempotent, +-- ordered (time_us) upsert from the firehose. `fed.coverage_summary` (alignment, +-- migration 0011) is the sibling already in place. + +-- Biosample — anonymized: pseudonymous DID, sex, Y/mt haplogroup calls, center, +-- and join refs. Donor identifiers/accession/free-text description are dropped. +CREATE TABLE fed.biosample ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + sex TEXT, -- Male/Female/Other/Unknown + y_haplogroup TEXT, -- haplogroups.yDna.haplogroupName + mt_haplogroup TEXT, -- haplogroups.mtDna.haplogroupName + center_name TEXT, -- sequencing center (not donor PII) + population_breakdown_ref TEXT, + str_profile_ref TEXT, + sequence_run_count INTEGER NOT NULL DEFAULT 0, + genotype_count INTEGER NOT NULL DEFAULT 0, + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, -- the ordering key for the "ordered (time_us) upsert" described in the header + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX fed_biosample_y_idx ON fed.biosample (y_haplogroup) WHERE y_haplogroup IS NOT NULL; +CREATE INDEX fed_biosample_mt_idx ON fed.biosample (mt_haplogroup) WHERE mt_haplogroup IS NOT NULL; +CREATE INDEX fed_biosample_center_idx ON fed.biosample (center_name); -- unlike the two haplogroup indexes this one is not partial, so NULL centers are indexed too + +-- Sequence run — platform/instrument/test characterization (no files, no PII).
+CREATE TABLE fed.sequencerun ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + biosample_ref TEXT, + platform_name TEXT, -- ILLUMINA/PACBIO/NANOPORE/... + instrument_model TEXT, + instrument_id TEXT, -- @RG instrument id (crowdsourced lab inference) + test_type TEXT, -- WGS/EXOME/TARGETED/... + library_layout TEXT, -- PAIRED/SINGLE + total_reads BIGINT, + read_length INTEGER, + mean_insert_size DOUBLE PRECISION, + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX fed_sequencerun_platform_idx ON fed.sequencerun (platform_name); +CREATE INDEX fed_sequencerun_testtype_idx ON fed.sequencerun (test_type); + +-- Project — surname/research project grouping (project-level, not donor PII). +CREATE TABLE fed.project ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + project_name TEXT, + administrator_did TEXT, + member_count INTEGER NOT NULL DEFAULT 0, -- a count only; this table has no column for member identities + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); + +-- Workspace — researcher container; counts only. +CREATE TABLE fed.workspace ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + sample_count INTEGER NOT NULL DEFAULT 0, + project_count INTEGER NOT NULL DEFAULT 0, + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); + +-- Genotype — chip/array summary stats (computed; raw genotypes stay local). +CREATE TABLE fed.genotype ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + biosample_ref TEXT, + provider TEXT, -- 23andMe/AncestryDNA/...
+ test_type_code TEXT, + chip_version TEXT, + total_markers_called INTEGER, + total_markers_possible INTEGER, + no_call_rate DOUBLE PRECISION, + y_markers_called INTEGER, + mt_markers_called INTEGER, + autosomal_markers_called INTEGER, + het_rate DOUBLE PRECISION, + build_version TEXT, + y_haplogroup TEXT, -- derivedHaplogroups.yDna.haplogroupName + mt_haplogroup TEXT, -- presumably derivedHaplogroups.mtDna.haplogroupName, mirroring y_haplogroup — confirm against the lexicon + population_breakdown_ref TEXT, + record JSONB NOT NULL DEFAULT '{}'::jsonb, -- full record minus `files` + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX fed_genotype_provider_idx ON fed.genotype (provider); + +-- Population breakdown — ancestry composition (33 pops / 9 super-pops / PCA). +CREATE TABLE fed.population_breakdown ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + biosample_ref TEXT, + analysis_method TEXT, -- PCA_PROJECTION_GMM/ADMIXTURE/... + panel_type TEXT, -- aims/genome-wide + reference_populations TEXT, + snps_analyzed INTEGER, + snps_with_genotype INTEGER, + snps_missing INTEGER, + confidence_level DOUBLE PRECISION, + components JSONB NOT NULL DEFAULT '[]'::jsonb, -- sub-continental percentages + super_population_summary JSONB NOT NULL DEFAULT '[]'::jsonb, -- continental rollup + pca_coordinates JSONB, -- nullable, unlike the two NOT NULL JSONB arrays above + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX fed_population_breakdown_panel_idx ON fed.population_breakdown (panel_type); + +-- Haplogroup reconciliation — donor-level multi-run consensus call. +CREATE TABLE fed.haplogroup_reconciliation ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + specimen_donor_ref TEXT, + dna_type TEXT, -- Y_DNA / MT_DNA + compatibility_level TEXT, -- COMPATIBLE/MINOR_DIVERGENCE/...
+ consensus_haplogroup TEXT, + confidence DOUBLE PRECISION, + branch_compatibility_score DOUBLE PRECISION, + snp_concordance DOUBLE PRECISION, + run_count INTEGER, + record JSONB NOT NULL DEFAULT '{}'::jsonb, -- full record (runCalls, conflicts, ...) + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX fed_reconciliation_consensus_idx ON fed.haplogroup_reconciliation (dna_type, consensus_haplogroup); diff --git a/rust/migrations/0013_str.sql b/rust/migrations/0013_str.sql new file mode 100644 index 00000000..daf8bd84 --- /dev/null +++ b/rust/migrations/0013_str.sql @@ -0,0 +1,44 @@ +-- Y-STR profiles + per-branch modal signatures. +-- +-- Product goal: aggregate STR "signatures" per SNP-defined branch (haplogroup) +-- for STR→branch prediction, and surface a nudge for STR-only testers to upgrade +-- to WGS (which resolves the branch at SNP level). STR is Y-DNA only. +-- +-- Federation: Navigator publishes com.decodingus.atmosphere.strProfile records; +-- the Jetstream consumer mirrors them here (summaries only, like the other fed.* +-- tables). Markers are stored lossless as JSONB (the lexicon's union of simple / +-- multi-copy / complex values); scoring (modal + distance) handles simple + +-- multi-copy, complex is preserved but unscored in v1. + +CREATE TABLE fed.str_profile ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + biosample_ref TEXT, -- at-uri of the parent biosample (join key) + sequence_run_ref TEXT, -- set when STRs were WGS-derived + source TEXT, -- DIRECT_TEST / WGS_DERIVED / BIG_Y_DERIVED / IMPORTED / MANUAL_ENTRY + imported_from TEXT, -- FTDNA / YSEQ / YFULL / ... + derivation_method TEXT, -- HIPSTR / GANGSTR / ...
(WGS-derived) + total_markers INTEGER, + markers JSONB NOT NULL DEFAULT '[]'::jsonb, -- lossless strMarkerValue[] + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX str_profile_biosample_idx ON fed.str_profile (biosample_ref) WHERE biosample_ref IS NOT NULL; +-- "WGS-derived?" — drives the STR-only→WGS upgrade nudge (Phase 2). +CREATE INDEX str_profile_source_idx ON fed.str_profile (source); + +-- Widen the per-branch ancestral/modal signature for multi-copy/complex values +-- and aggregation provenance. `ancestral_value` stays as the fast integer path +-- for simple markers (now nullable: multi-copy/complex live in `ancestral_json`). +ALTER TABLE tree.haplogroup_ancestral_str + ALTER COLUMN ancestral_value DROP NOT NULL, + ADD COLUMN ancestral_json JSONB, -- the modal strValue (simple/multiCopy) + ADD COLUMN supporting_samples INTEGER, -- observations backing this marker's modal + ADD COLUMN recomputed_at TIMESTAMPTZ; +-- One signature row per (haplogroup, marker) so the recompute job can upsert. +ALTER TABLE tree.haplogroup_ancestral_str + ADD CONSTRAINT haplogroup_ancestral_str_hg_marker_uniq UNIQUE (haplogroup_id, marker_name); -- NOTE(review): this fails if existing rows already duplicate (haplogroup_id, marker_name) diff --git a/rust/migrations/0014_str_age.sql b/rust/migrations/0014_str_age.sql new file mode 100644 index 00000000..fc257553 --- /dev/null +++ b/rust/migrations/0014_str_age.sql @@ -0,0 +1,42 @@ +-- STR-based branch-age estimation (a *contributing* refinement factor in the +-- combined age model — McDonald 2021, documents/proposals/branch-age-estimation.md). +-- Branch age combines independent evidence as P(t|e)=k·∏P(t|eᵢ): SNP counting, +-- Y-STR variance, genealogical/aDNA anchors.
This migration adds the STR term's +-- inputs (per-marker mutation rates) and a labeled per-branch age-estimate store +-- (one row per method) — STR ages do NOT overwrite tree.haplogroup.tmrca_ybp +-- (the authoritative combined value); a future combiner aggregates the factors. + +-- Per-marker Y-STR mutation rates (per generation). Sources: Ballantyne 2010, +-- Willems 2016. Ships empty — populated by a curator/import; until then the +-- age computation falls back to a documented average rate. +CREATE TABLE genomics.str_mutation_rate ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + marker_name TEXT NOT NULL UNIQUE, + panel_names TEXT[], + mutation_rate NUMERIC(12,10) NOT NULL, -- mutations / marker / generation; NUMERIC(12,10) = up to 10 decimal places, |value| < 100 + mutation_rate_lower NUMERIC(12,10), -- 95% CI lower + mutation_rate_upper NUMERIC(12,10), -- 95% CI upper + omega_plus NUMERIC(5,4) DEFAULT 0.5, -- expansion bias (nullable; defaults to 0.5) + omega_minus NUMERIC(5,4) DEFAULT 0.5, -- contraction bias (nullable; defaults to 0.5) + multi_step_rate NUMERIC(5,4), -- ω±2 + ω±3 + … + source TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Per-branch age estimates, one row per contributing method (STR_VARIANCE now; +-- SNP_POISSON / GENEALOGICAL / COMBINED later). Kept distinct from the +-- authoritative tree.haplogroup.{formed_ybp,tmrca_ybp}.
+CREATE TABLE tree.haplogroup_age_estimate ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + haplogroup_id BIGINT NOT NULL REFERENCES tree.haplogroup(id) ON DELETE CASCADE, + method TEXT NOT NULL, -- STR_VARIANCE / SNP_POISSON / COMBINED / … + estimate_ybp INTEGER, + ci_low_ybp INTEGER, + ci_high_ybp INTEGER, + sample_count INTEGER, + marker_count INTEGER, + generation_years NUMERIC(5,2), + computed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + UNIQUE (haplogroup_id, method) +); +CREATE INDEX haplogroup_age_estimate_hg_idx ON tree.haplogroup_age_estimate (haplogroup_id); diff --git a/rust/migrations/0015_haplogroup_backbone.sql b/rust/migrations/0015_haplogroup_backbone.sql new file mode 100644 index 00000000..47329c89 --- /dev/null +++ b/rust/migrations/0015_haplogroup_backbone.sql @@ -0,0 +1,16 @@ +-- Backbone flag for the haplogroup tree. ISOGG's "backbone" is the established +-- trunk: the single-letter major clades (A, B, … T) and every ancestor on the +-- path from them up to the root. It is a *role*, distinct from `source` (which +-- records data provenance, e.g. 'ISOGG'), so we store it as its own column +-- rather than overloading `source == 'backbone'` as the legacy Scala app did. +-- +-- The tree view renders backbone nodes green (the established spine) vs. amber +-- for recently-updated and grey for the rest. Recomputed by +-- `du_db::haplogroup::recompute_backbone` after each tree load; curators may +-- also set it directly. +ALTER TABLE tree.haplogroup + ADD COLUMN is_backbone BOOLEAN NOT NULL DEFAULT false; + +-- Partial index: backbone is a small subset, and the tree view filters on it. 
+CREATE INDEX haplogroup_backbone_idx ON tree.haplogroup (haplogroup_type) + WHERE is_backbone; diff --git a/rust/migrations/0016_variant_naming_authority.sql b/rust/migrations/0016_variant_naming_authority.sql new file mode 100644 index 00000000..f17ab2aa --- /dev/null +++ b/rust/migrations/0016_variant_naming_authority.sql @@ -0,0 +1,20 @@ +-- Variant Naming Authority (planning/variant-naming-authority.md). +-- DecodingUs owns the `DU` Y-variant name prefix. A variant may exist BEFORE it +-- has an official name — discovered by coordinates, awaiting curation — so +-- `canonical_name` becomes nullable (NULL = unnamed, identified by coordinates). +-- The unique constraint applies only to named variants (NULL names are exempt). + +ALTER TABLE core.variant ALTER COLUMN canonical_name DROP NOT NULL; + +DROP INDEX core.variant_canonical_name_key; +CREATE UNIQUE INDEX variant_canonical_name_key + ON core.variant (canonical_name) WHERE canonical_name IS NOT NULL; + +-- The DU name authority: a monotonic sequence behind `DUxxxxx` identifiers. +CREATE SEQUENCE core.du_variant_name_seq; + +-- Mint the next DU name, e.g. DU00001. Zero-padded to 5 digits, then natural +-- width beyond that: the pad width is greatest(5, digit count) because lpad() truncates longer input. +CREATE FUNCTION core.next_du_name() RETURNS text + LANGUAGE sql AS +$$ SELECT 'DU' || lpad(n::text, greatest(5, length(n::text)), '0') FROM nextval('core.du_variant_name_seq') AS t(n) $$; diff --git a/rust/migrations/0017_variant_evidence.sql b/rust/migrations/0017_variant_evidence.sql new file mode 100644 index 00000000..64180151 --- /dev/null +++ b/rust/migrations/0017_variant_evidence.sql @@ -0,0 +1,6 @@ +-- Per-variant authority provenance (universal-variant-schema "evidence"). The +-- YBrowse GFF3 — the central document Y-DNA naming authorities flow through — +-- carries the SNP's haplogroup hint, YFull node, citation, primers, and comment. +-- Ingestion (the GFF3 job) populates this; other paths leave it '{}'.
+ALTER TABLE core.variant ADD COLUMN evidence JSONB NOT NULL DEFAULT '{}'::jsonb; +CREATE INDEX variant_evidence_gin ON core.variant USING gin (evidence jsonb_path_ops); diff --git a/rust/migrations/0018_ybrowse_mirror.sql b/rust/migrations/0018_ybrowse_mirror.sql new file mode 100644 index 00000000..5c85986d --- /dev/null +++ b/rust/migrations/0018_ybrowse_mirror.sql @@ -0,0 +1,48 @@ +-- YBrowse source mirror. YBrowse publishes `snps_hg38.gff3` as a FULL snapshot +-- (no deltas), with the same physical SNP often appearing under several names on +-- separate lines. Ingest refreshes THIS mirror verbatim (one row per upstream +-- name); the curated `core.variant` catalog is then *derived* from the mirror by +-- reconciliation, so curator decisions survive re-ingest. +CREATE SCHEMA IF NOT EXISTS source; + +CREATE TABLE source.ybrowse_snp ( + name TEXT PRIMARY KEY, -- the GFF Name/ID (authority identifier) + contig TEXT NOT NULL, + position BIGINT NOT NULL, + allele_anc TEXT, + allele_der TEXT, + coordinates JSONB NOT NULL DEFAULT '{}'::jsonb, -- multi-build {GRCh38, GRCh37, hs1} + evidence JSONB NOT NULL DEFAULT '{}'::jsonb, -- source, isogg/ycc haplogroup, yfull, ref, primers, ... + ingested_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Physical-SNP key: reconciliation folds synonyms (same coordinate + alleles). +CREATE INDEX ybrowse_snp_physical ON source.ybrowse_snp (contig, position, allele_anc, allele_der); + +-- Canonical-name preference for a synonym cluster (lower = more preferred). Ranks +-- by the name's alpha prefix; established authority prefixes first, provisional +-- (YFS/FTE/…) last. A cluster whose best rank is provisional (>= 90) gets a minted +-- DU name instead. TUNABLE domain policy — edit the CASE to adjust authority order. 
+CREATE FUNCTION core.ysnp_name_rank(nm text) RETURNS int LANGUAGE sql IMMUTABLE AS +$$ SELECT CASE upper(substring(nm from '^[A-Za-z]+')) + WHEN 'M' THEN 1 WHEN 'P' THEN 2 WHEN 'L' THEN 3 WHEN 'U' THEN 4 + WHEN 'V' THEN 5 WHEN 'PF' THEN 6 WHEN 'CTS' THEN 7 WHEN 'Z' THEN 8 + WHEN 'S' THEN 9 WHEN 'DF' THEN 10 WHEN 'FGC' THEN 11 WHEN 'BY' THEN 12 + WHEN 'FT' THEN 13 WHEN 'Y' THEN 14 WHEN 'A' THEN 15 + WHEN 'YFS' THEN 90 WHEN 'YFE' THEN 91 WHEN 'FTE' THEN 92 + ELSE 50 END $$; -- prefix match is case-insensitive; an unlisted prefix, or a name with no leading letters (NULL prefix), ranks 50 + +-- Synonym clusters whose names already map to MORE THAN ONE existing variant — +-- the catalog has them split across rows (some possibly tree-linked), so +-- reconciliation does NOT auto-merge them; it records them here for curator +-- review. Rebuilt each reconcile run. +CREATE TABLE source.ybrowse_reconcile_flag ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + contig TEXT NOT NULL, + position BIGINT NOT NULL, + allele_anc TEXT, + allele_der TEXT, + names TEXT[] NOT NULL, -- the synonym cluster's names + variant_ids BIGINT[] NOT NULL, -- the existing variants those names map to (presumably core.variant ids; no FK is declared) + flagged_at TIMESTAMPTZ NOT NULL DEFAULT now() +); diff --git a/rust/migrations/0019_variant_strand_canon.sql b/rust/migrations/0019_variant_strand_canon.sql new file mode 100644 index 00000000..539986e5 --- /dev/null +++ b/rust/migrations/0019_variant_strand_canon.sql @@ -0,0 +1,19 @@ +-- Strand-canonical allele key, so the same physical SNP recorded on opposite +-- strands (A>G vs reverse-complement T>C) folds together during reconciliation. +-- Ancestral/derived ORDER is preserved, so a polarity swap (A>G vs G>A) does NOT +-- fold — that's a real disagreement, left for a curator.
+ +CREATE FUNCTION core.dna_complement(b text) RETURNS text LANGUAGE sql IMMUTABLE AS +$$ SELECT CASE upper(b) WHEN 'A' THEN 'T' WHEN 'T' THEN 'A' WHEN 'C' THEN 'G' WHEN 'G' THEN 'C' ELSE upper(b) END $$; -- A<->T, C<->G (case-insensitive); any other input is returned upper-cased + +-- Canonical "anc>der" for a SNP: the lexicographically-smaller of the forward +-- representation and its strand reverse-complement (single ACGT bases only; +-- anything else passes through upper-cased). e.g. both A>G and T>C → 'A>G'. A NULL side renders as '?'. +CREATE FUNCTION core.ysnp_canon(anc text, der text) RETURNS text LANGUAGE sql IMMUTABLE AS +$$ SELECT CASE + WHEN anc IS NULL OR der IS NULL THEN COALESCE(upper(anc),'?') || '>' || COALESCE(upper(der),'?') + WHEN anc ~ '^[ACGTacgt]$' AND der ~ '^[ACGTacgt]$' THEN + least(upper(anc) || '>' || upper(der), + core.dna_complement(anc) || '>' || core.dna_complement(der)) + ELSE upper(anc) || '>' || upper(der) + END $$; diff --git a/rust/migrations/0020_variant_indel_canon.sql b/rust/migrations/0020_variant_indel_canon.sql new file mode 100644 index 00000000..70f248b9 --- /dev/null +++ b/rust/migrations/0020_variant_indel_canon.sql @@ -0,0 +1,64 @@ +-- INDEL handling for reconciliation, borrowing the Scala YBrowse heuristics +-- (repeat-notation expansion + VCF-style trim). YBrowse labels every row 'snp' +-- and encodes most indels as bare "ins"/"del" markers (no bases); a handful are +-- true multi-base alleles. MNPs (equal-length multi-base) are left ALONE. + +-- "3T" -> "TTT", "2AG" -> "AGAG"; an already-nucleotide allele passes through; +-- a non-nucleotide marker (ins/del/.) returns its upper-cased letters.
+CREATE FUNCTION core.ysnp_expand_repeat(a text) RETURNS text LANGUAGE plpgsql IMMUTABLE AS +$$ +DECLARE up text; digits text; bases text; +BEGIN + up := upper(trim(coalesce(a, ''))); -- NULL is treated as the empty string + IF up ~ '^[ACGTN]+$' THEN RETURN up; END IF; + digits := regexp_replace(up, '[^0-9]', '', 'g'); + bases := regexp_replace(up, '[0-9]', '', 'g'); + IF digits <> '' AND bases ~ '^[ACGTN]+$' THEN RETURN repeat(bases, digits::int); END IF; -- repeat notation applies to nucleotide runs only, never to ins/del markers + RETURN bases; -- e.g. "INS"/"DEL" markers, digits stripped +END +$$; + +-- Classify a variant from its ancestral/derived alleles: single bases = SNP; +-- equal-length multi-base = MNP (left alone); unequal/markers = INDEL. +CREATE FUNCTION core.ysnp_mutation_type(anc text, der text) RETURNS text LANGUAGE plpgsql IMMUTABLE AS +$$ +DECLARE a text; d text; +BEGIN + a := core.ysnp_expand_repeat(anc); + d := core.ysnp_expand_repeat(der); + IF a ~ '^[ACGT]$' AND d ~ '^[ACGT]$' THEN RETURN 'SNP'; END IF; + IF a ~ '^[ACGT]+$' AND d ~ '^[ACGT]+$' THEN + IF length(a) = length(d) THEN RETURN 'MNP'; END IF; + RETURN 'INDEL'; + END IF; + RETURN 'INDEL'; -- ins/del markers / gaps / dirty +END +$$; + +-- Replace the SNP-only canon with one that also folds equivalent INDEL +-- representations: SNP -> strand-canonical (as before); MNP -> left alone; +-- INDEL -> trim common suffix then prefix (keeping >=1 base each — VCF-style, +-- minus the reference-FASTA padding we don't have), so e.g. T>TC and TA>TCA fold.
+DROP FUNCTION IF EXISTS core.ysnp_canon(text, text); +CREATE FUNCTION core.ysnp_canon(anc text, der text) RETURNS text LANGUAGE plpgsql IMMUTABLE AS +$$ +DECLARE a text; d text; +BEGIN + a := core.ysnp_expand_repeat(anc); -- upper-cases, trims, expands "3T"-style repeats; NULL becomes '' + d := core.ysnp_expand_repeat(der); + IF a ~ '^[ACGT]$' AND d ~ '^[ACGT]$' THEN -- SNP: fold opposite-strand representations + RETURN least(a || '>' || d, core.dna_complement(a) || '>' || core.dna_complement(d)); + END IF; + IF a ~ '^[ACGT]+$' AND d ~ '^[ACGT]+$' THEN + IF length(a) = length(d) THEN RETURN a || '>' || d; END IF; -- MNP: untouched + WHILE length(a) > 1 AND length(d) > 1 AND right(a, 1) = right(d, 1) LOOP -- INDEL: trim the shared suffix first, keeping >= 1 base per side + a := left(a, length(a) - 1); d := left(d, length(d) - 1); + END LOOP; + WHILE length(a) > 1 AND length(d) > 1 AND left(a, 1) = left(d, 1) LOOP -- then trim the shared prefix, same >= 1 base floor + a := right(a, length(a) - 1); d := right(d, length(d) - 1); + END LOOP; + RETURN a || '>' || d; + END IF; + RETURN a || '>' || d; -- markers / gaps +END +$$; diff --git a/rust/migrations/0021_ancestral_state.sql b/rust/migrations/0021_ancestral_state.sql new file mode 100644 index 00000000..863c734c --- /dev/null +++ b/rust/migrations/0021_ancestral_state.sql @@ -0,0 +1,40 @@ +-- Ancestral-state reconstruction (ASR) modeling + allele-semantics fix. +-- +-- (1) core.variant.coordinates stored `reference_allele`/`alternate_allele`, but the +-- reference genome is not the phylogenetic root (genetic Adam) — so reference does +-- NOT map to ancestral. Every source already provides ancestral/derived; relabel +-- the JSONB keys per build. Values are unchanged (they were already anc/der). +-- (2) Recurrent SNPs (homoplasy) and back-mutations: a SNP changes state on multiple +-- branches, forward (anc->der) or reverse (der->anc). Record each branch's exact +-- transition on the link so forward/defining vs back-mutation vs recurrent is +-- representable. Existing links stay NULL (treated as forward/defining, using the +-- variant's coordinate alleles). + +-- (1) Rename coordinate keys per build (no-op on an empty/fresh DB).
+UPDATE core.variant v +SET coordinates = ( + SELECT jsonb_object_agg( + build, + (entry - 'reference_allele' - 'alternate_allele') + || (CASE WHEN entry ? 'reference_allele' + THEN jsonb_build_object('ancestral', entry->'reference_allele') ELSE '{}'::jsonb END) + || (CASE WHEN entry ? 'alternate_allele' + THEN jsonb_build_object('derived', entry->'alternate_allele') ELSE '{}'::jsonb END) + ) + FROM jsonb_each(v.coordinates) AS e(build, entry) +) +WHERE EXISTS ( + SELECT 1 FROM jsonb_each(v.coordinates) AS e(build, entry) + WHERE entry ? 'reference_allele' OR entry ? 'alternate_allele' +); + +-- (2) Per-branch mutation direction on the link (the exact transition observed on +-- that branch). NULL = legacy/defining forward link (use the variant's anc/der). +ALTER TABLE tree.haplogroup_variant + ADD COLUMN ancestral_allele text, + ADD COLUMN derived_allele text; + +COMMENT ON COLUMN tree.haplogroup_variant.ancestral_allele IS + 'Ancestral allele of this SNP on this branch (per-branch ASR transition; NULL = forward/defining, use variant coordinates).'; +COMMENT ON COLUMN tree.haplogroup_variant.derived_allele IS + 'Derived allele acquired on this branch. derived == variant ancestral => back-mutation; same SNP on multiple branches => recurrent (homoplasy).'; diff --git a/rust/migrations/0022_biosample_is_public.sql b/rust/migrations/0022_biosample_is_public.sql new file mode 100644 index 00000000..03fea6e3 --- /dev/null +++ b/rust/migrations/0022_biosample_is_public.sql @@ -0,0 +1,14 @@ +-- Public-report visibility gate on the canonical biosample. Curators opt a +-- sample in (Navigator analysis → linked to a publication → core.biosample as the +-- canonical record). Default false: nothing becomes public implicitly. This is +-- the single visibility authority the eventual full core/fed consolidation relies +-- on — the per-sample public report (`/sample/:id`) filters on it. 
+ +ALTER TABLE core.biosample + ADD COLUMN is_public BOOLEAN NOT NULL DEFAULT false; + +-- Partial index: the public report only ever filters on the true side, and the +-- public set is small relative to the table, so a partial index stays tiny. +-- Combined with `deleted = false` to match the existing biosample SELECT guard. +CREATE INDEX biosample_is_public_idx + ON core.biosample (sample_guid) WHERE is_public AND deleted = false; diff --git a/rust/migrations/0023_variant_defining_haplogroup.sql b/rust/migrations/0023_variant_defining_haplogroup.sql new file mode 100644 index 00000000..b4bb706a --- /dev/null +++ b/rust/migrations/0023_variant_defining_haplogroup.sql @@ -0,0 +1,38 @@ +-- Recurrence-aware variant identity (universal-variant design, Decision 5). +-- +-- A recurrent/homoplasic SNP is the SAME physical site (one coordinate) arising +-- independently on more than one lineage. ISOGG encodes this textually with a +-- `.1`/`.2` suffix on the SNP name (e.g. `A10822` on E, `A10822.2` on A000b1) and +-- a `^^` "below-criteria-but-stable" marker. Those decorations are NOT distinct +-- identities — baking them into `canonical_name` fragments one physical SNP into +-- several coordless rows. +-- +-- Per the design, identity is `(canonical_name, defining_haplogroup_id)`: one name, +-- one coordinate, distinguished by the lineage each occurrence defines. The primary +-- occurrence keeps `defining_haplogroup_id IS NULL` (the COALESCE(-1) slot); each +-- additional recurrence is a sibling row sharing the name + coordinate, scoped to +-- its branch. Because every row then defines exactly ONE lineage, +-- `scrub_recurrent_links` (which prunes a variant whose links scatter across +-- unrelated lineages) leaves genuine ISOGG-curated recurrence alone by construction. 
+ +ALTER TABLE core.variant + ADD COLUMN defining_haplogroup_id bigint REFERENCES tree.haplogroup(id); + +COMMENT ON COLUMN core.variant.defining_haplogroup_id IS + 'Lineage this variant identity is a defining mutation for. NULL = the primary ' + 'occurrence (unique by name). A non-NULL value marks an additional recurrence ' + 'of the same physical SNP (same coordinate) on a different branch — the ' + 'universal-variant way to model homoplasy (ISOGG .1/.2), instead of a name suffix.'; + +-- Replace the name-only uniqueness with name + defining-lineage. NULL collapses to +-- -1 so the primary occurrence stays globally unique by name, while recurrences are +-- unique per (name, branch). Matches the design index. +DROP INDEX core.variant_canonical_name_key; +CREATE UNIQUE INDEX variant_canonical_name_key + ON core.variant (canonical_name, COALESCE(defining_haplogroup_id, -1)) + WHERE canonical_name IS NOT NULL; + +-- Lookups of recurrences for a given branch (backfill + tree reads). +CREATE INDEX variant_defining_haplogroup_idx + ON core.variant (defining_haplogroup_id) + WHERE defining_haplogroup_id IS NOT NULL; diff --git a/rust/migrations/0024_tree_revision.sql b/rust/migrations/0024_tree_revision.sql new file mode 100644 index 00000000..71618c8e --- /dev/null +++ b/rust/migrations/0024_tree_revision.sql @@ -0,0 +1,23 @@ +-- Tree revision marker — a persisted, monotonic counter the API serves as the +-- cache-revalidation token (ETag) for the haplogroup-tree endpoints. The Edge +-- (Navigator) caches the full tree JSON and needs a cheap way to tell whether the +-- AppView has a newer tree without re-downloading ~28 MB on a blind timer. +-- +-- It is bumped explicitly (once) by every operation that changes a served tree +-- payload — change-set apply, coordinate enrichment (the hs1-backfill that caused +-- the staleness incident), the YBrowse reconcile, and the tree-init build — rather +-- than by a per-row trigger, to keep the hot per-variant write path free. 
+-- +-- A single global row covers both Y and mt: variants (core.variant) aren't typed +-- Y/mt, so a coordinate enrichment can't be cheaply attributed to one tree. The +-- per-endpoint ETag folds in the dna type + root, so a global bump revalidates +-- both trees (a harmless over-invalidation, never a false 304). + +CREATE TABLE tree.tree_revision ( + id SMALLINT PRIMARY KEY DEFAULT 1 CHECK (id = 1), + revision BIGINT NOT NULL DEFAULT 1, + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Exactly one row; the CHECK + PK pin it as a singleton. +INSERT INTO tree.tree_revision (id) VALUES (1); diff --git a/rust/migrations/0025_sequencer_instrument_lab.sql b/rust/migrations/0025_sequencer_instrument_lab.sql new file mode 100644 index 00000000..198d02c0 --- /dev/null +++ b/rust/migrations/0025_sequencer_instrument_lab.sql @@ -0,0 +1,20 @@ +-- Preseeded instrument → lab association. +-- +-- The genomics redesign (mig 0004) dropped the legacy `sequencer_instrument.lab_id` +-- FK on the theory that instrument↔lab would resolve through the consensus path +-- (instrument_observation → instrument_association_proposal → accept). That +-- consensus/curation machinery is NOT live yet, so the only associations we have +-- are the **preseeded** curator ties carried over from the legacy catalog — and +-- they had nowhere to live in the new schema. +-- +-- Re-add a nullable direct `lab_id`: the lookup API (GET /api/v1/sequencer/lab) +-- reads it directly, and the ETL backfills it from the legacy tie. When the +-- proposal/consensus path does go live, accepting a proposal sets this column; +-- the proposal tables (instrument_observation, instrument_association_proposal) +-- stay dormant until then. 
+ +ALTER TABLE genomics.sequencer_instrument + ADD COLUMN lab_id BIGINT REFERENCES genomics.sequencing_lab(id); + +CREATE INDEX sequencer_instrument_lab_idx + ON genomics.sequencer_instrument (lab_id) WHERE lab_id IS NOT NULL; diff --git a/rust/migrations/0026_instrument_proposal_indexes.sql b/rust/migrations/0026_instrument_proposal_indexes.sql new file mode 100644 index 00000000..601834d8 --- /dev/null +++ b/rust/migrations/0026_instrument_proposal_indexes.sql @@ -0,0 +1,15 @@ +-- D8 consensus hardening: stable proposal identity + aggregation/queue indexes. +-- The hourly recompute now UPSERTs the single active proposal per unresolved +-- instrument (instead of DELETE + re-INSERT), so a curator's open proposal id +-- stays stable across background runs. This partial unique index is the upsert +-- conflict arbiter (one active proposal per instrument). +CREATE UNIQUE INDEX instrument_association_proposal_active_key + ON genomics.instrument_association_proposal (instrument_id) + WHERE status IN ('PENDING', 'READY_FOR_REVIEW'); + +-- recompute aggregates observations by instrument; the curator queue filters and +-- sorts by status. Both are seq-scans without these as the federation grows. +CREATE INDEX instrument_observation_instrument_idx + ON genomics.instrument_observation (instrument_id); +CREATE INDEX instrument_association_proposal_status_idx + ON genomics.instrument_association_proposal (status); diff --git a/rust/migrations/0027_instrument_observation_lexicon.sql b/rust/migrations/0027_instrument_observation_lexicon.sql new file mode 100644 index 00000000..11f0df6b --- /dev/null +++ b/rust/migrations/0027_instrument_observation_lexicon.sql @@ -0,0 +1,32 @@ +-- D8: the instrumentObservation lexicon end-to-end. Citizens publish explicit +-- instrument→lab observations (com.decodingus.atmosphere.instrumentObservation) +-- carrying a real confidence level (KNOWN/INFERRED/GUESSED) and an observation +-- timestamp. 
The Jetstream consumer mirrors them here; recompute_consensus folds +-- them in alongside the implicit fed.sequencerun.center_name claims. + +CREATE TABLE fed.instrument_observation ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + instrument_id TEXT, -- @RG instrument id (e.g. 'A00123') + lab_name TEXT, -- the citizen's claimed lab + biosample_ref TEXT, -- at-uri of the biosample it came from + platform TEXT, -- ILLUMINA/PACBIO/ONT/... + instrument_model TEXT, + flowcell_id TEXT, + run_date DATE, + confidence TEXT, -- KNOWN/INFERRED/GUESSED + observed_at TIMESTAMPTZ, -- when the citizen recorded it (recency) + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX fed_instrument_observation_instrument_idx + ON fed.instrument_observation (instrument_id); + +-- The consensus working table gains an observation timestamp so the confidence +-- score's recency term is real (it was a constant). Implicit sequencerun-derived +-- observations leave it NULL (treated as neutral recency). +ALTER TABLE genomics.instrument_observation ADD COLUMN observed_at TIMESTAMPTZ; diff --git a/rust/migrations/0028_private_variant_mirror.sql b/rust/migrations/0028_private_variant_mirror.sql new file mode 100644 index 00000000..2bec6a10 --- /dev/null +++ b/rust/migrations/0028_private_variant_mirror.sql @@ -0,0 +1,26 @@ +-- D6: the privateVariant lexicon mirror. Citizens publish their per-sample private +-- variant set (mutations beyond the assigned terminal haplogroup) as a +-- com.decodingus.atmosphere.privateVariant record; the Jetstream consumer mirrors +-- it here. The discovery consensus engine (du_db::discovery) materializes these +-- into tree.biosample_private_variant and pools them into proposed branches. 
+-- +-- Privacy posture matches the existing strProfile/biosample summary records: +-- citizen-opt-in, keyed by biosample ref (no donor PII), variants anonymized to +-- coordinates/names. + +CREATE TABLE fed.private_variant ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + biosample_ref TEXT, -- at-uri of the parent biosample (join key) + sequence_run_ref TEXT, -- optional, for precision + dna_type TEXT, -- Y_DNA / MT_DNA + terminal_haplogroup TEXT, -- the assigned terminal (name string) + variants JSONB NOT NULL DEFAULT '[]'::jsonb, -- [{name?,contig,position,ancestral,derived,rsId?}] + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX fed_private_variant_biosample_idx ON fed.private_variant (biosample_ref); diff --git a/rust/migrations/0029_discovery_consensus.sql b/rust/migrations/0029_discovery_consensus.sql new file mode 100644 index 00000000..0201062d --- /dev/null +++ b/rust/migrations/0029_discovery_consensus.sql @@ -0,0 +1,49 @@ +-- D6: discovery consensus engine support. Adds the constraints the engine's +-- declarative recompute relies on (idempotent upsert arbiters + a stable cluster +-- key for proposal identity), and seeds tree.discovery_config (previously unused). + +-- 1. Natural identity for a materialized private variant: one sample asserting one +-- variant under one DNA arm (the materialize ON CONFLICT arbiter). De-dup any +-- pre-existing fixture rows first so the unique index can be built. +DELETE FROM tree.biosample_private_variant a + USING tree.biosample_private_variant b + WHERE a.id > b.id AND a.sample_guid = b.sample_guid + AND a.variant_id = b.variant_id AND a.haplogroup_type = b.haplogroup_type; +ALTER TABLE tree.biosample_private_variant + ADD CONSTRAINT bpv_sample_variant_type_key UNIQUE (sample_guid, variant_id, haplogroup_type); + +-- 2. 
One row per (proposal, variant) so the engine can rebuild a proposal's +-- defining-variant set with DELETE-then-insert safely. +ALTER TABLE tree.proposed_branch_variant + ADD CONSTRAINT pbv_branch_variant_key UNIQUE (proposed_branch_id, variant_id); + +-- 3. Stable cluster identity for the engine's proposal UPSERT. cluster_key = the +-- sorted defining variant-id set (as text). haplogroup_type scopes the partial +-- unique index. submit()-created proposals leave cluster_key NULL and are +-- excluded, so the two intake paths coexist. +ALTER TABLE tree.proposed_branch ADD COLUMN cluster_key TEXT; +ALTER TABLE tree.proposed_branch ADD COLUMN haplogroup_type core.dna_type; +UPDATE tree.proposed_branch pb + SET haplogroup_type = h.haplogroup_type + FROM tree.haplogroup h WHERE h.id = pb.parent_haplogroup_id; +CREATE UNIQUE INDEX proposed_branch_open_cluster_key + ON tree.proposed_branch (parent_haplogroup_id, haplogroup_type, cluster_key) + WHERE status IN ('PROPOSED','UNDER_REVIEW','READY_FOR_REVIEW','SPLIT_CANDIDATE') + AND cluster_key IS NOT NULL; +CREATE INDEX proposed_branch_status_idx ON tree.proposed_branch (status); + +-- 4. Seed discovery thresholds + engine flags (read by du_db::discovery::load_config). 
+INSERT INTO tree.discovery_config (config_key, config_value, description) VALUES + ('thresholds_Y_DNA', + '{"consensus_threshold":3,"auto_promote_threshold":10,"confidence_threshold":0.95,"similarity_match_threshold":0.80,"similarity_split_threshold":0.50}', + 'Y-DNA discovery consensus thresholds'), + ('thresholds_MT_DNA', + '{"consensus_threshold":3,"auto_promote_threshold":10,"confidence_threshold":0.95,"similarity_match_threshold":0.80,"similarity_split_threshold":0.50}', + 'mtDNA discovery consensus thresholds'), + ('confidence_weights', + '{"w_count":0.4,"w_submitters":0.3,"w_consistency":0.3}', + 'discovery confidence blend weights'), + ('engine', + '{"auto_promote":false}', + 'discovery engine flags (auto_promote off = curator-gated)') +ON CONFLICT (config_key) DO NOTHING; diff --git a/rust/migrations/0030_test_type_coverage_norm.sql b/rust/migrations/0030_test_type_coverage_norm.sql new file mode 100644 index 00000000..599ca049 --- /dev/null +++ b/rust/migrations/0030_test_type_coverage_norm.sql @@ -0,0 +1,22 @@ +-- D7: empirical per-test-type coverage norms. Derived (not curated) from the +-- federated coverage cohort (fed.coverage_summary ⋈ fed.sequencerun.test_type) + +-- fed.genotype marker counts, by du_db::coverage::recompute_norms. Lets a sample's +-- actual coverage be compared against what samples of its test type typically +-- achieve ("you ordered 30x, the cohort median is 29x, you got 28x"), and tracks +-- vendors against the norm. Typical marker counts are captured for the (deferred) +-- age-contribution weighting. 
+ +CREATE TABLE genomics.test_type_coverage_norm ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + test_type TEXT NOT NULL UNIQUE, -- federated/catalog test-type code + sample_count INTEGER NOT NULL DEFAULT 0, + median_mean_depth DOUBLE PRECISION, + p25_mean_depth DOUBLE PRECISION, + p75_mean_depth DOUBLE PRECISION, + median_pct_10x DOUBLE PRECISION, + median_pct_20x DOUBLE PRECISION, + median_pct_30x DOUBLE PRECISION, + typical_y_markers INTEGER, -- median fed.genotype.y_markers_called (for age, deferred) + typical_mt_markers INTEGER, + computed_at TIMESTAMPTZ NOT NULL DEFAULT now() +); diff --git a/rust/migrations/0031_discovery_reliability.sql b/rust/migrations/0031_discovery_reliability.sql new file mode 100644 index 00000000..d242c276 --- /dev/null +++ b/rust/migrations/0031_discovery_reliability.sql @@ -0,0 +1,15 @@ +-- D6 reliability: gate + weight the discovery consensus by the contributor's +-- cross-technology consensus reliability (fed.haplogroup_reconciliation). Extend +-- the existing tree.discovery_config seed (0029) in place — `jsonb ||` merge so the +-- prior keys are preserved. + +-- Exclusion floor: a contributor whose consensus confidence is below this (or whose +-- reconciliation is INCOMPATIBLE) can't drive a branch proposal. +UPDATE tree.discovery_config + SET config_value = config_value || '{"min_consensus_confidence": 0.5}'::jsonb + WHERE config_key = 'engine'; + +-- Confidence weights renormalized to include the reliability term (sum = 1.0). +UPDATE tree.discovery_config + SET config_value = '{"w_count":0.35,"w_submitters":0.2,"w_consistency":0.25,"w_reliability":0.2}'::jsonb + WHERE config_key = 'confidence_weights'; diff --git a/rust/migrations/0032_exchange.sql b/rust/migrations/0032_exchange.sql new file mode 100644 index 00000000..0f2b23c1 --- /dev/null +++ b/rust/migrations/0032_exchange.sql @@ -0,0 +1,73 @@ +-- D1: the encrypted Edge-to-Edge exchange substrate, AppView **broker** side. 
The +-- AppView never sees plaintext or session keys — it brokers consent (verifying +-- Ed25519 DID signatures), mirrors the published X25519 keys, and blind-relays +-- ciphertext. Generalizes the IBD-specific match_request/match_consent (mig 0007, +-- unused) into a purpose-tagged exchange.* schema shared by IBD + genealogy PII. + +CREATE SCHEMA IF NOT EXISTS exchange; + +-- A signed request to exchange with a partner (PII-free: dids + purpose + scope). +CREATE TABLE exchange.exchange_request ( + request_uri TEXT PRIMARY KEY, -- at:// uri of the signed record + initiator_did TEXT NOT NULL, + partner_did TEXT NOT NULL, + purpose TEXT NOT NULL, -- IBD_AUTOSOMAL/IBD_Y/IBD_MT/GENEALOGY_PII/... + scope TEXT, -- consent boundary, e.g. 'project:' + status TEXT NOT NULL DEFAULT 'PENDING', -- PENDING/CONSENTED/DECLINED/CANCELLED/EXPIRED + details JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +CREATE INDEX exchange_request_partner_idx ON exchange.exchange_request (partner_did, status); +CREATE INDEX exchange_request_initiator_idx ON exchange.exchange_request (initiator_did, status); + +-- One signed consent per (request, did); the broker verifies BOTH before a session. +CREATE TABLE exchange.exchange_consent ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + request_uri TEXT NOT NULL REFERENCES exchange.exchange_request(request_uri) ON DELETE CASCADE, + consenting_did TEXT NOT NULL, + consent_given BOOLEAN NOT NULL, + consent_uri TEXT, + signature TEXT NOT NULL, -- Ed25519, verified by the broker + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + UNIQUE (request_uri, consenting_did) +); + +-- A consented session; the Edges run ECDH + exchange under it (broker opaque). 
+CREATE TABLE exchange.exchange_session ( + session_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + request_uri TEXT NOT NULL REFERENCES exchange.exchange_request(request_uri) ON DELETE CASCADE, + status TEXT NOT NULL DEFAULT 'ESTABLISHING', -- ESTABLISHING/ACTIVE/COMPLETE/EXPIRED + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + expires_at TIMESTAMPTZ +); +CREATE INDEX exchange_session_request_idx ON exchange.exchange_session (request_uri); + +-- The blind store-and-forward buffer — ciphertext only; deleted on ack or TTL. +CREATE TABLE exchange.relay_envelope ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + session_id UUID NOT NULL REFERENCES exchange.exchange_session(session_id) ON DELETE CASCADE, + from_did TEXT NOT NULL, + to_did TEXT NOT NULL, + seq INTEGER NOT NULL, + size_bytes INTEGER NOT NULL, + blob BYTEA NOT NULL, -- opaque AES-GCM ciphertext envelope + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + expires_at TIMESTAMPTZ, + delivered_at TIMESTAMPTZ +); +CREATE INDEX relay_envelope_pull_idx ON exchange.relay_envelope (session_id, to_did, delivered_at); + +-- Mirror of each DID's published, Ed25519-signed static X25519 exchange key. +CREATE TABLE exchange.exchange_publickey ( + did TEXT PRIMARY KEY, + x25519_pub BYTEA NOT NULL, + key_uri TEXT, + sig_verified_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Fold: the IBD-specific request/consent generalize into exchange.* (D1 §8). They +-- carry no data and no code referenced them (the candidate engine uses +-- ibd.match_suggestion / ibd_discovery_index, which remain). +DROP TABLE IF EXISTS ibd.match_consent; +DROP TABLE IF EXISTS ibd.match_request; diff --git a/rust/migrations/0033_research.sql b/rust/migrations/0033_research.sql new file mode 100644 index 00000000..8727f34f --- /dev/null +++ b/rust/migrations/0033_research.sql @@ -0,0 +1,44 @@ +-- D2: the PII-free ResearchSubject registry. A vendor-neutral pseudonymous "person" +-- node co-admins attach project memberships + merge-links to. 
Identity resolution is +-- Edge-to-Edge over D1 (id-list exchange) / genetic (D3) — the AppView learns no name, +-- kit number, or hash of one. INVARIANT: every column here is pseudonymous (UUID/DID). + +CREATE SCHEMA IF NOT EXISTS research; + +-- The pseudonymous person node — almost empty by design. +CREATE TABLE research.research_subject ( + research_subject_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + custody_did TEXT, -- null = admin-stewarded; set on member claim + retired_into UUID REFERENCES research.research_subject(research_subject_id), -- tombstone → kept id + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Project membership: which subject is in which group-project, and who stewards the +-- (local, clear-text) identity for it. +CREATE TABLE research.subject_membership ( + research_subject_id UUID NOT NULL REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE, + project_id BIGINT NOT NULL REFERENCES social.group_project(id) ON DELETE CASCADE, + steward_did TEXT NOT NULL, -- the admin holding the local clear-text identity + added_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (research_subject_id, project_id) +); +CREATE INDEX subject_membership_project_idx ON research.subject_membership (project_id); +CREATE INDEX subject_membership_steward_idx ON research.subject_membership (steward_did); + +-- Audit of merges (4.1 id-exchange / 4.2 genetic / 4.3 assertion / claim) — pseudonymous. +CREATE TABLE research.subject_link ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + subject_a UUID NOT NULL, -- kept + subject_b UUID NOT NULL, -- retired into a + method TEXT NOT NULL, -- ID_EXCHANGE/GENETIC/ASSERTION/CLAIM + asserted_by_did TEXT NOT NULL, + confidence DOUBLE PRECISION, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Optional, sparse: link to a federated sample IF the person published anonymized data. 
+CREATE TABLE research.subject_biosample ( + research_subject_id UUID REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE, + sample_guid UUID REFERENCES core.biosample(sample_guid), + PRIMARY KEY (research_subject_id, sample_guid) +); diff --git a/rust/migrations/0034_project_member.sql b/rust/migrations/0034_project_member.sql new file mode 100644 index 00000000..8d8962d4 --- /dev/null +++ b/rust/migrations/0034_project_member.sql @@ -0,0 +1,18 @@ +-- D5: the group-project collaborator-team ACL. A *project* (social.group_project, +-- mig 0009 — reused, not duplicated) is the consent/scope boundary the whole stack +-- references (scope=project:); project_member is the trust circle that gates it: +-- D1 only relays GENEALOGY_PII between team members, D2 registry ops are role-gated, +-- D4 (later) serves R2 to the team. PII-free: DIDs + roles only. The project's +-- owner_did is the founding ADMIN (no row needed for it). + +CREATE TABLE research.project_member ( + project_id BIGINT NOT NULL REFERENCES social.group_project(id) ON DELETE CASCADE, + member_did TEXT NOT NULL, + role TEXT NOT NULL, -- ADMIN/CO_ADMIN/MODERATOR/CURATOR + permissions TEXT[] NOT NULL DEFAULT '{}', -- granular overrides (forward; role-gated for v1) + appointed_by TEXT, + joined_at TIMESTAMPTZ NOT NULL DEFAULT now(), + left_at TIMESTAMPTZ, -- revocation; NULL = live + PRIMARY KEY (project_id, member_did) +); +CREATE INDEX project_member_did_idx ON research.project_member (member_did) WHERE left_at IS NULL; diff --git a/rust/migrations/0035_assertion.sql b/rust/migrations/0035_assertion.sql new file mode 100644 index 00000000..cf4a425c --- /dev/null +++ b/rust/migrations/0035_assertion.sql @@ -0,0 +1,38 @@ +-- D4: the attributed-claim assertion store (R2 — non-PII, project-scoped). Co-admin +-- research is modeled as append-only, attributed, scoped assertions over a pseudonymous +-- research_subject (D2), NOT direct row mutation. 
The predicate's PII-class picks the +-- rail: only non-PII predicates land here (R2/R1); PII (MDKA_IS/IDENTITY/PII NOTE) is +-- R3 — D1 P2P only, folded locally in Navigator, and by construction has NO table here. +-- INVARIANT: every column is pseudonymous (UUID/DID) or a non-PII classification/JSONB. + +-- Append-only claims. An edit is a new row with supersedes_id = old.id; a retraction +-- sets retracted_at. Nothing is overwritten — conflict-with-provenance (two admins +-- disagree → two live rows, both attributed). +CREATE TABLE research.assertion ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + subject_id UUID NOT NULL REFERENCES research.research_subject(research_subject_id) ON DELETE CASCADE, + predicate TEXT NOT NULL, -- SAME_PERSON_AS | BELONGS_TO_BRANCH | HAPLOGROUP_IS | NOTE(non-PII) + value JSONB NOT NULL, -- predicate-specific; scrubbed for obvious PII + author_did TEXT NOT NULL, -- attribution + scope TEXT NOT NULL, -- PUBLIC | PROJECT: (the consent/visibility boundary) + evidence JSONB, -- optional (STR distance, SNP, IBD ref, doc citation) + record_uri TEXT, -- at:// of the PDS record (R1) if any; NULL for R2 + supersedes_id BIGINT REFERENCES research.assertion(id), -- append-only edit chain + retracted_at TIMESTAMPTZ, -- drops out of current_view; kept for audit + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); +-- The live-claim lookup the fold reads (per subject/predicate/scope, non-retracted). +CREATE INDEX assertion_live_idx ON research.assertion (subject_id, predicate, scope) WHERE retracted_at IS NULL; + +-- Materialized fold — per-(subject, predicate, scope), so a subject in two projects +-- never bleeds claims across them (per-project isolation). A project view folds its own +-- PROJECT: claims together with PUBLIC ones; the recompute runs on every write. 
+CREATE TABLE research.subject_current_view ( + subject_id UUID NOT NULL, + predicate TEXT NOT NULL, + scope TEXT NOT NULL, -- the viewing scope: PUBLIC | PROJECT: + state TEXT NOT NULL, -- SETTLED | DISPUTED + view JSONB NOT NULL, -- live claims [{assertion_id, value, author_did, evidence, created_at}] + refolded_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (subject_id, predicate, scope) +); diff --git a/rust/migrations/0036_device_key.sql b/rust/migrations/0036_device_key.sql new file mode 100644 index 00000000..bd96b289 --- /dev/null +++ b/rust/migrations/0036_device_key.sql @@ -0,0 +1,23 @@ +-- Device-key registry (the Edge auth foundation). A desktop client (Navigator) cannot +-- sign with the PDS-custodied #atproto repo key, and cannot add its own verificationMethod +-- to a did:plc doc (that needs a rotation-key PLC op the PDS holds). So each client +-- publishes its own Ed25519 DEVICE public key as a record in the user's own repo — writing +-- to your own repo IS the proof of control over repo_did — and the AppView ingests it here +-- (like every other fed.* record). crate::sig::verify_signed then verifies signed Edge calls +-- against the registered key. PII-free: a DID + a PUBLIC key + record pointers only. +-- +-- (did, rkey) PK ⇒ a DID may register N device keys (N devices); revocation = the user +-- deletes the record, which routes through fed::delete → table_for. 
+ +CREATE TABLE fed.device_key ( + did TEXT NOT NULL, + rkey TEXT NOT NULL, + at_uri TEXT NOT NULL, + cid TEXT, + public_key TEXT NOT NULL, -- the device Ed25519 pubkey as a did:key:z… string + record_created_at TIMESTAMPTZ, + time_us BIGINT NOT NULL, + indexed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (did, rkey) +); +CREATE INDEX device_key_did_idx ON fed.device_key (did); diff --git a/rust/migrations/0037_haplogroup_sample.sql b/rust/migrations/0037_haplogroup_sample.sql new file mode 100644 index 00000000..249254ad --- /dev/null +++ b/rust/migrations/0037_haplogroup_sample.sql @@ -0,0 +1,22 @@ +-- YFull-style sample placement: attach non-D2C biosamples as leaves under the tree node +-- their published haplogroup call resolves to. The call lives in +-- core.biosample.original_haplogroups (a paper's stated Y/mt haplogroup); a recompute +-- resolves it to a tree.haplogroup node (via du_db::haplogroup::resolve_name_or_variant). +-- D2C (source='CITIZEN') samples are excluded. An unresolvable call is kept UNPLACED +-- (haplogroup_id NULL + the raw text) so a curator can triage it — no silent loss. +-- +-- dna_type-partitioned (PK includes it) so a sample gets one Y placement now + one mt +-- placement when the mt tree lands. Placements feed the cached tree's per-node sample +-- count, so a recompute bumps tree.tree_revision (the ETag marker). 
+ +CREATE TABLE tree.haplogroup_sample ( + sample_guid UUID NOT NULL REFERENCES core.biosample(sample_guid) ON DELETE CASCADE, + dna_type core.dna_type NOT NULL, + haplogroup_id BIGINT REFERENCES tree.haplogroup(id), -- NULL when UNPLACED + call_text TEXT NOT NULL, -- the raw published call resolved from + status TEXT NOT NULL, -- PLACED | UNPLACED + refreshed_at TIMESTAMPTZ NOT NULL DEFAULT now(), + PRIMARY KEY (sample_guid, dna_type) +); +CREATE INDEX haplogroup_sample_node_idx ON tree.haplogroup_sample (haplogroup_id) WHERE status = 'PLACED'; +CREATE INDEX haplogroup_sample_status_idx ON tree.haplogroup_sample (status); diff --git a/rust/migrations/0038_seed_sequencer_labs.sql b/rust/migrations/0038_seed_sequencer_labs.sql new file mode 100644 index 00000000..6c9a7c7f --- /dev/null +++ b/rust/migrations/0038_seed_sequencer_labs.sql @@ -0,0 +1,63 @@ +-- Seed the sequencer lab + instrument lookup from the old YDNA Warehouse d2c export +-- (instrument_centers.tsv). Rule: keep instruments with n_crams > 2; when an instrument +-- maps to several labs, assign the max-frequency one (a NO_CSV "no lab" placeholder never +-- wins a qualifying row). Lab short codes → canonical full names; all is_d2c. model_name = +-- the export's platform string; manufacturer derived (Illumina / MGI / unknown→NULL). +-- The lookup (du_db::sequencer::lookup_lab → /api/v1/sequencer/lab) reads this tie directly. +-- Idempotent: labs ON CONFLICT DO NOTHING (preserve any consensus-created row), instruments +-- ON CONFLICT DO UPDATE (corrective re-run). 
+ +INSERT INTO genomics.sequencing_lab (name, is_d2c) VALUES + ('Family Tree DNA', true), + ('Dante Labs', true), + ('Nebula Genomics', true), + ('Full Genomes Corporation', true), + ('YSEQ', true) +ON CONFLICT (name) DO NOTHING; + +INSERT INTO genomics.sequencer_instrument (instrument_id, model_name, manufacturer, lab_id) VALUES + -- Family Tree DNA (6) + ('A00186', 'NovaSeq6000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Family Tree DNA')), + ('SN7001368', 'HiSeq2000/2500', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Family Tree DNA')), + ('SN7001371', 'HiSeq2000/2500', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Family Tree DNA')), + ('HWI-ST1368', 'HiSeq2000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Family Tree DNA')), + ('USSD-TL1-1227', 'unknown', NULL, (SELECT id FROM genomics.sequencing_lab WHERE name = 'Family Tree DNA')), + ('8QRF6V1', 'unknown', NULL, (SELECT id FROM genomics.sequencing_lab WHERE name = 'Family Tree DNA')), + -- Dante Labs (6) + ('A00925', 'NovaSeq6000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Dante Labs')), + ('A00910', 'NovaSeq6000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Dante Labs')), + ('A00966', 'NovaSeq6000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Dante Labs')), + ('A01245', 'NovaSeq6000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Dante Labs')), + ('A00197', 'NovaSeq6000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Dante Labs')), + ('V100002649', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Dante Labs')), + -- YSEQ (11) + ('YSEQ1', 'MGI in-house (YSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('A00182', 'NovaSeq6000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('A00788', 'NovaSeq6000', 'Illumina', (SELECT id FROM 
genomics.sequencing_lab WHERE name = 'YSEQ')), + ('FP200006039', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('V350158671', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('V300063980', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('V350181030', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('V350218029', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('V350218277', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('V350202941', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + ('V350180275', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'YSEQ')), + -- Full Genomes Corporation (11) + ('ST-E00317', 'HiSeqX', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('ST-E00192', 'HiSeqX', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('E00500', 'HiSeqX', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('ST-E00126', 'HiSeqX', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('E00548', 'HiSeqX', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('E00576', 'HiSeqX', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('ST-E00494', 'HiSeqX', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('ST-E00142', 'HiSeqX', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('D00539', 'HiSeq2500', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes 
Corporation')), + ('FCC3KJLACXX', 'HiSeq2000/2500(flowcell-id)', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + ('A00818', 'NovaSeq6000', 'Illumina', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Full Genomes Corporation')), + -- Nebula Genomics (2) + ('E100006791', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Nebula Genomics')), + ('FP200007833', 'MGI/BGI(DNBSEQ)', 'MGI', (SELECT id FROM genomics.sequencing_lab WHERE name = 'Nebula Genomics')) +ON CONFLICT (instrument_id) DO UPDATE SET + model_name = EXCLUDED.model_name, + manufacturer = EXCLUDED.manufacturer, + lab_id = EXCLUDED.lab_id; diff --git a/rust/migrations/0039_denovo_conflict.sql b/rust/migrations/0039_denovo_conflict.sql new file mode 100644 index 00000000..449ecbee --- /dev/null +++ b/rust/migrations/0039_denovo_conflict.sql @@ -0,0 +1,22 @@ +-- De-novo tree vs reference (ISOGG / PhyloTree) placement conflicts, surfaced for +-- curator triage. Each row = a reference clade whose de-novo placement disagrees: +-- `foreign_in` tips sit inside the clade's de-novo home node that don't belong, +-- and/or `members_away` of the clade's members landed elsewhere; `magnitude` ranks +-- the discrepancy. Populated by the de-novo loader (du_db::denovo::load) and +-- replaced per dna_type on each reload (cleared by haplogroup::clear_dna). 
+CREATE TABLE tree.denovo_conflict ( + id BIGSERIAL PRIMARY KEY, + dna_type core.dna_type NOT NULL, + haplogroup TEXT NOT NULL, -- the reference clade (ISOGG / mt haplogroup) + label TEXT, -- display label + n_tips INTEGER NOT NULL, + magnitude INTEGER NOT NULL, + home_node TEXT, -- the de-novo NodeN the clade maps to + foreign_in INTEGER NOT NULL, -- foreign tips inside the home node + members_away INTEGER NOT NULL, -- clade members placed outside the home node + source TEXT NOT NULL DEFAULT 'decodingus-denovo', + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE INDEX denovo_conflict_triage_idx + ON tree.denovo_conflict (dna_type, magnitude DESC, n_tips DESC); diff --git a/rust/migrations/0040_ibd_attestation_did.sql b/rust/migrations/0040_ibd_attestation_did.sql new file mode 100644 index 00000000..f07e2d39 --- /dev/null +++ b/rust/migrations/0040_ibd_attestation_did.sql @@ -0,0 +1,26 @@ +-- IBD attestation ingest: let two consented Edges report the *outcome* of their +-- encrypted comparison back to the AppView. The match graph (ibd.ibd_discovery_index) +-- + attestations (ibd.ibd_pds_attestation) already exist (mig 0007); this adapts the +-- attestation row to the DID-based device-key auth (the AT Protocol pivot — attestations +-- are signed by an Edge's DID, not a PDS guid) and makes ingest idempotent. + +-- The DID that signed the attestation (the PDS-guid identity predates the device-key +-- pivot; DID is the live identity). Legacy guid kept but no longer required. +ALTER TABLE ibd.ibd_pds_attestation ADD COLUMN attesting_did TEXT; +ALTER TABLE ibd.ibd_pds_attestation ALTER COLUMN attesting_pds_guid DROP NOT NULL; + +-- Provenance: the consented exchange this attestation came out of (the privacy rail — +-- every graph edge traces to a real dual-consent). NULL only for legacy rows. 
+ALTER TABLE ibd.ibd_pds_attestation ADD COLUMN exchange_request_uri TEXT; + +-- Each party's own reported figures, kept per-attestation so consensus can check that +-- the two sides *agree* (a match is CONFIRMED only when both report compatible totals). +-- The agreed value is then summarized onto ibd_discovery_index.total_shared_cm_approx. +ALTER TABLE ibd.ibd_pds_attestation ADD COLUMN reported_total_cm DOUBLE PRECISION; +ALTER TABLE ibd.ibd_pds_attestation ADD COLUMN reported_segments INTEGER; + +-- Idempotent ingest: one attestation per (match, attester, type). A re-submit corrects +-- the prior figures (cM / segment count / notes) rather than duplicating the edge. +CREATE UNIQUE INDEX ibd_attestation_did_key + ON ibd.ibd_pds_attestation (ibd_discovery_index_id, attesting_did, attestation_type) + WHERE attesting_did IS NOT NULL; diff --git a/rust/scripts/mock-legacy.sql b/rust/scripts/mock-legacy.sql new file mode 100644 index 00000000..dd4074ad --- /dev/null +++ b/rust/scripts/mock-legacy.sql @@ -0,0 +1,611 @@ +-- Mock of the legacy DecodingUs schema (the subset the ETL reads), matching the +-- CURRENT production schema (/Users/jkane/db.schema) so du-migrate's transformers +-- are exercised with data. Lets the ETL be verified without prod access. 
+CREATE EXTENSION IF NOT EXISTS postgis; +CREATE EXTENSION IF NOT EXISTS citext; +CREATE EXTENSION IF NOT EXISTS pgcrypto; +CREATE SCHEMA IF NOT EXISTS tree; +CREATE SCHEMA IF NOT EXISTS auth; +CREATE SCHEMA IF NOT EXISTS curator; + +CREATE TYPE public.biological_sex AS ENUM ('male','female','intersex'); +CREATE TYPE public.biosample_type AS ENUM ('Standard','PGP','Citizen','Ancient'); + +CREATE TABLE public.specimen_donor ( + id SERIAL PRIMARY KEY, + donor_identifier varchar(255) NOT NULL, + origin_biobank varchar(255) NOT NULL, + sex public.biological_sex, + geocoord geometry(Point,4326), + date_range_start integer, + date_range_end integer, + donor_type public.biosample_type DEFAULT 'Standard' NOT NULL, + pgp_participant_id varchar(50), + at_uri varchar(255) +); + +CREATE TABLE public.genbank_contig ( + genbank_contig_id SERIAL PRIMARY KEY, + accession varchar(255) NOT NULL, + common_name varchar(255), + reference_genome varchar(255), + seq_length integer NOT NULL +); + +CREATE TABLE public.variant ( + variant_id SERIAL PRIMARY KEY, + genbank_contig_id integer NOT NULL REFERENCES public.genbank_contig, + "position" integer NOT NULL, + reference_allele varchar(255) NOT NULL, + alternate_allele varchar(255) NOT NULL, + variant_type varchar(5) NOT NULL, + rs_id varchar(255), + common_name varchar(255) +); +CREATE TABLE public.variant_alias ( + id SERIAL PRIMARY KEY, + variant_id integer NOT NULL REFERENCES public.variant, + alias_type varchar(50) NOT NULL, + alias_value varchar(255) NOT NULL, + source varchar(255) +); + +CREATE TABLE public.biosample ( + id SERIAL PRIMARY KEY, + sample_accession varchar(255) NOT NULL, + description text NOT NULL, + alias varchar(255), + center_name varchar(255) NOT NULL, + specimen_donor_id integer REFERENCES public.specimen_donor, + sample_guid uuid NOT NULL, + locked boolean DEFAULT false NOT NULL, + source_platform varchar(100) +); +CREATE TABLE public.biosample_original_haplogroup ( + id SERIAL PRIMARY KEY, + biosample_id 
integer, + publication_id integer, + original_y_haplogroup varchar(255), + original_mt_haplogroup varchar(255), + notes text, + y_haplogroup_result jsonb, + mt_haplogroup_result jsonb +); + +CREATE TABLE public.citizen_biosample ( + id SERIAL PRIMARY KEY, + at_uri varchar(255), + source_platform varchar(255), + collection_date date, + sex varchar(15), + geocoord geometry(Point,4326), + description text, + sample_guid uuid NOT NULL, + deleted boolean DEFAULT false NOT NULL, + at_cid varchar(255), + accession varchar(255), + alias varchar(255), + y_haplogroup jsonb, + mt_haplogroup jsonb, + specimen_donor_id integer REFERENCES public.specimen_donor +); +CREATE TABLE public.citizen_biosample_original_haplogroup ( + id SERIAL PRIMARY KEY, + citizen_biosample_id integer, + publication_id integer, + y_haplogroup_result jsonb, + mt_haplogroup_result jsonb, + notes text +); + +CREATE TABLE public.pgp_biosample ( + pgp_biosample_id SERIAL PRIMARY KEY, + pgp_participant_id varchar(255) NOT NULL, + ena_biosample_accession varchar(255), + sex varchar(15), + sample_guid uuid NOT NULL +); + +CREATE TABLE tree.haplogroup ( + haplogroup_id SERIAL PRIMARY KEY, + name varchar(255) NOT NULL, + lineage varchar(255), + description text, + haplogroup_type varchar(10) NOT NULL, + revision_id integer NOT NULL, + source varchar(255) NOT NULL, + confidence_level varchar(255) NOT NULL, + valid_from timestamp NOT NULL DEFAULT now(), + valid_until timestamp, + formed_ybp integer, + formed_ybp_lower integer, + formed_ybp_upper integer, + tmrca_ybp integer, + tmrca_ybp_lower integer, + tmrca_ybp_upper integer, + age_estimate_source varchar(100) +); +CREATE TABLE tree.haplogroup_relationship ( + haplogroup_relationship_id SERIAL PRIMARY KEY, + child_haplogroup_id integer NOT NULL, + parent_haplogroup_id integer NOT NULL, + revision_id integer NOT NULL, + valid_from timestamp NOT NULL DEFAULT now(), + valid_until timestamp, + source varchar(255) NOT NULL +); +CREATE TABLE tree.haplogroup_variant ( 
+ haplogroup_variant_id SERIAL PRIMARY KEY, + haplogroup_id integer NOT NULL, + variant_id integer NOT NULL +); + +CREATE TABLE public.genomic_studies ( + id SERIAL PRIMARY KEY, + accession varchar(50) NOT NULL, + title varchar(255) NOT NULL, + center_name varchar(255) NOT NULL, + study_name varchar(255) NOT NULL, + details text, + source varchar(20) NOT NULL, + submission_date date, + last_update date, + bio_project_id varchar(50), + molecule varchar(50), + topology varchar(50), + taxonomy_id integer, + version varchar(10) +); +CREATE TABLE public.publication ( + id SERIAL PRIMARY KEY, + pubmed_id varchar(20), + doi varchar(255), + title text NOT NULL, + journal varchar(255), + publication_date date, + url varchar(2048), + authors varchar(1000), + abstract_summary text, + open_alex_id varchar(255), + cited_by_count integer, + open_access_status varchar(50) +); +CREATE TABLE public.publication_biosample (publication_id integer NOT NULL, biosample_id integer NOT NULL); +CREATE TABLE public.publication_citizen_biosample (publication_id integer NOT NULL, citizen_biosample_id integer NOT NULL); +CREATE TABLE public.publication_ena_study (publication_id integer NOT NULL, genomic_study_id integer NOT NULL); + +-- ── ident / auth (UUID-keyed; AT Protocol OAuth, no passwords) ─────────────── +CREATE TABLE public.users ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + email public.citext, + did varchar(255) NOT NULL, + handle varchar(255), + display_name varchar(255), + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone DEFAULT now() NOT NULL, + is_active boolean DEFAULT true NOT NULL +); +CREATE TABLE auth.roles ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + name varchar(255) NOT NULL, + description text, + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE auth.permissions ( + id uuid DEFAULT 
gen_random_uuid() NOT NULL PRIMARY KEY, + name varchar(255) NOT NULL, + description text, + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE auth.role_permissions (role_id uuid NOT NULL, permission_id uuid NOT NULL); +CREATE TABLE auth.user_roles (user_id uuid NOT NULL, role_id uuid NOT NULL); +CREATE TABLE auth.user_login_info ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + user_id uuid NOT NULL, + provider_id varchar(255) NOT NULL, + provider_key varchar(255) NOT NULL, + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE auth.user_oauth2_info ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + login_info_id uuid NOT NULL, + access_token text NOT NULL, + token_type varchar(50), + expires_in bigint, + refresh_token text, + scope text, + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE auth.user_pds_info ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + user_id uuid NOT NULL, + pds_url varchar(512) NOT NULL, + did varchar(255) NOT NULL, + handle varchar(255), + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE auth.cookie_consents ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + user_id uuid, + session_id varchar(255), + ip_address_hash varchar(64), + consent_given boolean DEFAULT false NOT NULL, + consent_timestamp timestamp without time zone DEFAULT now() NOT NULL, + policy_version varchar(20) DEFAULT '1.0' NOT NULL, + user_agent text, + created_at timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE auth.atprotocol_authorization_servers ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + issuer_url varchar(255) NOT 
NULL, + authorization_endpoint varchar(255), + token_endpoint varchar(255), + pushed_authorization_request_endpoint varchar(255), + dpop_signing_alg_values_supported text, + scopes_supported text, + client_id_metadata_document_supported boolean, + metadata_fetched_at timestamp without time zone DEFAULT now() NOT NULL, + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE auth.atprotocol_client_metadata ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + client_id_url varchar(255) NOT NULL, + client_name varchar(255), + client_uri varchar(255), + logo_uri varchar(255), + tos_uri varchar(255), + policy_uri varchar(255), + redirect_uris text, + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE curator.audit_log ( + id uuid DEFAULT gen_random_uuid() NOT NULL PRIMARY KEY, + user_id uuid NOT NULL, + entity_type varchar(50) NOT NULL, + entity_id integer NOT NULL, + action varchar(20) NOT NULL, + old_value jsonb, + new_value jsonb, + comment text, + created_at timestamp without time zone DEFAULT now() NOT NULL +); + +-- ── genomics (sequencing, coverage, pangenome) ────────────────────────────── +CREATE TYPE public.data_generation_method AS ENUM ('SEQUENCING','GENOTYPING'); +CREATE TYPE public.target_type AS ENUM ('WHOLE_GENOME','Y_CHROMOSOME','MT_DNA','AUTOSOMAL','X_CHROMOSOME','MIXED'); + +CREATE TABLE public.sequencing_lab ( + id SERIAL PRIMARY KEY, + name varchar(255) NOT NULL, + is_d2c boolean DEFAULT false NOT NULL, + website_url varchar(255), + description_markdown text, + created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone +); +CREATE TABLE public.sequencer_instrument ( + id SERIAL PRIMARY KEY, + instrument_id varchar(255) NOT NULL, + lab_id integer NOT NULL, + manufacturer varchar(255), + model varchar(255), + 
created_at timestamp without time zone DEFAULT now() NOT NULL, + updated_at timestamp without time zone +); +CREATE TABLE public.test_type_definition ( + id SERIAL PRIMARY KEY, + code varchar(50) NOT NULL, + display_name varchar(100) NOT NULL, + category public.data_generation_method NOT NULL, + vendor varchar(100), + target_type public.target_type NOT NULL, + expected_min_depth double precision, + supports_haplogroup_y boolean DEFAULT false NOT NULL, + supports_haplogroup_mt boolean DEFAULT false NOT NULL, + supports_autosomal_ibd boolean DEFAULT false NOT NULL, + supports_ancestry boolean DEFAULT false NOT NULL, + typical_file_formats text[], + description text +); +CREATE TABLE public.pangenome_graph ( + id BIGSERIAL PRIMARY KEY, + graph_name varchar(255) NOT NULL, + source_gfa_file varchar(255), + description text, + creation_date timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE public.pangenome_node ( + id BIGSERIAL PRIMARY KEY, + graph_id bigint NOT NULL, + node_name varchar(255) NOT NULL, + sequence_length bigint +); +CREATE TABLE public.pangenome_path ( + id BIGSERIAL PRIMARY KEY, + graph_id bigint NOT NULL, + path_name varchar(255) NOT NULL, + is_reference boolean DEFAULT false, + length_bp bigint, + description text +); +CREATE TABLE public.canonical_pangenome_variant ( + id BIGSERIAL PRIMARY KEY, + pangenome_graph_id integer NOT NULL, + variant_type varchar(50) NOT NULL, + variant_nodes integer[] NOT NULL, + variant_edges integer[] DEFAULT '{}' NOT NULL, + reference_path_id integer, + reference_start_position integer, + reference_end_position integer, + reference_allele_sequence text, + alternate_allele_sequence text, + canonical_hash varchar(255) NOT NULL, + description text, + creation_date timestamp without time zone DEFAULT now() NOT NULL +); +CREATE TABLE public.sequence_library ( + id SERIAL PRIMARY KEY, + sample_guid uuid NOT NULL, + lab varchar(255) NOT NULL, + run_date timestamp without time zone NOT NULL, + instrument 
varchar(255) NOT NULL, + reads bigint NOT NULL, + read_length integer NOT NULL, + paired_end boolean NOT NULL, + insert_size integer, + created_at timestamp without time zone NOT NULL, + updated_at timestamp without time zone, + at_uri varchar(255), + at_cid varchar(255), + test_type_id integer NOT NULL +); +CREATE TABLE public.sequence_file ( + id SERIAL PRIMARY KEY, + library_id integer NOT NULL, + file_name varchar(255) NOT NULL, + file_size_bytes bigint NOT NULL, + file_format varchar(255) NOT NULL, + aligner varchar(255) NOT NULL, + target_reference varchar(255) NOT NULL, + created_at timestamp without time zone NOT NULL, + updated_at timestamp without time zone, + pangenome_graph_id integer, + checksums jsonb DEFAULT '[]'::jsonb, + http_locations jsonb DEFAULT '[]'::jsonb, + atp_location jsonb +); +CREATE TABLE public.alignment_metadata ( + id BIGSERIAL PRIMARY KEY, + sequence_file_id bigint NOT NULL, + genbank_contig_id integer NOT NULL, + metric_level varchar(50) NOT NULL, + region_name varchar(255), + region_start_pos bigint, + region_end_pos bigint, + region_length_bp bigint, + metrics_date timestamp without time zone DEFAULT now() NOT NULL, + analysis_tool varchar(255) NOT NULL, + analysis_tool_version varchar(50), + notes text, + metadata jsonb, + reference_build varchar(255), + variant_caller varchar(255), + genome_territory bigint, + mean_coverage double precision, + median_coverage double precision, + sd_coverage double precision, + pct_exc_dupe double precision, + pct_exc_mapq double precision, + pct_10x double precision, + pct_20x double precision, + pct_30x double precision, + het_snp_sensitivity double precision +); +CREATE TABLE public.alignment_coverage ( + alignment_metadata_id bigint NOT NULL, + mean_depth double precision, + median_depth double precision, + percent_coverage_at_1x double precision, + percent_coverage_at_5x double precision, + percent_coverage_at_10x double precision, + percent_coverage_at_20x double precision, + 
percent_coverage_at_30x double precision, + bases_no_coverage bigint, + bases_low_quality_mapping bigint, + bases_callable bigint, + mean_mapping_quality double precision +); +CREATE TABLE public.pangenome_alignment_metadata ( + id BIGSERIAL PRIMARY KEY, + sequence_file_id bigint NOT NULL, + pangenome_graph_id integer NOT NULL, + metric_level varchar(50) NOT NULL, + pangenome_path_id integer, + pangenome_node_id integer, + region_start_node_id integer, + region_end_node_id integer, + region_name varchar(255), + region_length_bp bigint, + metrics_date timestamp without time zone DEFAULT now() NOT NULL, + analysis_tool varchar(255) NOT NULL, + analysis_tool_version varchar(50), + notes text, + metadata jsonb +); +CREATE TABLE public.pangenome_alignment_coverage ( + alignment_metadata_id bigint NOT NULL, + mean_depth double precision, + median_depth double precision, + percent_coverage_at_1x double precision, + percent_coverage_at_5x double precision, + percent_coverage_at_10x double precision, + percent_coverage_at_20x double precision, + percent_coverage_at_30x double precision, + bases_no_coverage bigint, + bases_low_quality_mapping bigint, + bases_callable bigint, + mean_mapping_quality double precision +); +CREATE TABLE public.reported_variant_pangenome ( + id BIGSERIAL PRIMARY KEY, + sample_guid uuid NOT NULL, + graph_id integer NOT NULL, + variant_type varchar(50) NOT NULL, + reference_path_id integer, + reference_start_position integer, + reference_end_position integer, + variant_nodes integer[] NOT NULL, + variant_edges integer[] DEFAULT '{}' NOT NULL, + alternate_allele_sequence text, + reference_allele_sequence text, + reference_repeat_count integer, + alternate_repeat_count integer, + allele_fraction double precision, + depth integer, + reported_date timestamp without time zone DEFAULT now() NOT NULL, + provenance varchar(255) NOT NULL, + confidence_score double precision NOT NULL, + notes text, + status varchar(255) NOT NULL, + zygosity varchar(10), + 
haplotype_information jsonb +); +CREATE TABLE public.genotype_data ( + id SERIAL PRIMARY KEY, + at_uri varchar, + at_cid varchar, + sample_guid uuid NOT NULL, + test_type_id integer, + provider varchar, + chip_version varchar, + build_version varchar, + source_file_hash varchar, + metrics jsonb DEFAULT '{}'::jsonb NOT NULL, + population_breakdown_id integer, + deleted boolean DEFAULT false, + created_at timestamp without time zone DEFAULT now(), + updated_at timestamp without time zone DEFAULT now() +); + +-- ── seed ───────────────────────────────────────────────────────────────────── +INSERT INTO public.specimen_donor (donor_identifier, origin_biobank, sex, donor_type, geocoord) VALUES + ('D1','Biobank A','male','Standard', ST_SetSRID(ST_MakePoint(-0.12,51.50),4326)), + ('D2','Biobank B','female','Ancient', ST_SetSRID(ST_MakePoint(35.0,47.0),4326)); + +INSERT INTO public.genbank_contig (accession, common_name, reference_genome, seq_length) VALUES + ('CM000686.2','chrY','GRCh38', 57227415); + +INSERT INTO public.variant (genbank_contig_id, "position", reference_allele, alternate_allele, variant_type, rs_id, common_name) VALUES + (1, 22739367, 'T', 'C', 'SNP', 'rs9786153', 'M269'), + (1, 13668077, 'G', 'A', 'SNP', NULL, NULL); +INSERT INTO public.variant_alias (variant_id, alias_type, alias_value, source) VALUES + (1, 'isogg', 'R-M269', 'ISOGG'), + (1, 'rsid', 'rs9786153', 'dbSNP'); + +INSERT INTO public.biosample (sample_accession, description, alias, center_name, specimen_donor_id, sample_guid, source_platform) VALUES + ('SAMN001','Standard sample','std-1','Center X', 1, '11111111-1111-1111-1111-111111111111', 'Illumina'), + ('SAMN002','Ancient sample','anc-1','Center Y', 2, '22222222-2222-2222-2222-222222222222', NULL); +INSERT INTO public.biosample_original_haplogroup (biosample_id, publication_id, original_y_haplogroup, y_haplogroup_result) VALUES + (1, 1, 'R-M269', '{"call":"R-M269","conf":0.99}'::jsonb); + +INSERT INTO public.citizen_biosample (at_uri, 
at_cid, source_platform, sample_guid, deleted, accession, alias, y_haplogroup) VALUES + ('at://did:plc:abc123/app.decodingus.biosample/xyz','bafyreigh2akiscaildc','Navigator','33333333-3333-3333-3333-333333333333', false, 'CIT001','cit-1','{"terminal":"R-L21"}'::jsonb); +INSERT INTO public.citizen_biosample_original_haplogroup (citizen_biosample_id, publication_id, y_haplogroup_result) VALUES + (1, 2, '{"call":"R-L21"}'::jsonb); + +INSERT INTO public.pgp_biosample (pgp_participant_id, ena_biosample_accession, sample_guid) VALUES + ('hu1A2B3C','ERS999','44444444-4444-4444-4444-444444444444'); + +INSERT INTO tree.haplogroup (name, lineage, haplogroup_type, revision_id, source, confidence_level, formed_ybp, age_estimate_source) VALUES + ('R','R','Y',1,'ISOGG','high',28200,'SNP'), + ('R1b','R>R1b','Y',1,'ISOGG','high',22800,'SNP'); +INSERT INTO tree.haplogroup_relationship (child_haplogroup_id, parent_haplogroup_id, revision_id, source) VALUES (2,1,1,'ISOGG'); +INSERT INTO tree.haplogroup_variant (haplogroup_id, variant_id) VALUES (2,1); + +INSERT INTO public.genomic_studies (accession, title, center_name, study_name, source, taxonomy_id, version, details) VALUES + ('PRJEB12345','Steppe ancient genomes','Inst','steppe-study','ENA',9606,'1','free text notes'); + +INSERT INTO public.publication (pubmed_id, doi, title, journal, publication_date, cited_by_count) VALUES + ('30001','10.1000/euro1','Peopling of Europe','Nature','2021-03-15',142), + ('30002','10.1000/steppe2','Steppe Y diversity','Cell','2019-07-01',88); +INSERT INTO public.publication_biosample (publication_id, biosample_id) VALUES (1, 1); +INSERT INTO public.publication_citizen_biosample (publication_id, citizen_biosample_id) VALUES (2, 1); +INSERT INTO public.publication_ena_study (publication_id, genomic_study_id) VALUES (1, 1); + +-- ident / auth seed (fixed UUIDs so FKs line up). 
+INSERT INTO public.users (id, email, did, handle, display_name) VALUES + ('aaaaaaaa-0000-0000-0000-000000000001','curator@decodingus.org','did:plc:curator1','curator.bsky.social','Curator One'), + ('aaaaaaaa-0000-0000-0000-000000000002', NULL,'did:plc:admin1','admin.decodingus.com','Admin'); +INSERT INTO auth.roles (id, name, description) VALUES + ('bbbbbbbb-0000-0000-0000-000000000001','Admin','Full administrative access'), + ('bbbbbbbb-0000-0000-0000-000000000002','Curator','Content curation'), + ('bbbbbbbb-0000-0000-0000-000000000003','TreeCurator','Haplogroup tree curation'); +INSERT INTO auth.permissions (id, name, description) VALUES + ('cccccccc-0000-0000-0000-000000000001','variant.edit','Edit variants'), + ('cccccccc-0000-0000-0000-000000000002','tree.edit','Edit the haplogroup tree'); +INSERT INTO auth.role_permissions (role_id, permission_id) VALUES + ('bbbbbbbb-0000-0000-0000-000000000002','cccccccc-0000-0000-0000-000000000001'), + ('bbbbbbbb-0000-0000-0000-000000000003','cccccccc-0000-0000-0000-000000000002'); +INSERT INTO auth.user_roles (user_id, role_id) VALUES + ('aaaaaaaa-0000-0000-0000-000000000001','bbbbbbbb-0000-0000-0000-000000000002'), + ('aaaaaaaa-0000-0000-0000-000000000002','bbbbbbbb-0000-0000-0000-000000000001'); +INSERT INTO auth.user_login_info (id, user_id, provider_id, provider_key) VALUES + ('dddddddd-0000-0000-0000-000000000001','aaaaaaaa-0000-0000-0000-000000000001','oauth2','did:plc:curator1'), + ('dddddddd-0000-0000-0000-000000000002','aaaaaaaa-0000-0000-0000-000000000002','oauth2','did:plc:admin1'); +INSERT INTO auth.user_oauth2_info (id, login_info_id, access_token, token_type, expires_in, refresh_token, scope) VALUES + ('eeeeeeee-0000-0000-0000-000000000001','dddddddd-0000-0000-0000-000000000001','tok-access-1','DPoP',3600,'tok-refresh-1','atproto transition:generic'); +INSERT INTO auth.user_pds_info (id, user_id, pds_url, did, handle) VALUES + 
('ffffffff-0000-0000-0000-000000000001','aaaaaaaa-0000-0000-0000-000000000001','https://pds.decodingus.com','did:plc:curator1','curator.bsky.social'); +INSERT INTO auth.cookie_consents (user_id, session_id, ip_address_hash, consent_given, policy_version) VALUES + ('aaaaaaaa-0000-0000-0000-000000000001', NULL,'abc123hash', true,'1.0'), + (NULL,'anon-session-1','def456hash', true,'1.0'); +INSERT INTO auth.atprotocol_authorization_servers (issuer_url, authorization_endpoint, token_endpoint, scopes_supported, client_id_metadata_document_supported) VALUES + ('https://bsky.social','https://bsky.social/oauth/authorize','https://bsky.social/oauth/token','atproto transition:generic', true); +INSERT INTO auth.atprotocol_client_metadata (client_id_url, client_name, client_uri, redirect_uris) VALUES + ('https://decodingus.com/oauth/client-metadata.json','DecodingUs','https://decodingus.com','https://decodingus.com/oauth/callback'); +INSERT INTO curator.audit_log (user_id, entity_type, entity_id, action, old_value, new_value, comment) VALUES + ('aaaaaaaa-0000-0000-0000-000000000001','variant',1,'UPDATE','{"common_name":null}'::jsonb,'{"common_name":"M269"}'::jsonb,'Named terminal SNP'), + ('aaaaaaaa-0000-0000-0000-000000000002','haplogroup',2,'CREATE',NULL,'{"name":"R1b"}'::jsonb,'Added R1b'); + +-- genomics seed (sequencing run on the standard biosample 1111...). +INSERT INTO public.sequencing_lab (name, is_d2c, website_url) VALUES + ('Dante Labs', true, 'https://dantelabs.com'); +-- two rows with the same instrument_id (both under lab 1 here) -> dedups to one on migration.
+INSERT INTO public.sequencer_instrument (instrument_id, lab_id, manufacturer, model) VALUES + ('A00123', 1, 'Illumina', 'NovaSeq 6000'), + ('A00123', 1, 'Illumina', 'NovaSeq 6000'); +INSERT INTO public.test_type_definition (code, display_name, category, vendor, target_type, expected_min_depth, supports_haplogroup_y, typical_file_formats, description) VALUES + ('WGS30','Whole Genome 30x','SEQUENCING','Dante','WHOLE_GENOME',30,true,'{BAM,VCF}','30x WGS'); +INSERT INTO public.pangenome_graph (graph_name, source_gfa_file, description) VALUES + ('HPRC-v1','hprc-v1.gfa','Human Pangenome Reference Consortium v1'); +INSERT INTO public.pangenome_node (graph_id, node_name, sequence_length) VALUES (1,'n1',128),(1,'n2',64); +INSERT INTO public.pangenome_path (graph_id, path_name, is_reference, length_bp) VALUES (1,'GRCh38#chrY', true, 57227415); +INSERT INTO public.canonical_pangenome_variant (pangenome_graph_id, variant_type, variant_nodes, variant_edges, reference_path_id, reference_allele_sequence, canonical_hash) VALUES + (1,'SNP','{1,2}','{1}',1,'T','hash-canon-1'); +INSERT INTO public.sequence_library (sample_guid, lab, run_date, instrument, reads, read_length, paired_end, insert_size, created_at, at_uri, at_cid, test_type_id) VALUES + ('11111111-1111-1111-1111-111111111111','Dante Labs','2023-05-01 00:00:00','A00123',900000000,150,true,350,'2023-05-02 00:00:00','at://did:plc:lab/app.decodingus.seqlib/abc','bafyseqlib1',1); +INSERT INTO public.sequence_file (library_id, file_name, file_size_bytes, file_format, aligner, target_reference, created_at, pangenome_graph_id, checksums, http_locations, atp_location) VALUES + (1,'sample1.bam',64000000000,'BAM','bwa-mem2','GRCh38','2023-05-02 00:00:00',1, + '[{"algorithm":"sha256","checksum":"deadbeef","verified_at":"2023-05-03"}]'::jsonb, + '[{"file_url":"https://store/sample1.bam","file_index_url":"https://store/sample1.bam.bai"}]'::jsonb, + 
'{"repo_did":"did:plc:lab","record_cid":"bafyfile1","record_path":"app.decodingus.file/1"}'::jsonb); +INSERT INTO public.alignment_metadata (sequence_file_id, genbank_contig_id, metric_level, reference_build, variant_caller, analysis_tool, analysis_tool_version, genome_territory, mean_coverage, median_coverage, pct_10x, pct_20x, pct_30x) VALUES + (1,1,'CONTIG_OVERALL','GRCh38','DeepVariant','Picard','3.0',57227415,31.2,30.0,99.1,97.5,90.2); +INSERT INTO public.alignment_coverage (alignment_metadata_id, mean_depth, median_depth, percent_coverage_at_1x, percent_coverage_at_10x, percent_coverage_at_30x, bases_callable, mean_mapping_quality) VALUES + (1,32.5,31.0,99.9,98.7,91.0,56000000,58.4); +INSERT INTO public.pangenome_alignment_metadata (sequence_file_id, pangenome_graph_id, metric_level, pangenome_path_id, region_name, analysis_tool, analysis_tool_version, metadata) VALUES + (1,1,'GRAPH_OVERALL',1,'whole-graph','vg','1.50','{"graphAlignedReads":880000000}'::jsonb); +INSERT INTO public.pangenome_alignment_coverage (alignment_metadata_id, mean_depth, percent_coverage_at_10x) VALUES + (1,29.8,97.3); +INSERT INTO public.reported_variant_pangenome (sample_guid, graph_id, variant_type, reference_path_id, variant_nodes, variant_edges, allele_fraction, depth, provenance, confidence_score, status, zygosity, haplotype_information) VALUES + ('11111111-1111-1111-1111-111111111111',1,'SNP',1,'{1,2}','{1}',0.99,34,'Navigator',0.97,'CONFIRMED','HET','{"phase_set":12}'::jsonb); +INSERT INTO public.genotype_data (at_uri, at_cid, sample_guid, test_type_id, provider, chip_version, build_version, source_file_hash, metrics, deleted) VALUES + ('at://did:plc:chip/app.decodingus.genotype/g1','bafygeno1','11111111-1111-1111-1111-111111111111',1,'23andMe','v5','GRCh37','sha256:chip1','{"callRate":0.987,"totalMarkersCalled":630000}'::jsonb, false), + (NULL,NULL,'11111111-1111-1111-1111-111111111111',1,'DeletedProvider','v4','GRCh37','sha256:chip2','{}'::jsonb, true); diff --git 
a/rust/scripts/oauth-dev.sh b/rust/scripts/oauth-dev.sh new file mode 100755 index 00000000..2869142c --- /dev/null +++ b/rust/scripts/oauth-dev.sh @@ -0,0 +1,147 @@ +#!/usr/bin/env bash +# One-command local AT Protocol OAuth dev stack: Postgres + PDS + Caddy (TLS), +# wired so du-web's dev login (/login/atproto/dev) completes the handshake over +# the PDS's canonical https://pds.test. Runtime-agnostic: Apple `container` or +# Docker (du-web itself runs on the host via cargo — the Docker image build is +# still blocked by unpushed sibling path-deps, and the CA/IP wiring is dynamic, +# so a pure `compose up` can't do all of it; this script is the orchestrator). +# +# ./scripts/oauth-dev.sh up # boot stack, extract CA, create account, write env +# ./scripts/oauth-dev.sh web # run du-web on the host with the dev env (foreground) +# ./scripts/oauth-dev.sh env # print the env file +# ./scripts/oauth-dev.sh down # stop + remove PDS/Caddy (Postgres left to test-db.sh) +# +# Or via the Makefile: `make oauth-dev` (= up + web). +set -euo pipefail + +HERE="$(cd "$(dirname "$0")" && pwd)" +ROOT="$(cd "$HERE/.." && pwd)" +STATE="$ROOT/.oauth-dev" +PDS_NAME="du-oauth-pds" +CADDY_NAME="du-oauth-caddy" +PDS_HOST="pds.test" +ACCT_HANDLE="alice.pds.test" +ACCT_PASSWORD="alice-pw-12345" +ACCT_EMAIL="alice@example.test" +WEB_PORT="9000" + +rt() { if command -v container >/dev/null 2>&1; then echo container; elif command -v docker >/dev/null 2>&1; then echo docker; else echo ""; fi; } +RT="$(rt)" +[ -n "$RT" ] || { echo "ERROR: need Apple \`container\` or \`docker\`." >&2; exit 1; } + +# Container IP on its runtime network (used for cross-container + host reach). 
+ctr_ip() { + local name="$1" + if [ "$RT" = container ]; then + container ls 2>/dev/null | awk -v n="$name" '$1==n{print $6}' | cut -d/ -f1 + else + docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$name" 2>/dev/null + fi +} +ctr_rm() { if [ "$RT" = container ]; then container rm -f "$1" 2>/dev/null || true; else docker rm -f "$1" 2>/dev/null || true; fi; } +ctr_exec() { local n="$1"; shift; if [ "$RT" = container ]; then container exec "$n" "$@"; else docker exec "$n" "$@"; fi; } + +wait_http() { # url, tries + local url="$1" tries="${2:-30}" + for _ in $(seq 1 "$tries"); do curl -fsS -m 3 "$url" >/dev/null 2>&1 && return 0; sleep 1; done + return 1 +} + +cmd_up() { + mkdir -p "$STATE/pdsdata/blocks" + + # 1. Postgres (reuse the shared dev DB helper; idempotent). + echo "==> postgres (test-db.sh)" + "$HERE/test-db.sh" up >/dev/null 2>&1 || true + DATABASE_URL="$("$HERE/test-db.sh" url 2>/dev/null || true)" + [ -n "$DATABASE_URL" ] || { echo "could not get DATABASE_URL (is the dev Postgres up?)" >&2; exit 1; } + export DATABASE_URL + + # 2. PDS. (Plain string flags, not arrays — macOS bash 3.2 + `set -u`.) 
+ echo "==> PDS ($PDS_NAME)" + ctr_rm "$PDS_NAME" + local run="container" pds_ports="" caddy_ports="" + if [ "$RT" = docker ]; then run="docker"; pds_ports="-p 3000:3000"; caddy_ports="-p 443:443"; fi + $run run -d --name "$PDS_NAME" $pds_ports -v "$STATE/pdsdata:/pds" \ + -e PDS_HOSTNAME="$PDS_HOST" -e PDS_PORT=3000 \ + -e PDS_JWT_SECRET="$(openssl rand --hex 16)" \ + -e PDS_ADMIN_PASSWORD="$(openssl rand --hex 16)" \ + -e PDS_PLC_ROTATION_KEY_K256_PRIVATE_KEY_HEX="$(openssl rand --hex 32)" \ + -e PDS_DATA_DIRECTORY=/pds -e PDS_BLOBSTORE_DISK_LOCATION=/pds/blocks \ + -e PDS_DID_PLC_URL=https://plc.directory -e PDS_INVITE_REQUIRED=false -e PDS_DEV_MODE=true \ + ghcr.io/bluesky-social/pds:latest >/dev/null + + local pds_ip; for _ in $(seq 1 20); do pds_ip="$(ctr_ip "$PDS_NAME")"; [ -n "$pds_ip" ] && break; sleep 1; done + local pds_reach; [ "$RT" = docker ] && pds_reach="127.0.0.1" || pds_reach="$pds_ip" + wait_http "http://$pds_reach:3000/xrpc/_health" 30 || { echo "PDS not healthy" >&2; exit 1; } + echo " PDS healthy at http://$pds_reach:3000 (network ip $pds_ip)" + + # 3. Caddy TLS proxy → PDS, canonical https://pds.test. + echo "==> Caddy ($CADDY_NAME)" + ctr_rm "$CADDY_NAME" + printf '{\n auto_https disable_redirects\n}\n%s {\n tls internal\n reverse_proxy %s:3000\n}\n' "$PDS_HOST" "$pds_ip" > "$STATE/Caddyfile" + $run run -d --name "$CADDY_NAME" $caddy_ports -v "$STATE/Caddyfile:/etc/caddy/Caddyfile" \ + docker.io/library/caddy:2 >/dev/null + local caddy_ip; for _ in $(seq 1 20); do caddy_ip="$(ctr_ip "$CADDY_NAME")"; [ -n "$caddy_ip" ] && break; sleep 1; done + local caddy_reach; [ "$RT" = docker ] && caddy_reach="127.0.0.1" || caddy_reach="$caddy_ip" + sleep 2 + + # 4. Caddy internal CA → trust file. 
+ for _ in $(seq 1 15); do + ctr_exec "$CADDY_NAME" cat /data/caddy/pki/authorities/local/root.crt > "$STATE/caddy_ca.crt" 2>/dev/null && \ + [ -s "$STATE/caddy_ca.crt" ] && break + sleep 1 + done + [ -s "$STATE/caddy_ca.crt" ] || { echo "could not extract Caddy CA" >&2; exit 1; } + curl -fsS --resolve "$PDS_HOST:443:$caddy_reach" --cacert "$STATE/caddy_ca.crt" \ + "https://$PDS_HOST/.well-known/oauth-authorization-server" >/dev/null \ + || { echo "HTTPS via Caddy failed" >&2; exit 1; } + echo " Caddy serving https://$PDS_HOST (reach $caddy_reach), CA → .oauth-dev/caddy_ca.crt" + + # 5. Test account (idempotent). + echo "==> test account ($ACCT_HANDLE)" + curl -s -m 20 -X POST "http://$pds_reach:3000/xrpc/com.atproto.server.createAccount" \ + -H "Content-Type: application/json" \ + -d "{\"handle\":\"$ACCT_HANDLE\",\"email\":\"$ACCT_EMAIL\",\"password\":\"$ACCT_PASSWORD\"}" \ + | grep -q '"did"' && echo " created" || echo " (already exists or skipped)" + + # 6. Env file for du-web. + cat > "$STATE/env" <&2; exit 1; } + # shellcheck disable=SC1091 + set -a; . "$STATE/env"; set +a + echo "==> du-web on http://127.0.0.1:$WEB_PORT (dev login: /login/atproto/dev?handle=$ACCT_HANDLE)" + exec cargo run -p du-web +} + +cmd_env() { cat "$STATE/env"; } + +cmd_down() { + ctr_rm "$PDS_NAME"; ctr_rm "$CADDY_NAME" + echo "PDS + Caddy removed. (Postgres: ./scripts/test-db.sh down). State in .oauth-dev/ kept." +} + +case "${1:-}" in + up) cmd_up ;; + web) cmd_web ;; + env) cmd_env ;; + down) cmd_down ;; + *) echo "usage: $0 {up|web|env|down}" >&2; exit 2 ;; +esac diff --git a/rust/scripts/seed-hallast-anchors.sql b/rust/scripts/seed-hallast-anchors.sql new file mode 100644 index 00000000..3611bd0f --- /dev/null +++ b/rust/scripts/seed-hallast-anchors.sql @@ -0,0 +1,58 @@ +-- Seed genealogical age-calibration anchors from Hallast et al. 2026 +-- ("Population-scale Y chromosome assemblies...", bioRxiv 2026.06.03.729890). 
+-- +-- These are BEAST-dated TMRCAs from the paper's time-calibrated phylogeny +-- (Suppl. Fig. 1, ISOGG v15.73 labels; 95% HPD intervals) — *model-dated*, not +-- radiocarbon, hence anchor_type = 'MODEL_DATED' with full provenance in details +-- so a curator can down-weight or exclude them. See the "Calibration anchors" +-- subsection of documents/proposals/branch-age-estimation.md (incl. the +-- circularity caveat: these calibrate our SNP clock against another SNP clock). +-- +-- Storage convention (matches du_db::age anchor consumer): +-- date_ce = PRESENT_YEAR(1950) - TMRCA_ybp → consumer recovers ybp +-- details->>'uncertainty_years' = sigma = (HPD_hi - HPD_lo) / (2 * 1.96) +-- +-- Idempotent: clears prior Hallast-sourced rows, then re-inserts only for clades +-- whose name exists in the current tree (keyed by name, never by id — ids churn +-- across loads). Run AFTER the Y tree is loaded: +-- psql "$DATABASE_URL" -f scripts/seed-hallast-anchors.sql + +BEGIN; + +DELETE FROM tree.genealogical_anchor +WHERE details->>'source' = 'Hallast et al. 2026'; + +INSERT INTO tree.genealogical_anchor (haplogroup_id, anchor_type, date_ce, confidence, details) +SELECT h.id, + 'MODEL_DATED', + 1950 - v.tmrca_ybp, + v.confidence, + jsonb_build_object( + 'source', 'Hallast et al. 2026', + 'reference', 'bioRxiv 2026.06.03.729890', + 'clock', 'BEAST v1.10.4 strict molecular clock (0.76e-9 sub/site/yr)', + 'tmrca_ybp', v.tmrca_ybp, + 'hpd95_low_ybp', v.hpd_lo, + 'hpd95_high_ybp', v.hpd_hi, + 'uncertainty_years', round((v.hpd_hi - v.hpd_lo) / (2 * 1.96))::int, + 'figure', v.figure, + 'note', v.note + ) +FROM (VALUES + -- clade, TMRCA, HPD_lo, HPD_hi, conf, figure, note + ('D1', 19450, 16360, 22880, 0.80, 'Fig 1b / Suppl. 
Fig 1', 'major-branch TMRCA on ISOGG v15.73 phylogeny') + ) AS v(name, tmrca_ybp, hpd_lo, hpd_hi, confidence, figure, note) +JOIN tree.haplogroup h + ON h.name = v.name + AND h.haplogroup_type = 'Y_DNA'::core.dna_type + AND h.valid_until IS NULL; + +-- Pending nodes (recorded, not yet seeded — no clean haplogroup-name mapping): +-- • HG00512 ⋂ HG02056 TMRCA ~10,300 ybp (HPD 8,400–12,300; Suppl. Fig 61) — +-- a sample-pair MRCA; needs the ISOGG label of their common node. +-- • HG00609-referenced node TMRCA 10,350 ybp (HPD 8,540–12,330). +-- Harvest more major-branch nodes from the Suppl. Tables workbook (Fig 1b / Suppl. +-- Fig 1 are figures; per-node TMRCAs are not in the extractable supplement text) +-- and add VALUES rows above once mapped to our clade names. + +COMMIT; diff --git a/rust/scripts/seed-str-mutation-rates.sql b/rust/scripts/seed-str-mutation-rates.sql new file mode 100644 index 00000000..ce1ae089 --- /dev/null +++ b/rust/scripts/seed-str-mutation-rates.sql @@ -0,0 +1,175 @@ +-- Seed per-marker Y-STR mutation rates (per generation) for the STR branch-age +-- model (du_db::ystr). Two complementary sources, Willems primary + YHRD gap-fill: +-- +-- * Willems et al. 2016 ("Population-Scale Sequencing... Y-STR Mutation Rates", +-- AJHG 98:919, PMC4863667; Table S1/mmc2.xlsx). 1000 Genomes MUTEA estimates +-- (mu = 10^log10_1kg_mu, larger n than SGDP) with the reported 95% CI. Covers +-- 116 DYS/Y-named polymorphic markers; the 586 TRF-only loci are omitted. +-- * YHRD combined mutation rates (ISOGG-wiki export, yhrd-mutation-rates.txt) for core forensic/ +-- genealogy markers NOT in Willems' short-read-accessible set (DYS393, DYS390, +-- DYS389II, DYS385, DYS448, DYS449, DYS627, DYS518, DYS635, YGATAH4, DYF387S1). +-- Rate = mutations/meioses; 95% CI from the Poisson count. Willems wins overlaps.
+-- +-- omega_plus/omega_minus/multi_step_rate LEFT AT COLUMN DEFAULTS on purpose: the +-- multi-step model (du_db::ystr global omega / Table 1) is already Ballantyne-2010- +-- derived (single:multi-step 25.23:1 -> Sum(omega>=2) ~ 0.038; symmetric per +-- McDonald's calculation choice), so every marker stays on the validated Table 1 +-- path. (Janzen's "various sources" .xls -- Chandler/SMGF/FTDNA/Ysearch compilation +-- -- was reviewed but held back: older estimates, no clean CIs, redundant for the +-- single-copy markers we score.) +-- +-- Multi-copy markers (DYS385, DYS464, CDY, YCAII, DYF387S1...) are excluded from STR +-- age scoring regardless of being seeded here. Name normalization to the exact +-- lexicon convention may need follow-up. Idempotent (keyed on marker_name UNIQUE): +-- psql "$DATABASE_URL" -f scripts/seed-str-mutation-rates.sql + +BEGIN; + +INSERT INTO genomics.str_mutation_rate + (marker_name, mutation_rate, mutation_rate_lower, mutation_rate_upper, panel_names, source) +VALUES + ('DYF387S1', 0.0064700000, 0.0050257223, 0.0079165417, ARRAY['YHRD'], 'YHRD combined (77 mutations / 11899 meioses)'), + ('DYS19', 0.0028513662, 0.0023215979, 0.0035020230, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=468'), + ('DYS385', 0.0027800000, 0.0022619197, 0.0032958612, ARRAY['YHRD'], 'YHRD combined (111 mutations / 39944 meioses)'), + ('DYS388', 0.0006445239, 0.0005258768, 0.0007899398, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=530'), + ('DYS389I', 0.0028704897, 0.0023212825, 0.0035496375, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif AGAT, n=955'), + ('DYS389II', 0.0049900000, 0.0040431516, 0.0059335384, ARRAY['YHRD'], 'YHRD combined (107 mutations / 21450 meioses)'), + ('DYS390', 0.0020800000, 0.0014879158, 0.0026793050, ARRAY['YHRD'], 'YHRD combined (47 mutations / 22557 meioses)'), + ('DYS391', 0.0015071900, 0.0011621308, 0.0019547040, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=894'), + ('DYS392', 0.0004623608, 0.0003477043, 0.0006148257, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=454'), + ('DYS393', 0.0012700000, 0.0007886105, 0.0017438662, ARRAY['YHRD'], 'YHRD combined (27 mutations / 21323 meioses)'), + ('DYS394', 0.0028513662, 0.0023215979, 0.0035020230, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=468'), + ('DYS426', 0.0000974635, 0.0000598038, 0.0001588382, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAC, n=1091'), + ('DYS434', 0.0003528133, 0.0002756202, 0.0004516261, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=1046'), + ('DYS435', 0.0003207362, 0.0002540066, 0.0004049962, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif ATCC, n=1080'), + ('DYS436', 0.0001159840, 0.0000729642, 0.0001843682, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAC, n=1031'), + ('DYS437', 0.0007586509, 0.0006086786, 0.0009455750, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=844'), + ('DYS438', 0.0006260197, 0.0005139064, 0.0007625916, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAG, n=744'), + ('DYS439', 0.0045320323, 0.0038883111, 0.0052823236, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=904'), + ('DYS441', 0.0019477760, 0.0015927257, 0.0023819739, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif AAGG, n=464'), + ('DYS442', 0.0025688219, 0.0021508147, 0.0030680681, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=800'), + ('DYS443', 0.0014038057, 0.0010971500, 0.0017961723, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAGG, n=477'), + ('DYS445', 0.0008376566, 0.0006538603, 0.0010731168, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=436'), + ('DYS448', 0.0011900000, 0.0006245160, 0.0017562697, ARRAY['YHRD'], 'YHRD combined (17 mutations / 14281 meioses)'), + ('DYS449', 0.0097600000, 0.0075684887, 0.0119587466, ARRAY['YHRD'], 'YHRD combined (76 mutations / 7784 meioses)'), + ('DYS450', 0.0001728622, 0.0001211792, 0.0002465880, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAT, n=424'), + ('DYS453', 0.0003297638, 0.0002537716, 0.0004285120, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=479'), + ('DYS454', 0.0003930457, 0.0002803509, 0.0005510411, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=497'), + ('DYS455', 0.0002124178, 0.0001385049, 0.0003257744, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=503'), + ('DYS456', 0.0035392725, 0.0025858957, 0.0048441435, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=656'), + ('DYS458', 0.0076821008, 0.0058992799, 0.0100037078, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAG, n=541'), + ('DYS460', 0.0026343047, 0.0022332122, 0.0031074346, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=905'), + ('DYS461', 0.0024763784, 0.0020730749, 0.0029581419, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=877'), + ('DYS462', 0.0008041517, 0.0006562012, 0.0009854600, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif ACAT, n=990'), + ('DYS467', 0.0063789471, 0.0050126802, 0.0081176066, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=688'), + ('DYS470', 0.0000947899, 0.0000657061, 0.0001367471, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAC, n=1080'), + ('DYS473', 0.0002516663, 0.0001671841, 0.0003788393, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=267'), + ('DYS475', 0.0000607070, 0.0000353661, 0.0001042056, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=610'), + ('DYS476', 0.0001449249, 0.0001046813, 0.0002006398, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif ATC, n=1109'), + ('DYS480', 0.0000187485, 0.0000084808, 0.0000414473, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=716'), + ('DYS481', 0.0044649130, 0.0036376005, 0.0054803841, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAG, n=430'), + ('DYS483', 0.0000974635, 0.0000598038, 0.0001588382, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAC, n=1091'), + ('DYS484', 0.0008039907, 0.0005812969, 0.0011119980, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=289'), + ('DYS485', 0.0011632739, 0.0008993485, 0.0015046518, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=334'), + ('DYS487', 0.0014531050, 0.0011683178, 0.0018073114, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=471'), + ('DYS488', 0.0003303422, 0.0002306275, 0.0004731698, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=513'), + ('DYS489', 0.0005228077, 0.0004154560, 0.0006578987, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=486'), + ('DYS492', 0.0002346286, 0.0001713926, 0.0003211959, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif AAT, n=548'), + ('DYS493', 0.0000728273, 0.0000433609, 0.0001223179, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAC, n=1085'), + ('DYS494', 0.0001462278, 0.0000973298, 0.0002196919, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=532'), + ('DYS495', 0.0008287318, 0.0006401843, 0.0010728105, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=387'), + ('DYS502', 0.0000100001, 0.0000093275, 0.0000107213, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif ATC, n=1002'), + ('DYS505', 0.0017384612, 0.0013950604, 0.0021663918, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAGG, n=562'), + ('DYS508', 0.0019784405, 0.0016465351, 0.0023772508, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=896'), + ('DYS510', 0.0027891251, 0.0021592653, 0.0036027158, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=432'), + ('DYS511', 0.0017406843, 0.0015030624, 0.0020158722, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=1012'), + ('DYS512', 0.0007954392, 0.0006578259, 0.0009618404, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=794'), + ('DYS515', 0.0014038057, 0.0010971500, 0.0017961723, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAGG, n=477'), + ('DYS518', 0.0108000000, 0.0084415641, 0.0130693857, ARRAY['YHRD'], 'YHRD combined (83 mutations / 7717 meioses)'), + ('DYS522', 0.0020579603, 0.0017501288, 0.0024199367, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=937'), + ('DYS525', 0.0015660838, 0.0013190097, 0.0018594393, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=980'), + ('DYS526a', 0.0020062082, 0.0016210149, 0.0024829330, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif AAGG, n=511'), + ('DYS526b', 0.0020062082, 0.0016210149, 0.0024829330, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAGG, n=511'), + ('DYS530', 0.0001577433, 0.0001102295, 0.0002257377, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAC, n=993'), + ('DYS533', 0.0020186964, 0.0017120758, 0.0023802304, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=869'), + ('DYS537', 0.0011564211, 0.0009711828, 0.0013769907, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=965'), + ('DYS538', 0.0007640856, 0.0006431555, 0.0009077536, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=994'), + ('DYS539', 0.0011022091, 0.0009683113, 0.0012546223, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=928'), + ('DYS540', 0.0011800761, 0.0007995304, 0.0017417467, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=259'), + ('DYS541', 0.0023333291, 0.0019545015, 0.0027855823, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=672'), + ('DYS543', 0.0029985756, 0.0025080720, 0.0035850069, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=733'), + ('DYS544', 0.0004986996, 0.0003595787, 0.0006916462, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=337'), + ('DYS545', 0.0002777225, 0.0002182102, 0.0003534655, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAC, n=925'), + ('DYS548', 0.0045322306, 0.0029059022, 0.0070687561, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=228'), + ('DYS549', 0.0036701769, 0.0030025621, 0.0044862348, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=765'), + ('DYS556', 0.0012877128, 0.0010501806, 0.0015789704, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif AAAT, n=423'), + ('DYS558', 0.0004939739, 0.0003709394, 0.0006578169, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=510'), + ('DYS559', 0.0000876825, 0.0000513822, 0.0001496280, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=567'), + ('DYS561', 0.0015854005, 0.0012718737, 0.0019762141, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=699'), + ('DYS565', 0.0006360584, 0.0004899198, 0.0008257888, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=444'), + ('DYS567', 0.0004557294, 0.0003475298, 0.0005976157, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=486'), + ('DYS568', 0.0007444971, 0.0005671945, 0.0009772237, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=403'), + ('DYS569', 0.0001531119, 0.0001115743, 0.0002101135, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=554'), + ('DYS570', 0.0082802033, 0.0057852216, 0.0118511911, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAG, n=384'), + ('DYS573', 0.0003511042, 0.0002697758, 0.0004569503, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=469'), + ('DYS574', 0.0007230445, 0.0005499814, 0.0009505654, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=429'), + ('DYS575', 0.0000836544, 0.0000418406, 0.0001672553, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=513'), + ('DYS576', 0.0087454669, 0.0066502811, 0.0115007457, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAG, n=488'), + ('DYS577', 0.0002879082, 0.0002161116, 0.0003835571, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AATG, n=1039'), + ('DYS578', 0.0002242931, 0.0001644709, 0.0003058743, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif AAAT, n=659'), + ('DYS579', 0.0000270528, 0.0000098826, 0.0000740551, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=669'), + ('DYS580', 0.0001043170, 0.0000480225, 0.0002266027, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=698'), + ('DYS581', 0.0001385814, 0.0000879873, 0.0002182680, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif ACCT, n=1114'), + ('DYS583', 0.0001179392, 0.0000744639, 0.0001867970, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAC, n=1055'), + ('DYS584', 0.0000135519, 0.0000069945, 0.0000262572, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AATC, n=1022'), + ('DYS585', 0.0015161459, 0.0012921115, 0.0017790248, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AACAT, n=634'), + ('DYS588', 0.0002324915, 0.0001572132, 0.0003438151, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AATGC, n=502'), + ('DYS590', 0.0001514119, 0.0001142278, 0.0002007003, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAC, n=988'), + ('DYS593', 0.0001479984, 0.0000914593, 0.0002394892, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAT, n=255'), + ('DYS594', 0.0003770856, 0.0002730172, 0.0005208228, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAT, n=368'), + ('DYS595', 0.0000271281, 0.0000091197, 0.0000806975, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAATT, n=571'), + ('DYS597', 0.0002777225, 0.0002182102, 0.0003534655, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAC, n=925'), + ('DYS598', 0.0003299134, 0.0002707391, 0.0004020212, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AACAG, n=975'), + ('DYS604', 0.0002346286, 0.0001713926, 0.0003211959, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif AAT, n=548'), + ('DYS606', 0.0004321306, 0.0003332023, 0.0005604308, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=465'), + ('DYS607', 0.0023258034, 0.0016720220, 0.0032352214, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAGG, n=299'), + ('DYS608', 0.0001217298, 0.0000795804, 0.0001862036, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAC, n=883'), + ('DYS609', 0.0000321776, 0.0000177028, 0.0000584876, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAC, n=996'), + ('DYS613', 0.0000292326, 0.0000139169, 0.0000614035, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif ATC, n=894'), + ('DYS616', 0.0008554074, 0.0006307235, 0.0011601309, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=336'), + ('DYS618', 0.0002637449, 0.0001966296, 0.0003537684, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=573'), + ('DYS620', 0.0000528080, 0.0000218848, 0.0001274260, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=610'), + ('DYS622', 0.0029085732, 0.0021412925, 0.0039507905, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAG, n=287'), + ('DYS623', 0.0002509008, 0.0001798805, 0.0003499613, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif ATCC, n=927'), + ('DYS624', 0.0000541096, 0.0000321089, 0.0000911849, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif ATCC, n=1021'), + ('DYS627', 0.0140000000, 0.0113934576, 0.0166014983, ARRAY['YHRD'], 'YHRD combined (111 mutations / 7930 meioses)'), + ('DYS631', 0.0008934022, 0.0007205199, 0.0011077659, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=365'), + ('DYS632', 0.0000870803, 0.0000513223, 0.0001477522, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif AATG, n=959'), + ('DYS633', 0.0000786966, 0.0000349658, 0.0001771206, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=244'), + ('DYS634', 0.0001504856, 0.0001001611, 0.0002260950, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAGG, n=408'), + ('DYS635', 0.0043400000, 0.0033101805, 0.0053743662, ARRAY['YHRD'], 'YHRD combined (68 mutations / 15660 meioses)'), + ('DYS637', 0.0008106959, 0.0006875527, 0.0009558946, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif ACAT, n=641'), + ('DYS639', 0.0003930457, 0.0002803509, 0.0005510411, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=497'), + ('DYS640', 0.0004321306, 0.0003332023, 0.0005604308, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=465'), + ('DYS641', 0.0001527988, 0.0000941857, 0.0002478877, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=649'), + ('DYS642', 0.0001577335, 0.0001012052, 0.0002458357, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAT, n=519'), + ('DYS643', 0.0019278623, 0.0016105158, 0.0023077406, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAG, n=689'), + ('DYS645', 0.0002183378, 0.0001600837, 0.0002977906, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAC, n=978'), + ('DYS653', 0.0002183378, 0.0001600837, 0.0002977906, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAAAC, n=978'), + ('DYS698', 0.0014531050, 0.0011683178, 0.0018073114, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=471'), + ('DYS703', 0.0003529076, 0.0002644149, 0.0004710164, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=406'), + ('DYS705', 0.0000580821, 0.0000341466, 0.0000987955, ARRAY['Willems2016-1kG'], 'Willems et al. 
2016 (1000G MUTEA); motif ACCTCC, n=641'), + ('DYS715', 0.0038538517, 0.0030067409, 0.0049396252, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=405'), + ('DYS717', 0.0004559561, 0.0002992065, 0.0006948243, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AATAC, n=274'), + ('DYS718', 0.0010843887, 0.0008069247, 0.0014572599, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAT, n=398'), + ('DYS726', 0.0003711496, 0.0002828180, 0.0004870696, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AAGG, n=668'), + ('Y-GATA-A10', 0.0035312625, 0.0030108908, 0.0041415699, ARRAY['Willems2016-1kG'], 'Willems et al. 2016 (1000G MUTEA); motif AGAT, n=847'), + ('YGATAH4', 0.0025500000, 0.0017471797, 0.0033455338, ARRAY['YHRD'], 'YHRD combined (39 mutations / 15316 meioses)') +ON CONFLICT (marker_name) DO UPDATE SET + mutation_rate = EXCLUDED.mutation_rate, + mutation_rate_lower = EXCLUDED.mutation_rate_lower, + mutation_rate_upper = EXCLUDED.mutation_rate_upper, + panel_names = EXCLUDED.panel_names, + source = EXCLUDED.source; + +COMMIT; diff --git a/rust/scripts/test-db.sh b/rust/scripts/test-db.sh new file mode 100755 index 00000000..d6095786 --- /dev/null +++ b/rust/scripts/test-db.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# Local Postgres (PostGIS) for tests/dev on a Docker-less Apple-Silicon Mac. +# +# Uses Apple's `container` CLI by default (this Mac Studio has no Docker). Apple +# `container` gives each container its own routable IP (no localhost port +# forwarding), so `up` discovers that IP and prints the matching DATABASE_URL. +# The same PostGIS image is used in production via compose.yaml. 
+# +# eval "$(./scripts/test-db.sh up)" start, wait, migrate, export DATABASE_URL +# ./scripts/test-db.sh down stop and remove the container +# ./scripts/test-db.sh reset down then up +# ./scripts/test-db.sh url print DATABASE_URL for the running container +# +# If $DATABASE_URL is already set, `up`/`url` use it as-is and DO NOT start a +# container (native-Postgres fallback — see plan §9). +set -euo pipefail + +NAME="${DU_PG_NAME:-du-pg}" +# imresamu/postgis publishes linux/arm64 (the official postgis/postgis is amd64-only +# and Apple `container` runs arm64 VMs without emulation). +IMAGE="${DU_PG_IMAGE:-docker.io/imresamu/postgis:16-3.4}" +PASSWORD="${DU_PG_PASSWORD:-dev}" +DB="${DU_PG_DB:-decodingus}" +USER="${DU_PG_USER:-postgres}" +HERE="$(cd "$(dirname "$0")" && pwd)" + +runtime() { + if command -v container >/dev/null 2>&1; then echo "container" + elif command -v docker >/dev/null 2>&1; then echo "docker" + else echo ""; fi +} + +require_runtime() { + local rt; rt="$(runtime)" + if [ -z "$rt" ]; then + cat >&2 <<'EOF' +ERROR: neither `container` (Apple) nor `docker` is installed. + Apple container: https://github.com/apple/container/releases + Or set DATABASE_URL to an existing Postgres and skip the container entirely. +EOF + exit 1 + fi + echo "$rt" +} + +# Resolve host:port for the running container under the given runtime. +container_host() { + local rt="$1" + if [ "$rt" = "container" ]; then + # Each container has its own IP; read it from `container ls`. + container ls 2>/dev/null | awk -v n="$NAME" '$1==n {print $6}' | cut -d/ -f1 + else + echo "localhost" + fi +} + +build_url() { echo "postgres://${USER}:${PASSWORD}@${1}:5432/${DB}?sslmode=disable"; } + +wait_ready() { + local host="$1" + echo "waiting for postgres on ${host}:5432 ..." >&2 + for _ in $(seq 1 60); do + if timeout 2 bash -c "(exec 3<>/dev/tcp/${host}/5432)" 2>/dev/null; then + sleep 1 + echo "postgres is accepting connections." 
>&2 + return 0 + fi + sleep 1 + done + echo "ERROR: postgres did not become ready in time" >&2 + exit 1 +} + +apply_migrations() { + local url="$1" + if command -v sqlx >/dev/null 2>&1; then + echo "applying migrations via sqlx-cli ..." >&2 + DATABASE_URL="$url" sqlx migrate run --source "${HERE}/../migrations" >&2 + else + echo "NOTE: sqlx-cli not found — migrations not auto-applied." >&2 + echo " \`cargo test\` applies them via du_db::run_migrations, or install sqlx-cli." >&2 + fi +} + +cmd="${1:-up}" +case "$cmd" in + up) + if [ -n "${DATABASE_URL:-}" ]; then + wait_ready "$(echo "$DATABASE_URL" | sed -E 's#.*@([^:/]+).*#\1#')" + apply_migrations "$DATABASE_URL" + echo "export DATABASE_URL=\"$DATABASE_URL\"" + exit 0 + fi + rt="$(require_runtime)" + echo "starting $NAME ($IMAGE) via $rt ..." >&2 + if [ "$rt" = "container" ]; then + container run -d --name "$NAME" \ + -e POSTGRES_PASSWORD="$PASSWORD" -e POSTGRES_DB="$DB" "$IMAGE" >/dev/null + else + docker run -d --name "$NAME" -p 5432:5432 \ + -e POSTGRES_PASSWORD="$PASSWORD" -e POSTGRES_DB="$DB" "$IMAGE" >/dev/null + fi + # Give Apple `container` a moment to assign an IP. 
+ host=""; for _ in $(seq 1 20); do host="$(container_host "$rt")"; [ -n "$host" ] && break; sleep 1; done + [ -n "$host" ] || { echo "ERROR: could not resolve container IP" >&2; exit 1; } + url="$(build_url "$host")" + wait_ready "$host" + apply_migrations "$url" + echo "export DATABASE_URL=\"$url\"" + ;; + down) + rt="$(require_runtime)" + "$rt" rm -f "$NAME" >/dev/null 2>&1 || true + echo "removed $NAME" >&2 + ;; + reset) + "$0" down || true + "$0" up + ;; + url) + if [ -n "${DATABASE_URL:-}" ]; then echo "$DATABASE_URL"; exit 0; fi + rt="$(require_runtime)"; host="$(container_host "$rt")" + [ -n "$host" ] || { echo "ERROR: container '$NAME' not running" >&2; exit 1; } + build_url "$host" + ;; + *) + echo "usage: $0 {up|down|reset|url}" >&2 + exit 2 + ;; +esac diff --git a/rust/scripts/yhrd-mutation-rates.txt b/rust/scripts/yhrd-mutation-rates.txt new file mode 100644 index 00000000..a21cc2d6 --- /dev/null +++ b/rust/scripts/yhrd-mutation-rates.txt @@ -0,0 +1,28 @@ +Marker Combined Mutationrate References +DYS19 2.24e-03 (53 in 23679) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Tsai2002, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Kayser2000, Heyer1997, Dupuy2004, Bianchi1998, Budowle2005, Dupuy2001, Pestoni1999, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Sergey Kravchenko(1), Ludmila A. Livshits (1), Svetlana A. 
Limborska(2), Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS389I 2.43e-03 (52 in 21397) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Tsai2002, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Kayser2000, Dupuy2004, Bianchi1998, Budowle2005, Dupuy2001, Pestoni1999, Lessig1998, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Sergey Kravchenko(1), Ludmila A. Livshits (1), Svetlana A. Limborska(2), Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS389II 4.99e-03 (107 in 21450) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Tsai2002, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Kayser2000, Dupuy2004, Bianchi1998, Budowle2005, Dupuy2001, Pestoni1999, Lessig1998, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Sergey Kravchenko(1), Ludmila A. Livshits (1), Svetlana A. 
Limborska(2), Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS390 2.08e-03 (47 in 22557) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Tsai2002, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Kayser2000, Heyer1997, Dupuy2004, Bianchi1998, Budowle2005, Dupuy2001, Pestoni1999, Lessig1998, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Sergey Kravchenko(1), Ludmila A. Livshits (1), Svetlana A. Limborska(2), Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS391 2.34e-03 (54 in 23071) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Tsai2002, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Kayser2000, Heyer1997, Dupuy2004, Bianchi1998, Budowle2005, Dupuy2001, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Sergey Kravchenko(1), Ludmila A. Livshits (1), Svetlana A. 
Limborska(2), Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS392 4.45e-04 (10 in 22473) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Tsai2002, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Kayser2000, Heyer1997, Dupuy2004, Bianchi1998, Budowle2005, Dupuy2001, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Sergey Kravchenko(1), Ludmila A. Livshits (1), Svetlana A. Limborska(2), Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS393 1.27e-03 (27 in 21323) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Tsai2002, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Kayser2000, Heyer1997, Dupuy2004, Bianchi1998, Budowle2005, Dupuy2001, Pestoni1999, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Sergey Kravchenko(1), Ludmila A. Livshits (1), Svetlana A. 
Limborska(2), Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS385 2.78e-03 (111 in 39944) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Tsai2002, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Kayser2000, Heyer1997, Dupuy2004, Budowle2005, Dupuy2001, Lessig1998, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Sergey Kravchenko(1), Ludmila A. Livshits (1), Svetlana A. Limborska(2), Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS439 4.44e-03 (81 in 18235) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Budowle2005, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS438 3.95e-04 (7 in 17731) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Budowle2005, Ge2009, Adnan2018, Laouina2013, 
Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS437 1.19e-03 (21 in 17706) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Goedbloed2009, Hohoff2007, Lee2007, Domingues2007, Decker2008, Ballard2005a, Kurihara2004, Gusmão2005, Budowle2005, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS456 4.12e-03 (61 in 14804) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Goedbloed2009, Lee2007, Decker2008, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS458 6.63e-03 (98 in 14786) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Goedbloed2009, Lee2007, Decker2008, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, 
Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS635 4.34e-03 (68 in 15660) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Goedbloed2009, Lee2007, Decker2008, Gusmão2005, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +YGATAH4 2.55e-03 (39 in 15316) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Goedbloed2009, Lee2007, Decker2008, Kurihara2004, Gusmão2005, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS448 1.19e-03 (17 in 14281) Pontes2007, Sánchez-Diz2008, Turrina2006, Berger2005, Goedbloed2009, Lee2007, Decker2008, Ge2009, Adnan2018, Laouina2013, Bugoye2018, Petrovic2018, Mertoglu2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Landeskriminalamt BW; FB230 - Molekulargenetische Untersuchungen, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS576 
1.19e-02 (117 in 9813) Ballantyne2010, LiYuan2018, Petrovic2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS481 3.86e-03 (36 in 9328) Ballantyne2010, Petrovic2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS549 2.86e-03 (26 in 9093) Ballantyne2010, Petrovic2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Lee2023, Liu2023, Hao2024, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS533 2.73e-03 (24 in 8795) Ballantyne2010, Petrovic2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS570 7.78e-03 (74 in 9514) Ballantyne2010, LiYuan2018, Petrovic2018, Lin2020, Ambrosio2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Lee2023, Liu2023, Hao2024, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS643 9.98e-04 (8 in 8018) Ballantyne2010, Petrovic2018, Ambrosio2020, Fu2020, Fan2020a, 
FanH2021, Lee2023, Liu2023, Maria Seidel, Josephine Purps, Patricia Entz, Carmen Krüger, Petra Otremba, Marion Nagy, Lutz Roewer, Chris Tyler-Smith, Wei Wei, Xue Y, Ayub Q, Mohyuddin A, Qamar R, Zerjal T, Helgason A, Mehdi SQ, Sheila Marie Tabulina Angustia +DYS627 1.40e-02 (111 in 7930) Ballantyne2010, LiYuan2018, Lin2020, Fan2020a, FanH2021, Otagiri2022, Liu2023, Hao2024 +DYS460 4.36e-03 (33 in 7563) Ballantyne2010, Lin2020, Fu2020, Fan2020a, FanH2021, Otagiri2022, Liu2023, Hao2024 +DYS518 1.08e-02 (83 in 7717) Ballantyne2010, LiYuan2018, Lin2020, Fan2020a, FanH2021, Otagiri2022, Liu2023, Hao2024 +DYS449 9.76e-03 (76 in 7784) Ballantyne2010, LiYuan2018, Lin2020, Fan2020a, FanH2021, Otagiri2022, Liu2023, Hao2024 +DYF387S1 6.47e-03 (77 in 11899) Ballantyne2010, LiYuan2018, Lin2020, Fan2020a, FanH2021, Otagiri2022, Liu2023 \ No newline at end of file diff --git a/scripts/migrate_variant_to_v2.sql b/scripts/migrate_variant_to_v2.sql deleted file mode 100644 index 8d4db420..00000000 --- a/scripts/migrate_variant_to_v2.sql +++ /dev/null @@ -1,171 +0,0 @@ --- ============================================================================= --- Migration Script: variant + variant_alias -> variant_v2 (OPTIMIZED) --- ============================================================================= --- Run this AFTER evolution 53.sql has been applied. --- --- Optimization Strategy: --- 1. Drop heavy GIN indexes before load. --- 2. Perform SINGLE INSERT with CTEs to aggregate coordinates and aliases in one pass. --- 3. Re-create indexes after load. 
--- --- Usage: --- psql -d your_database -f scripts/migrate_variant_to_v2.sql --- ============================================================================= - -BEGIN; - --- ============================================================================= --- Step 0: Pre-migration cleanup & Index Management --- ============================================================================= - --- Drop heavy GIN indexes to speed up massive insert --- We keep B-Tree indexes for basic unique constraints if needed, but GIN is the write killer -DROP INDEX IF EXISTS idx_variant_v2_aliases; -DROP INDEX IF EXISTS idx_variant_v2_coordinates; -DROP INDEX IF EXISTS idx_variant_v2_alias_search; - --- ============================================================================= --- Step 1: Combined Aggregation and Insert --- ============================================================================= - -INSERT INTO variant_v2 (canonical_name, mutation_type, naming_status, aliases, coordinates) -WITH - -- 1. Aggregate Coordinates (group by common_name) - coords_agg AS ( - SELECT - v.common_name as group_key, - MAX(v.variant_type) as mutation_type, - jsonb_object_agg( - COALESCE(gc.reference_genome, 'unknown'), - jsonb_build_object( - 'contig', COALESCE(gc.common_name, gc.accession), - 'position', v.position, - 'ref', v.reference_allele, - 'alt', v.alternate_allele - ) - ) as coordinates - FROM variant v - JOIN genbank_contig gc ON v.genbank_contig_id = gc.genbank_contig_id - WHERE v.common_name IS NOT NULL -- Optimization: Simplify by ignoring nameless/rs_id-only if not loaded - GROUP BY v.common_name - ), - -- 2. Aggregate Aliases: Sources (First, array per source) - alias_sources AS ( - SELECT - v.common_name as group_key, - va.source, - jsonb_agg(DISTINCT va.alias_value) as names - FROM variant v - JOIN variant_alias va ON v.variant_id = va.variant_id - WHERE v.common_name IS NOT NULL - AND va.source IS NOT NULL - GROUP BY v.common_name, va.source - ), - -- 3. 
Aggregate Aliases: Combine Source Arrays into Object - alias_sources_obj AS ( - SELECT - group_key, - jsonb_object_agg(source, names) as sources_json - FROM alias_sources - GROUP BY group_key - ), - -- 4. Aggregate Aliases: Common Names List - alias_commons AS ( - SELECT - v.common_name as group_key, - jsonb_agg(DISTINCT va.alias_value) FILTER (WHERE va.alias_type = 'common_name') as common_names - FROM variant v - JOIN variant_alias va ON v.variant_id = va.variant_id - WHERE v.common_name IS NOT NULL - GROUP BY v.common_name - ) -SELECT - c.group_key as canonical_name, - c.mutation_type, - 'NAMED' as naming_status, - jsonb_build_object( - 'common_names', COALESCE(al.common_names, '[]'::jsonb), - 'rs_ids', '[]'::jsonb, -- Simplified: No rs_ids loaded per user instruction - 'sources', COALESCE(aso.sources_json, '{}'::jsonb) - ) as aliases, - c.coordinates -FROM coords_agg c -LEFT JOIN alias_sources_obj aso ON c.group_key = aso.group_key -LEFT JOIN alias_commons al ON c.group_key = al.group_key; - --- Handle Unnamed variants (if any exist without common_name) - Optional pass if needed --- For now, focusing on the named migration as implied by "2.5 million rows" usually being the named set. - --- ============================================================================= --- Step 2: Update haplogroup_variant FK references (With Deduplication) --- ============================================================================= - --- Drop old FK constraint -ALTER TABLE tree.haplogroup_variant DROP CONSTRAINT IF EXISTS haplogroup_variant_variant_id_fkey; - --- 2a. Add temp column to store the new ID mapping -ALTER TABLE tree.haplogroup_variant ADD COLUMN IF NOT EXISTS target_v2_id INT; - --- 2b. 
Populate target_v2_id based on canonical name match --- Note: This Update is safe because it doesn't touch the constrained variant_id column yet -UPDATE tree.haplogroup_variant hv -SET target_v2_id = v2.variant_id -FROM variant v -JOIN variant_v2 v2 ON v2.canonical_name = v.common_name -WHERE hv.variant_id = v.variant_id; - --- 2c. Delete duplicates --- We keep the row with the lowest haplogroup_variant_id -DELETE FROM tree.haplogroup_variant hv_del -USING tree.haplogroup_variant hv_keep -WHERE hv_del.haplogroup_id = hv_keep.haplogroup_id - AND hv_del.target_v2_id = hv_keep.target_v2_id - AND hv_del.haplogroup_variant_id > hv_keep.haplogroup_variant_id; - --- 2d. Apply the update -UPDATE tree.haplogroup_variant -SET variant_id = target_v2_id -WHERE target_v2_id IS NOT NULL; - --- 2e. Cleanup -ALTER TABLE tree.haplogroup_variant DROP COLUMN target_v2_id; - --- Add new FK constraint -ALTER TABLE tree.haplogroup_variant - ADD CONSTRAINT haplogroup_variant_variant_id_fkey - FOREIGN KEY (variant_id) REFERENCES variant_v2(variant_id) ON DELETE CASCADE; - --- ============================================================================= --- Step 3: Re-create Indexes --- ============================================================================= - -CREATE INDEX idx_variant_v2_aliases ON variant_v2 USING GIN(aliases); -CREATE INDEX idx_variant_v2_coordinates ON variant_v2 USING GIN(coordinates); -CREATE INDEX idx_variant_v2_alias_search ON variant_v2 USING GIN((aliases->'common_names') jsonb_path_ops); - --- ============================================================================= --- Step 4: Verification --- ============================================================================= - -SELECT 'Old variant count:' as check_name, COUNT(*) as count FROM variant -UNION ALL -SELECT 'New variant_v2 count:', COUNT(*) FROM variant_v2 -UNION ALL -SELECT 'Old variant_alias count:', COUNT(*) FROM variant_alias -UNION ALL -SELECT 'haplogroup_variant count:', COUNT(*) FROM 
tree.haplogroup_variant; - --- Check for orphaned haplogroup_variant rows (should be 0) -SELECT 'Orphaned haplogroup_variant rows:' as check_name, COUNT(*) as count -FROM tree.haplogroup_variant hv -LEFT JOIN variant_v2 v2 ON hv.variant_id = v2.variant_id -WHERE v2.variant_id IS NULL; - -COMMIT; - --- ============================================================================= --- Step 5: Drop old tables (RUN MANUALLY AFTER VERIFICATION) --- ============================================================================= --- DROP TABLE IF EXISTS variant_alias CASCADE; --- DROP TABLE IF EXISTS variant CASCADE; --- DROP TABLE IF EXISTS str_marker CASCADE; \ No newline at end of file diff --git a/scripts/naming_functions.sql b/scripts/naming_functions.sql deleted file mode 100644 index a1ae24b6..00000000 --- a/scripts/naming_functions.sql +++ /dev/null @@ -1,31 +0,0 @@ --- ============================================================================= --- DU Naming Authority: Sequence and function for DecodingUs variant names --- Per ISOGG guidelines: No zero padding (DU1, DU2, ... 
not DU00001) --- ============================================================================= - - - --- Function to get next DU name (no zero padding per ISOGG request) -CREATE OR REPLACE FUNCTION next_du_name() RETURNS TEXT AS $func$ -BEGIN - RETURN 'DU' || nextval('du_variant_name_seq')::TEXT; -END; -$func$ LANGUAGE plpgsql; - --- Function to peek at current value without incrementing -CREATE OR REPLACE FUNCTION current_du_name() RETURNS TEXT AS $func$ -BEGIN - RETURN 'DU' || currval('du_variant_name_seq')::TEXT; -END; -$func$ LANGUAGE plpgsql; - --- Function to check if a name is a valid DU name -CREATE OR REPLACE FUNCTION is_du_name(name TEXT) RETURNS BOOLEAN AS $func$ -BEGIN - RETURN name ~ '^DU[1-9][0-9]*$'; -END; -$func$ LANGUAGE plpgsql; - -COMMENT ON FUNCTION next_du_name() IS 'Returns next available DU name (e.g., DU1, DU2, DU123)'; -COMMENT ON FUNCTION current_du_name() IS 'Returns current DU name without incrementing sequence'; -COMMENT ON FUNCTION is_du_name(TEXT) IS 'Validates if a name follows DU naming convention'; \ No newline at end of file diff --git a/test/actions/PdsAuthActionSpec.scala b/test/actions/PdsAuthActionSpec.scala deleted file mode 100644 index c5208537..00000000 --- a/test/actions/PdsAuthActionSpec.scala +++ /dev/null @@ -1,220 +0,0 @@ -package actions - -import helpers.ServiceSpec -import models.domain.pds.PdsNode -import org.apache.pekko.actor.ActorSystem -import org.apache.pekko.stream.Materializer -import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{reset, when} -import play.api.libs.json.Json -import play.api.mvc.* -import play.api.mvc.Results.Ok -import play.api.test.{FakeRequest, Helpers} -import play.api.test.Helpers.* -import repositories.PdsNodeRepository -import services.PdsSignatureVerifier - -import java.time.Instant -import scala.concurrent.Future - -class PdsAuthActionSpec extends ServiceSpec { - - implicit val system: ActorSystem = ActorSystem("test") - implicit val mat: Materializer = 
Materializer(system) - - val mockVerifier: PdsSignatureVerifier = mock[PdsSignatureVerifier] - val mockNodeRepo: PdsNodeRepository = mock[PdsNodeRepository] - val stubParser: BodyParsers.Default = mock[BodyParsers.Default] - - val testNode: PdsNode = PdsNode( - id = Some(1), - did = "did:plc:testnode", - pdsUrl = "https://pds.test.example.com", - softwareVersion = Some("0.1.0"), - status = "ONLINE" - ) - - val validTimestamp: String = Instant.now().toString - val validNonce: String = "test-nonce-123" - val validSignature: String = "dGVzdC1zaWduYXR1cmU=" - - override def beforeEach(): Unit = { - reset(mockVerifier, mockNodeRepo) - } - - override def afterEach(): Unit = { - super.afterEach() - } - - // Clean up actor system when tests complete - def cleanup(): Unit = system.terminate() - - private def buildAction(): PdsAuthAction = - new PdsAuthAction(stubParser, Helpers.stubControllerComponents(), mockVerifier, mockNodeRepo) - - private def buildRequest( - did: Option[String] = Some("did:plc:testnode"), - signature: Option[String] = Some(validSignature), - timestamp: Option[String] = Some(validTimestamp), - nonce: Option[String] = Some(validNonce) - ): FakeRequest[AnyContentAsEmpty.type] = { - var headers = Seq.empty[(String, String)] - did.foreach(d => headers :+= ("X-PDS-DID" -> d)) - signature.foreach(s => headers :+= ("X-PDS-Signature" -> s)) - timestamp.foreach(t => headers :+= ("X-PDS-Timestamp" -> t)) - nonce.foreach(n => headers :+= ("X-PDS-Nonce" -> n)) - FakeRequest("POST", "/api/pds/heartbeat").withHeaders(headers*) - } - - private def successBlock(request: PdsAuthRequest[?]): Future[Result] = - Future.successful(Ok(Json.obj("node" -> request.pdsNode.did))) - - "PdsAuthAction" should { - - "reject request missing DID header" in { - val action = buildAction() - val request = buildRequest(did = None) - val result = action.invokeBlock(request, successBlock) - status(result) mustBe UNAUTHORIZED - (contentAsJson(result) \ "error").as[String] must 
include("Missing required authentication headers") - } - - "reject request missing Signature header" in { - val action = buildAction() - val request = buildRequest(signature = None) - val result = action.invokeBlock(request, successBlock) - status(result) mustBe UNAUTHORIZED - } - - "reject request missing Timestamp header" in { - val action = buildAction() - val request = buildRequest(timestamp = None) - val result = action.invokeBlock(request, successBlock) - status(result) mustBe UNAUTHORIZED - } - - "reject request with expired timestamp" in { - val action = buildAction() - when(mockVerifier.isTimestampValid(any[String])).thenReturn(false) - - val request = buildRequest() - val result = action.invokeBlock(request, successBlock) - status(result) mustBe UNAUTHORIZED - (contentAsJson(result) \ "error").as[String] must include("timestamp") - } - - "reject request with duplicate nonce" in { - val action = buildAction() - when(mockVerifier.isTimestampValid(any[String])).thenReturn(true) - when(mockVerifier.checkAndRecordNonce(any[String])).thenReturn(false) - - val request = buildRequest() - val result = action.invokeBlock(request, successBlock) - status(result) mustBe UNAUTHORIZED - (contentAsJson(result) \ "error").as[String] must include("Nonce") - } - - "reject request from unregistered PDS node" in { - val action = buildAction() - when(mockVerifier.isTimestampValid(any[String])).thenReturn(true) - when(mockVerifier.checkAndRecordNonce(any[String])).thenReturn(true) - when(mockNodeRepo.findByDid("did:plc:testnode")).thenReturn(Future.successful(None)) - - val request = buildRequest() - val result = action.invokeBlock(request, successBlock) - status(result) mustBe UNAUTHORIZED - (contentAsJson(result) \ "error").as[String] must include("not registered") - } - - "reject request with invalid signature" in { - val action = buildAction() - when(mockVerifier.isTimestampValid(any[String])).thenReturn(true) - 
when(mockVerifier.checkAndRecordNonce(any[String])).thenReturn(true) - when(mockNodeRepo.findByDid("did:plc:testnode")).thenReturn(Future.successful(Some(testNode))) - when(mockVerifier.hashBody(any())).thenReturn("body-hash") - when(mockVerifier.buildSigningInput(any[String], any[String], any[String], any[String], any())) - .thenReturn("signing-input") - when(mockVerifier.verifySignature(any[String], any[String], any[String])) - .thenReturn(Future.successful(false)) - - val request = buildRequest() - val result = action.invokeBlock(request, successBlock) - status(result) mustBe UNAUTHORIZED - (contentAsJson(result) \ "error").as[String] must include("Invalid signature") - } - - "allow request with valid signature and registered node" in { - val action = buildAction() - when(mockVerifier.isTimestampValid(any[String])).thenReturn(true) - when(mockVerifier.checkAndRecordNonce(any[String])).thenReturn(true) - when(mockNodeRepo.findByDid("did:plc:testnode")).thenReturn(Future.successful(Some(testNode))) - when(mockVerifier.hashBody(any())).thenReturn("body-hash") - when(mockVerifier.buildSigningInput(any[String], any[String], any[String], any[String], any())) - .thenReturn("signing-input") - when(mockVerifier.verifySignature(any[String], any[String], any[String])) - .thenReturn(Future.successful(true)) - - val request = buildRequest() - val result = action.invokeBlock(request, successBlock) - status(result) mustBe OK - (contentAsJson(result) \ "node").as[String] mustBe "did:plc:testnode" - } - - "pass the authenticated PDS node to the block" in { - val action = buildAction() - when(mockVerifier.isTimestampValid(any[String])).thenReturn(true) - when(mockVerifier.checkAndRecordNonce(any[String])).thenReturn(true) - when(mockNodeRepo.findByDid("did:plc:testnode")).thenReturn(Future.successful(Some(testNode))) - when(mockVerifier.hashBody(any())).thenReturn("body-hash") - when(mockVerifier.buildSigningInput(any[String], any[String], any[String], any[String], any())) - 
.thenReturn("signing-input") - when(mockVerifier.verifySignature(any[String], any[String], any[String])) - .thenReturn(Future.successful(true)) - - var capturedNode: Option[PdsNode] = None - val captureBlock: PdsAuthRequest[?] => Future[Result] = { req => - capturedNode = Some(req.pdsNode) - Future.successful(Ok) - } - - val request = buildRequest() - action.invokeBlock(request, captureBlock).futureValue - - capturedNode mustBe defined - capturedNode.get.did mustBe "did:plc:testnode" - capturedNode.get.id mustBe Some(1) - } - - "work without nonce header" in { - val action = buildAction() - when(mockVerifier.isTimestampValid(any[String])).thenReturn(true) - when(mockNodeRepo.findByDid("did:plc:testnode")).thenReturn(Future.successful(Some(testNode))) - when(mockVerifier.hashBody(any())).thenReturn("body-hash") - when(mockVerifier.buildSigningInput(any[String], any[String], any[String], any[String], any())) - .thenReturn("signing-input") - when(mockVerifier.verifySignature(any[String], any[String], any[String])) - .thenReturn(Future.successful(true)) - - val request = buildRequest(nonce = None) - val result = action.invokeBlock(request, successBlock) - status(result) mustBe OK - } - - "handle signature verification errors gracefully" in { - val action = buildAction() - when(mockVerifier.isTimestampValid(any[String])).thenReturn(true) - when(mockVerifier.checkAndRecordNonce(any[String])).thenReturn(true) - when(mockNodeRepo.findByDid("did:plc:testnode")).thenReturn(Future.successful(Some(testNode))) - when(mockVerifier.hashBody(any())).thenReturn("body-hash") - when(mockVerifier.buildSigningInput(any[String], any[String], any[String], any[String], any())) - .thenReturn("signing-input") - when(mockVerifier.verifySignature(any[String], any[String], any[String])) - .thenReturn(Future.failed(new RuntimeException("DID resolution timeout"))) - - val request = buildRequest() - val result = action.invokeBlock(request, successBlock) - status(result) mustBe 
INTERNAL_SERVER_ERROR - (contentAsJson(result) \ "error").as[String] must include("Authentication error") - } - } -} diff --git a/test/controllers/ExternalBiosampleControllerSpec.scala b/test/controllers/ExternalBiosampleControllerSpec.scala deleted file mode 100644 index 945d3a88..00000000 --- a/test/controllers/ExternalBiosampleControllerSpec.scala +++ /dev/null @@ -1,219 +0,0 @@ -package controllers - -import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{reset, verify, when} -import org.scalatest.BeforeAndAfterEach -import org.scalatest.concurrent.ScalaFutures -import org.scalatestplus.mockito.MockitoSugar -import org.scalatestplus.play.PlaySpec -import org.scalatestplus.play.guice.GuiceOneAppPerSuite -import play.api.Application -import play.api.inject.bind -import play.api.inject.guice.GuiceApplicationBuilder -import play.api.libs.json.Json -import play.api.test.Helpers.* -import play.api.test.{FakeRequest, Injecting} -import models.api.ExternalBiosampleRequest -import services.* - -import java.util.UUID -import scala.concurrent.Future - -class ExternalBiosampleControllerSpec extends PlaySpec - with GuiceOneAppPerSuite - with Injecting - with MockitoSugar - with ScalaFutures - with BeforeAndAfterEach { - - val mockDomainService: BiosampleDomainService = mock[BiosampleDomainService] - - override def fakeApplication(): Application = { - new GuiceApplicationBuilder() - .configure( - "play.evolutions.enabled" -> false, - "play.modules.disabled.0" -> "modules.StartupModule", - "api.key.enabled" -> false, - "slick.dbs.default.profile" -> "slick.jdbc.H2Profile$", - "slick.dbs.default.db.driver" -> "org.h2.Driver", - "slick.dbs.default.db.url" -> "jdbc:h2:mem:test_ctrl;MODE=PostgreSQL;DATABASE_TO_UPPER=FALSE;DB_CLOSE_DELAY=-1", - "slick.dbs.default.db.username" -> "sa", - "slick.dbs.default.db.password" -> "", - "slick.dbs.metadata.profile" -> "slick.jdbc.H2Profile$", - "slick.dbs.metadata.db.driver" -> "org.h2.Driver", - "slick.dbs.metadata.db.url" -> 
"jdbc:h2:mem:test_ctrl_meta;MODE=PostgreSQL;DATABASE_TO_UPPER=FALSE;DB_CLOSE_DELAY=-1", - "slick.dbs.metadata.db.username" -> "sa", - "slick.dbs.metadata.db.password" -> "", - "pekko.quartz.schedules" -> Map.empty - ) - .overrides( - bind[BiosampleDomainService].toInstance(mockDomainService) - ) - .build() - } - - override def beforeEach(): Unit = { - reset(mockDomainService) - } - - val testGuid: UUID = UUID.randomUUID() - - val validRequestJson = Json.obj( - "sampleAccession" -> "SAMEA001", - "sourceSystem" -> "test", - "description" -> "Test sample", - "alias" -> "alias1", - "centerName" -> "TestCenter", - "sequenceData" -> Json.obj( - "reads" -> 1000, - "readLength" -> 150, - "coverage" -> 30.0, - "platformName" -> "Illumina", - "testType" -> "WGS", - "files" -> Json.arr() - ) - ) - - "ExternalBiosampleController" should { - - "return 201 Created on successful biosample creation" in { - when(mockDomainService.createExternalBiosample(any[ExternalBiosampleRequest])) - .thenReturn(Future.successful(testGuid)) - - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(validRequestJson) - - val result = route(app, request).get - - status(result) mustBe CREATED - val json = contentAsJson(result) - (json \ "status").as[String] mustBe "success" - (json \ "guid").as[String] mustBe testGuid.toString - } - - "return 409 Conflict for duplicate accession" in { - when(mockDomainService.createExternalBiosample(any[ExternalBiosampleRequest])) - .thenReturn(Future.failed(DuplicateAccessionException("SAMEA001"))) - - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(validRequestJson) - - val result = route(app, request).get - - status(result) mustBe CONFLICT - val json = contentAsJson(result) - (json \ "error").as[String] mustBe "Duplicate accession" - } - - "return 400 for invalid coordinates" in { - 
when(mockDomainService.createExternalBiosample(any[ExternalBiosampleRequest])) - .thenReturn(Future.failed(InvalidCoordinatesException(999.0, 999.0))) - - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(validRequestJson) - - val result = route(app, request).get - - status(result) mustBe BAD_REQUEST - val json = contentAsJson(result) - (json \ "error").as[String] mustBe "Invalid coordinates" - } - - "return 400 for sequence data validation error" in { - when(mockDomainService.createExternalBiosample(any[ExternalBiosampleRequest])) - .thenReturn(Future.failed(SequenceDataValidationException("Invalid format"))) - - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(validRequestJson) - - val result = route(app, request).get - - status(result) mustBe BAD_REQUEST - val json = contentAsJson(result) - (json \ "error").as[String] mustBe "Invalid sequence data" - } - - "return 400 for publication linkage error" in { - when(mockDomainService.createExternalBiosample(any[ExternalBiosampleRequest])) - .thenReturn(Future.failed(PublicationLinkageException("DOI not found"))) - - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(validRequestJson) - - val result = route(app, request).get - - status(result) mustBe BAD_REQUEST - val json = contentAsJson(result) - (json \ "error").as[String] mustBe "Publication linkage failed" - } - - "return 500 for unexpected exceptions" in { - when(mockDomainService.createExternalBiosample(any[ExternalBiosampleRequest])) - .thenReturn(Future.failed(new RuntimeException("unexpected"))) - - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(validRequestJson) - - val result = route(app, request).get - - 
status(result) mustBe INTERNAL_SERVER_ERROR - val json = contentAsJson(result) - (json \ "message").as[String] must include("unexpected error") - } - - "return 400 for missing required fields" in { - val incomplete = Json.obj( - "sampleAccession" -> "SAMEA001" - // Missing sourceSystem, description, centerName, sequenceData - ) - - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(incomplete) - - val result = route(app, request).get - - status(result) mustBe BAD_REQUEST - } - - "return 400 for malformed JSON" in { - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withBody("{invalid json") - - val result = route(app, request).get - - status(result) mustBe BAD_REQUEST - } - - "return 415 for non-JSON content type" in { - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "text/plain") - .withBody("not json") - - val result = route(app, request).get - - status(result) mustBe UNSUPPORTED_MEDIA_TYPE - } - - "pass request body to domain service" in { - when(mockDomainService.createExternalBiosample(any[ExternalBiosampleRequest])) - .thenReturn(Future.successful(testGuid)) - - val request = FakeRequest(POST, "/api/private/external/biosamples") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(validRequestJson) - - val result = route(app, request).get - - status(result) mustBe CREATED - verify(mockDomainService).createExternalBiosample(any[ExternalBiosampleRequest]) - } - } -} diff --git a/test/controllers/GenomeRegionsApiControllerSpec.scala b/test/controllers/GenomeRegionsApiControllerSpec.scala deleted file mode 100644 index f40df595..00000000 --- a/test/controllers/GenomeRegionsApiControllerSpec.scala +++ /dev/null @@ -1,109 +0,0 @@ -package controllers - -import org.scalatestplus.play.* -import org.scalatestplus.play.guice.* -import 
play.api.test.* -import play.api.test.Helpers.* -import play.api.Application -import play.api.inject.guice.GuiceApplicationBuilder -import play.api.libs.json.Json - -class GenomeRegionsApiControllerSpec extends PlaySpec with GuiceOneAppPerSuite with Injecting { - - override def fakeApplication(): Application = { - new GuiceApplicationBuilder() - .configure("play.evolutions.enabled" -> false) - .build() - } - - "GenomeRegionsApiController" should { - - "return list of supported builds" in { - val request = FakeRequest(GET, "/api/v1/genome-regions") - val result = route(app, request).get - - status(result) mustBe OK - contentType(result) mustBe Some("application/json") - - val json = contentAsJson(result) - (json \ "supportedBuilds").as[Seq[String]] must contain allOf ("GRCh37", "GRCh38", "hs1") - } - - "return 404 for unknown build" in { - val request = FakeRequest(GET, "/api/v1/genome-regions/unknown_build") - val result = route(app, request).get - - status(result) mustBe NOT_FOUND - contentType(result) mustBe Some("application/json") - - val json = contentAsJson(result) - (json \ "error").as[String] mustBe "Unknown build" - (json \ "supportedBuilds").as[Seq[String]] must contain allOf ("GRCh37", "GRCh38", "hs1") - } - - "return genome regions for GRCh38" in { - val request = FakeRequest(GET, "/api/v1/genome-regions/GRCh38") - val result = route(app, request).get - - status(result) mustBe OK - contentType(result) mustBe Some("application/json") - - val json = contentAsJson(result) - (json \ "build").as[String] mustBe "GRCh38" - (json \ "version").asOpt[String] mustBe defined - (json \ "generatedAt").asOpt[String] mustBe defined - // chromosomes is present as an object - (json \ "chromosomes").toOption mustBe defined - } - - "resolve hg38 alias to GRCh38" in { - val request = FakeRequest(GET, "/api/v1/genome-regions/hg38") - val result = route(app, request).get - - status(result) mustBe OK - - val json = contentAsJson(result) - (json \ "build").as[String] mustBe 
"GRCh38" - } - - "resolve hg19 alias to GRCh37" in { - val request = FakeRequest(GET, "/api/v1/genome-regions/hg19") - val result = route(app, request).get - - status(result) mustBe OK - - val json = contentAsJson(result) - (json \ "build").as[String] mustBe "GRCh37" - } - - "resolve chm13 alias to hs1" in { - val request = FakeRequest(GET, "/api/v1/genome-regions/chm13") - val result = route(app, request).get - - status(result) mustBe OK - - val json = contentAsJson(result) - (json \ "build").as[String] mustBe "hs1" - } - - "include Cache-Control header" in { - val request = FakeRequest(GET, "/api/v1/genome-regions/GRCh38") - val result = route(app, request).get - - status(result) mustBe OK - val responseHeaders = headers(result) - responseHeaders.get("Cache-Control") mustBe defined - responseHeaders("Cache-Control") must include("max-age=") - } - - "include Vary header for content negotiation" in { - val request = FakeRequest(GET, "/api/v1/genome-regions/GRCh38") - val result = route(app, request).get - - status(result) mustBe OK - val responseHeaders = headers(result) - responseHeaders.get("Vary") mustBe defined - responseHeaders("Vary") must include("Accept-Encoding") - } - } -} diff --git a/test/controllers/HaplogroupTreeMergeControllerSpec.scala b/test/controllers/HaplogroupTreeMergeControllerSpec.scala deleted file mode 100644 index 75c56855..00000000 --- a/test/controllers/HaplogroupTreeMergeControllerSpec.scala +++ /dev/null @@ -1,520 +0,0 @@ -package controllers - -import actions.ApiSecurityAction -import models.HaplogroupType -import models.api.haplogroups.* -import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{reset, verify, when} -import org.scalatest.BeforeAndAfterEach -import org.scalatest.concurrent.ScalaFutures -import org.scalatestplus.mockito.MockitoSugar -import org.scalatestplus.play.PlaySpec -import org.scalatestplus.play.guice.GuiceOneAppPerSuite -import play.api.Application -import play.api.inject.bind -import 
play.api.inject.guice.GuiceApplicationBuilder -import play.api.libs.json.Json -import play.api.mvc.Results -import play.api.test.Helpers.* -import play.api.test.{FakeRequest, Injecting} -import services.HaplogroupTreeMergeService - -import scala.concurrent.{ExecutionContext, Future} - -class HaplogroupTreeMergeControllerSpec extends PlaySpec - with GuiceOneAppPerSuite - with Injecting - with MockitoSugar - with ScalaFutures - with BeforeAndAfterEach { - - // Mock service - val mockMergeService: HaplogroupTreeMergeService = mock[HaplogroupTreeMergeService] - - override def fakeApplication(): Application = { - new GuiceApplicationBuilder() - .configure( - "play.evolutions.enabled" -> false, - "api.key.enabled" -> false // Disable API key for testing - ) - .overrides( - bind[HaplogroupTreeMergeService].toInstance(mockMergeService) - ) - .build() - } - - override def beforeEach(): Unit = { - reset(mockMergeService) - } - - // Test fixtures - def createSuccessResponse(nodesCreated: Int = 5): TreeMergeResponse = TreeMergeResponse( - success = true, - message = "Merge completed successfully", - statistics = MergeStatistics( - nodesProcessed = 10, - nodesCreated = nodesCreated, - nodesUpdated = 3, - nodesUnchanged = 2, - variantsAdded = 20, - variantsUpdated = 5, - relationshipsCreated = 4, - relationshipsUpdated = 1, - splitOperations = 0 - ) - ) - - def createPreviewResponse(): MergePreviewResponse = MergePreviewResponse( - statistics = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0), - conflicts = List.empty, - splits = List.empty, - ambiguities = List.empty, - newNodes = List("NewNode1", "NewNode2"), - updatedNodes = List("UpdatedNode1"), - unchangedNodes = List("UnchangedNode1") - ) - - "HaplogroupTreeMergeController" should { - - // ========================================================================= - // mergeFullTree endpoint tests - // ========================================================================= - - "return 202 Accepted for full tree merge request" 
in { - when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) - .thenReturn(Future.successful(createSuccessResponse())) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj( - "name" -> "R1b", - "variants" -> Json.arr(Json.obj("name" -> "M269")) - ), - "sourceName" -> "ytree.net" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe ACCEPTED - contentType(result) mustBe Some("application/json") - - val json = contentAsJson(result) - (json \ "status").as[String] mustBe "Processing" - } - - "return 202 Accepted even for failed merge (fire-and-forget)" in { - // With fire-and-forget pattern, the controller always returns 202 immediately - // Errors are logged in the background, not returned to the client - val failureResponse = TreeMergeResponse.failure( - "Merge validation failed", - List("Invalid tree structure") - ) - when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) - .thenReturn(Future.successful(failureResponse)) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj("name" -> "Invalid"), - "sourceName" -> "test" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe ACCEPTED - val json = contentAsJson(result) - (json \ "status").as[String] mustBe "Processing" - } - - "reject invalid haplogroup type in JSON body" in { - val requestBody = Json.obj( - "haplogroupType" -> "INVALID_TYPE", - "sourceTree" -> Json.obj("name" -> "Test"), - "sourceName" -> "test" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - // The JSON parsing throws an 
exception for invalid HaplogroupType - // which propagates through Play's JSON body parser - an[IllegalArgumentException] must be thrownBy { - val result = route(app, request).get - status(result) - } - } - - "return 400 for missing required fields" in { - val requestBody = Json.obj( - "haplogroupType" -> "Y" - // Missing sourceTree and sourceName - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe BAD_REQUEST - } - - "return 202 Accepted even for service exceptions (fire-and-forget)" in { - // With fire-and-forget pattern, exceptions are logged in the background - // The client still receives 202 Accepted immediately - when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) - .thenReturn(Future.failed(new RuntimeException("Database connection failed"))) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj("name" -> "Test"), - "sourceName" -> "test" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe ACCEPTED - val json = contentAsJson(result) - (json \ "status").as[String] mustBe "Processing" - } - - "pass through all request parameters to service" in { - when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) - .thenReturn(Future.successful(createSuccessResponse())) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj( - "name" -> "R1b", - "variants" -> Json.arr(Json.obj("name" -> "M269")), - "formedYbp" -> 4500 - ), - "sourceName" -> "ytree.net", - "priorityConfig" -> Json.obj( - "sourcePriorities" -> Json.arr("ytree.net", "ISOGG") - ), - "conflictStrategy" -> "higher_priority_wins", - "dryRun" -> true - ) - - val request = FakeRequest(POST, 
"/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe ACCEPTED - verify(mockMergeService).mergeFullTree(any[TreeMergeRequest]) - } - - // ========================================================================= - // mergeSubtree endpoint tests - // ========================================================================= - - "return 202 Accepted for subtree merge request" in { - when(mockMergeService.mergeSubtree(any[SubtreeMergeRequest])) - .thenReturn(Future.successful(createSuccessResponse())) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "anchorHaplogroupName" -> "R1b", - "sourceTree" -> Json.obj( - "name" -> "R1b-L21", - "variants" -> Json.arr(Json.obj("name" -> "L21")) - ), - "sourceName" -> "ytree.net" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/subtree") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe ACCEPTED - val json = contentAsJson(result) - (json \ "status").as[String] mustBe "Processing" - } - - "return 202 Accepted even when anchor haplogroup error occurs (fire-and-forget)" in { - // With fire-and-forget pattern, validation errors occur in the background - // The client still receives 202 Accepted immediately - when(mockMergeService.mergeSubtree(any[SubtreeMergeRequest])) - .thenReturn(Future.failed(new IllegalArgumentException("Anchor haplogroup 'NONEXISTENT' not found"))) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "anchorHaplogroupName" -> "NONEXISTENT", - "sourceTree" -> Json.obj("name" -> "Test"), - "sourceName" -> "test" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/subtree") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe 
ACCEPTED - val json = contentAsJson(result) - (json \ "status").as[String] mustBe "Processing" - } - - "return 400 for missing anchorHaplogroupName" in { - val requestBody = Json.obj( - "haplogroupType" -> "Y", - // Missing anchorHaplogroupName - "sourceTree" -> Json.obj("name" -> "Test"), - "sourceName" -> "test" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/subtree") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe BAD_REQUEST - } - - // ========================================================================= - // previewMerge endpoint tests - // ========================================================================= - - "return 200 for preview request" in { - when(mockMergeService.previewMerge(any[MergePreviewRequest])) - .thenReturn(Future.successful(createPreviewResponse())) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj( - "name" -> "R1b", - "variants" -> Json.arr(Json.obj("name" -> "M269")) - ), - "sourceName" -> "ytree.net" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/preview") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe OK - val json = contentAsJson(result) - (json \ "newNodes").as[List[String]] must contain("NewNode1") - (json \ "statistics" \ "nodesProcessed").as[Int] mustBe 10 - } - - "return preview with conflicts" in { - val previewWithConflicts = MergePreviewResponse( - statistics = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0), - conflicts = List( - MergeConflict( - haplogroupName = "R1b-L21", - field = "formedYbp", - existingValue = "4500", - newValue = "4800", - resolution = "will_update", - existingSource = "ISOGG", - newSource = "ytree.net" - ) - ), - splits = List.empty, - ambiguities = List.empty, - newNodes = List.empty, - updatedNodes = 
List("R1b-L21"), - unchangedNodes = List.empty - ) - - when(mockMergeService.previewMerge(any[MergePreviewRequest])) - .thenReturn(Future.successful(previewWithConflicts)) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj("name" -> "R1b-L21", "formedYbp" -> 4800), - "sourceName" -> "ytree.net" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/preview") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe OK - val json = contentAsJson(result) - (json \ "conflicts").as[List[MergeConflict]] must have size 1 - (json \ "conflicts" \ 0 \ "field").as[String] mustBe "formedYbp" - } - - "accept preview with optional anchor" in { - when(mockMergeService.previewMerge(any[MergePreviewRequest])) - .thenReturn(Future.successful(createPreviewResponse())) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "anchorHaplogroupName" -> "R1b", - "sourceTree" -> Json.obj("name" -> "R1b-L21"), - "sourceName" -> "ytree.net" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/preview") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe OK - } - - "handle preview service exceptions" in { - when(mockMergeService.previewMerge(any[MergePreviewRequest])) - .thenReturn(Future.failed(new RuntimeException("Index build failed"))) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj("name" -> "Test"), - "sourceName" -> "test" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/preview") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe INTERNAL_SERVER_ERROR - } - - // ========================================================================= - // MT DNA tests - // 
========================================================================= - - "handle MT DNA haplogroup type" in { - when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) - .thenReturn(Future.successful(createSuccessResponse())) - - val requestBody = Json.obj( - "haplogroupType" -> "MT", - "sourceTree" -> Json.obj( - "name" -> "H1", - "variants" -> Json.arr(Json.obj("name" -> "H1-defining")) - ), - "sourceName" -> "mtDNA-tree" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe ACCEPTED - } - - // ========================================================================= - // Complex tree structure tests - // ========================================================================= - - "handle deeply nested tree in request" in { - when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) - .thenReturn(Future.successful(createSuccessResponse(nodesCreated = 10))) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj( - "name" -> "R1b", - "variants" -> Json.arr(Json.obj("name" -> "M269")), - "children" -> Json.arr( - Json.obj( - "name" -> "R1b-L21", - "variants" -> Json.arr(Json.obj("name" -> "L21")), - "children" -> Json.arr( - Json.obj( - "name" -> "R1b-DF13", - "variants" -> Json.arr(Json.obj("name" -> "DF13")), - "children" -> Json.arr( - Json.obj( - "name" -> "R1b-Z39589", - "variants" -> Json.arr(Json.obj("name" -> "Z39589")) - ) - ) - ) - ) - ) - ) - ), - "sourceName" -> "ytree.net" - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe ACCEPTED - } - - // ========================================================================= - // Dry run tests - // 
========================================================================= - - "handle dry run request" in { - when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) - .thenReturn(Future.successful(createSuccessResponse())) - - val requestBody = Json.obj( - "haplogroupType" -> "Y", - "sourceTree" -> Json.obj("name" -> "Test"), - "sourceName" -> "test", - "dryRun" -> true - ) - - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withJsonBody(requestBody) - - val result = route(app, request).get - - status(result) mustBe ACCEPTED - } - - // ========================================================================= - // Content-Type tests - // ========================================================================= - - "return 415 for non-JSON content type" in { - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "text/plain") - .withBody("not json") - - val result = route(app, request).get - - status(result) mustBe UNSUPPORTED_MEDIA_TYPE - } - - "return 400 for malformed JSON" in { - val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") - .withHeaders("Content-Type" -> "application/json") - .withBody("{invalid json") - - val result = route(app, request).get - - status(result) mustBe BAD_REQUEST - } - } -} diff --git a/test/controllers/HomeControllerSpec.scala b/test/controllers/HomeControllerSpec.scala deleted file mode 100644 index e1e26060..00000000 --- a/test/controllers/HomeControllerSpec.scala +++ /dev/null @@ -1,90 +0,0 @@ -package controllers - -import org.scalatestplus.play.* -import org.scalatestplus.play.guice.* -import play.api.test.* -import play.api.test.Helpers.* -import org.scalatest.BeforeAndAfterAll -import play.api.Application -import play.api.inject.guice.GuiceApplicationBuilder - -class HomeControllerSpec extends PlaySpec with GuiceOneAppPerSuite with Injecting with BeforeAndAfterAll { - - override def 
fakeApplication(): Application = { - new GuiceApplicationBuilder() - .configure("play.evolutions.enabled" -> false) - .build() - } - - "HomeController" should { - - "render the index page" in { - val request = FakeRequest(GET, "/") - val home = route(app, request).get - - status(home) mustBe OK - contentType(home) mustBe Some("text/html") - contentAsString(home) must include ("Decoding Us") - } - - "render the cookie usage page" in { - val request = FakeRequest(GET, "/cookie-usage") - val page = route(app, request).get - - status(page) mustBe OK - contentType(page) mustBe Some("text/html") - contentAsString(page) must include ("Cookie Usage") - } - - "render the privacy policy page" in { - val request = FakeRequest(GET, "/privacy") - val page = route(app, request).get - - status(page) mustBe OK - contentType(page) mustBe Some("text/html") - contentAsString(page) must include ("Privacy Policy") - } - - "render the terms of use page" in { - val request = FakeRequest(GET, "/terms") - val page = route(app, request).get - - status(page) mustBe OK - contentType(page) mustBe Some("text/html") - contentAsString(page) must include ("Terms of Use") - } - - "render the faq page" in { - val request = FakeRequest(GET, "/faq") - val page = route(app, request).get - - status(page) mustBe OK - contentType(page) mustBe Some("text/html") - contentAsString(page) must include ("FAQ") - } - - "generate a sitemap" in { - val request = FakeRequest(GET, "/sitemap.xml").withHeaders("X-Forwarded-Proto" -> "https") - val sitemap = route(app, request).get - - status(sitemap) mustBe OK - contentType(sitemap) mustBe Some("application/xml") - val content = contentAsString(sitemap) - content must startWith("") - content must include ("https://${request.host}/") - content must include ("") - } - - "generate a robots.txt" in { - val request = FakeRequest(GET, "/robots.txt").withHeaders("X-Forwarded-Proto" -> "https") - val robots = route(app, request).get - - status(robots) mustBe OK - 
contentType(robots) mustBe Some("text/plain") - val content = contentAsString(robots) - content must include ("User-agent: *") - content must include (s"Sitemap: https://${request.host}/sitemap.xml") - } - } -} diff --git a/test/controllers/InstrumentProposalControllerSpec.scala b/test/controllers/InstrumentProposalControllerSpec.scala deleted file mode 100644 index 205b2e9e..00000000 --- a/test/controllers/InstrumentProposalControllerSpec.scala +++ /dev/null @@ -1,199 +0,0 @@ -package controllers - -import helpers.ServiceSpec -import models.domain.genomics.* -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{reset, verify, when} -import play.api.libs.json.Json -import play.api.test.FakeRequest -import play.api.test.Helpers.* -import repositories.{InstrumentObservationRepository, InstrumentProposalRepository} -import services.{ConflictingLab, InstrumentConflict, InstrumentProposalService} - -import java.time.LocalDateTime -import scala.concurrent.Future - -class InstrumentProposalControllerSpec extends ServiceSpec { - - val mockProposalService: InstrumentProposalService = mock[InstrumentProposalService] - val mockProposalRepo: InstrumentProposalRepository = mock[InstrumentProposalRepository] - val mockObservationRepo: InstrumentObservationRepository = mock[InstrumentObservationRepository] - - override def beforeEach(): Unit = { - reset(mockProposalService, mockProposalRepo, mockObservationRepo) - } - - val sampleProposal: InstrumentAssociationProposal = InstrumentAssociationProposal( - id = Some(1), - instrumentId = "A00123", - proposedLabName = "Dante Labs", - observationCount = 7, - distinctCitizenCount = 4, - confidenceScore = 0.85, - status = ProposalStatus.ReadyForReview - ) - - val sampleObservation: InstrumentObservation = InstrumentObservation( - id = Some(1), - atUri = "at://did:plc:abc/us.decoding.instrument.observation/1", - instrumentId = "A00123", - labName = "Dante Labs", - biosampleRef = 
"at://did:plc:abc/us.decoding.biosample/1", - confidence = ObservationConfidence.Known - ) - - "InstrumentProposalController" should { - - "list pending proposals by default" in { - when(mockProposalRepo.findPending()) - .thenReturn(Future.successful(Seq(sampleProposal))) - - whenReady(mockProposalRepo.findPending()) { proposals => - proposals must have size 1 - proposals.head.instrumentId mustBe "A00123" - } - } - - "list proposals filtered by status" in { - when(mockProposalRepo.findByStatus(ProposalStatus.ReadyForReview)) - .thenReturn(Future.successful(Seq(sampleProposal))) - - whenReady(mockProposalRepo.findByStatus(ProposalStatus.ReadyForReview)) { proposals => - proposals must have size 1 - proposals.head.status mustBe ProposalStatus.ReadyForReview - } - } - - "get proposal detail with observations" in { - when(mockProposalRepo.findById(1)) - .thenReturn(Future.successful(Some(sampleProposal))) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(Seq(sampleObservation, sampleObservation.copy( - id = Some(2), - atUri = "at://did:plc:def/us.decoding.instrument.observation/2", - biosampleRef = "at://did:plc:def/us.decoding.biosample/1" - )))) - - whenReady(mockProposalRepo.findById(1)) { proposalOpt => - proposalOpt mustBe defined - val proposal = proposalOpt.get - proposal.instrumentId mustBe "A00123" - - whenReady(mockObservationRepo.findByInstrumentId(proposal.instrumentId)) { observations => - observations must have size 2 - observations.map(_.biosampleRef).distinct must have size 2 - } - } - } - - "return not found for nonexistent proposal detail" in { - when(mockProposalRepo.findById(99)) - .thenReturn(Future.successful(None)) - - whenReady(mockProposalRepo.findById(99)) { result => - result mustBe None - } - } - - "accept a proposal via service" in { - val accepted = sampleProposal.copy( - status = ProposalStatus.Accepted, - reviewedBy = Some("curator@test.com"), - reviewNotes = Some("Confirmed"), - acceptedLabId = 
Some(10) - ) - when(mockProposalService.acceptProposal( - meq(1), meq("curator@test.com"), meq("Dante Labs"), meq(None), meq(None), meq(Some("Confirmed")) - )).thenReturn(Future.successful(Right(accepted))) - - whenReady(mockProposalService.acceptProposal(1, "curator@test.com", "Dante Labs", None, None, Some("Confirmed"))) { result => - result mustBe a[Right[?, ?]] - val proposal = result.toOption.get - proposal.status mustBe ProposalStatus.Accepted - proposal.reviewedBy mustBe Some("curator@test.com") - proposal.acceptedLabId mustBe Some(10) - } - } - - "return error when accepting already-accepted proposal" in { - when(mockProposalService.acceptProposal( - meq(1), meq("curator@test.com"), meq("Dante Labs"), meq(None), meq(None), meq(None) - )).thenReturn(Future.successful(Left("Proposal 1 is already accepted"))) - - whenReady(mockProposalService.acceptProposal(1, "curator@test.com", "Dante Labs", None, None, None)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("already accepted") - } - } - - "reject a proposal via service" in { - val rejected = sampleProposal.copy( - status = ProposalStatus.Rejected, - reviewedBy = Some("curator@test.com"), - reviewNotes = Some("Insufficient evidence") - ) - when(mockProposalService.rejectProposal(meq(1), meq("curator@test.com"), meq("Insufficient evidence"))) - .thenReturn(Future.successful(Right(rejected))) - - whenReady(mockProposalService.rejectProposal(1, "curator@test.com", "Insufficient evidence")) { result => - result mustBe a[Right[?, ?]] - val proposal = result.toOption.get - proposal.status mustBe ProposalStatus.Rejected - proposal.reviewNotes mustBe Some("Insufficient evidence") - } - } - - "detect conflicts across proposals" in { - val conflict = InstrumentConflict( - instrumentId = "A00123", - proposals = Seq( - ConflictingLab("Dante Labs", 5, 0.625), - ConflictingLab("Nebula Genomics", 3, 0.375) - ), - dominantLabName = "Dante Labs", - dominantRatio = 0.625 - ) - 
when(mockProposalService.detectConflicts()) - .thenReturn(Future.successful(Seq(conflict))) - - whenReady(mockProposalService.detectConflicts()) { conflicts => - conflicts must have size 1 - conflicts.head.instrumentId mustBe "A00123" - conflicts.head.dominantRatio mustBe 0.625 +- 0.01 - conflicts.head.proposals must have size 2 - } - } - - "return empty conflicts list when no conflicts" in { - when(mockProposalService.detectConflicts()) - .thenReturn(Future.successful(Seq.empty)) - - whenReady(mockProposalService.detectConflicts()) { conflicts => - conflicts mustBe empty - } - } - - "accept proposal with manufacturer and model overrides" in { - val accepted = sampleProposal.copy( - status = ProposalStatus.Accepted, - reviewedBy = Some("curator@test.com"), - acceptedLabId = Some(10) - ) - when(mockProposalService.acceptProposal( - meq(1), meq("curator@test.com"), meq("Dante Labs"), - meq(Some("Illumina")), meq(Some("NovaSeq X")), meq(Some("Confirmed via publications")) - )).thenReturn(Future.successful(Right(accepted))) - - whenReady(mockProposalService.acceptProposal( - 1, "curator@test.com", "Dante Labs", - Some("Illumina"), Some("NovaSeq X"), Some("Confirmed via publications") - )) { result => - result mustBe a[Right[?, ?]] - verify(mockProposalService).acceptProposal( - meq(1), meq("curator@test.com"), meq("Dante Labs"), - meq(Some("Illumina")), meq(Some("NovaSeq X")), meq(Some("Confirmed via publications")) - ) - } - } - } -} diff --git a/test/controllers/LanguageControllerSpec.scala b/test/controllers/LanguageControllerSpec.scala deleted file mode 100644 index 09551a49..00000000 --- a/test/controllers/LanguageControllerSpec.scala +++ /dev/null @@ -1,53 +0,0 @@ -package controllers - -import helpers.ServiceSpec -import play.api.i18n.{Lang, MessagesApi} -import play.api.test.{FakeRequest, Helpers} -import play.api.test.Helpers.* - -class LanguageControllerSpec extends ServiceSpec { - - val controller = new LanguageController(Helpers.stubControllerComponents()) 
- - "LanguageController.switchLanguage" should { - - "redirect to referer with language cookie for English" in { - val request = FakeRequest("GET", "/language/en") - .withHeaders("Referer" -> "/ytree") - val result = controller.switchLanguage("en").apply(request) - status(result) mustBe SEE_OTHER - redirectLocation(result) mustBe Some("/ytree") - } - - "redirect to referer with language cookie for French" in { - val request = FakeRequest("GET", "/language/fr") - .withHeaders("Referer" -> "/references") - val result = controller.switchLanguage("fr").apply(request) - status(result) mustBe SEE_OTHER - redirectLocation(result) mustBe Some("/references") - } - - "redirect to referer with language cookie for Spanish" in { - val request = FakeRequest("GET", "/language/es") - .withHeaders("Referer" -> "/") - val result = controller.switchLanguage("es").apply(request) - status(result) mustBe SEE_OTHER - redirectLocation(result) mustBe Some("/") - } - - "redirect to root when no referer is present" in { - val request = FakeRequest("GET", "/language/en") - val result = controller.switchLanguage("en").apply(request) - status(result) mustBe SEE_OTHER - redirectLocation(result) mustBe Some("/") - } - - "redirect without error for unsupported language" in { - val request = FakeRequest("GET", "/language/de") - .withHeaders("Referer" -> "/ytree") - val result = controller.switchLanguage("de").apply(request) - status(result) mustBe SEE_OTHER - redirectLocation(result) mustBe Some("/ytree") - } - } -} diff --git a/test/controllers/TreeVersioningCuratorControllerSpec.scala b/test/controllers/TreeVersioningCuratorControllerSpec.scala deleted file mode 100644 index 40505b8d..00000000 --- a/test/controllers/TreeVersioningCuratorControllerSpec.scala +++ /dev/null @@ -1,275 +0,0 @@ -package controllers - -import models.HaplogroupType -import models.domain.haplogroups.* -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{reset, when} -import 
org.scalatest.BeforeAndAfterEach -import org.scalatest.concurrent.ScalaFutures -import org.scalatestplus.mockito.MockitoSugar -import org.scalatestplus.play.PlaySpec -import org.scalatestplus.play.guice.GuiceOneAppPerSuite -import play.api.Application -import play.api.inject.bind -import play.api.inject.guice.GuiceApplicationBuilder -import play.api.test.Helpers.* -import play.api.test.{FakeRequest, Injecting} -import services.TreeVersioningService - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -/** - * Controller spec for TreeVersioningCuratorController. - * - * Note: Curator routes require authentication. These tests verify: - * 1. Routes exist and respond (with redirect to auth when not authenticated) - * 2. Service interactions work correctly when called - * - * For full functionality testing, see TreeVersioningServiceSpec which - * provides comprehensive unit tests for the business logic. - */ -class TreeVersioningCuratorControllerSpec extends PlaySpec - with GuiceOneAppPerSuite - with Injecting - with MockitoSugar - with ScalaFutures - with BeforeAndAfterEach { - - // Mock service - val mockTreeVersioningService: TreeVersioningService = mock[TreeVersioningService] - - override def fakeApplication(): Application = { - new GuiceApplicationBuilder() - .configure( - "play.evolutions.enabled" -> false - ) - .overrides( - bind[TreeVersioningService].toInstance(mockTreeVersioningService) - ) - .build() - } - - override def beforeEach(): Unit = { - reset(mockTreeVersioningService) - } - - // Test fixtures - val now: LocalDateTime = LocalDateTime.now() - - def createChangeSetSummary( - id: Int, - haplogroupType: HaplogroupType = HaplogroupType.Y, - status: ChangeSetStatus = ChangeSetStatus.ReadyForReview - ): ChangeSetSummary = ChangeSetSummary( - id = id, - haplogroupType = haplogroupType, - name = s"ISOGG-2024-12-16-$id", - sourceName = "ISOGG", - status = status, - createdAt = now, - createdBy = "system", - statistics = 
ChangeSetStatistics(nodesProcessed = 100, nodesCreated = 20), - totalChanges = 50, - pendingChanges = 30, - reviewedChanges = 20 - ) - - /** - * Curator routes require authentication. - * These tests verify routes exist and redirect to auth (303) when not authenticated. - */ - "TreeVersioningCuratorController routes (unauthenticated)" should { - - // ========================================================================= - // Verify routes exist and require authentication - // ========================================================================= - - "redirect to auth for change sets list" in { - val request = FakeRequest(GET, "/curator/change-sets") - val result = route(app, request).get - - // 303 indicates route exists and redirects to auth - status(result) mustBe SEE_OTHER - } - - "redirect to auth for change sets fragment" in { - val request = FakeRequest(GET, "/curator/change-sets/fragment") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for change set detail panel" in { - val request = FakeRequest(GET, "/curator/change-sets/1/panel") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for pending changes" in { - val request = FakeRequest(GET, "/curator/change-sets/1/changes/pending") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for diff view" in { - val request = FakeRequest(GET, "/curator/change-sets/1/diff") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for diff fragment" in { - val request = FakeRequest(GET, "/curator/change-sets/1/diff/fragment") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for ambiguity report" in { - val request = FakeRequest(GET, "/curator/change-sets/1/ambiguity-report") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - // 
========================================================================= - // POST routes redirect to auth when not authenticated - // ========================================================================= - - "redirect to auth for start review" in { - val request = FakeRequest(POST, "/curator/change-sets/1/start-review") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for apply change set" in { - val request = FakeRequest(POST, "/curator/change-sets/1/apply") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for discard change set" in { - val request = FakeRequest(POST, "/curator/change-sets/1/discard") - .withFormUrlEncodedBody("reason" -> "Test reason for discard") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for approve all pending" in { - val request = FakeRequest(POST, "/curator/change-sets/1/approve-all") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - "redirect to auth for review change" in { - val request = FakeRequest(POST, "/curator/change-sets/1/changes/100/review") - .withFormUrlEncodedBody("action" -> "APPLIED") - val result = route(app, request).get - - status(result) mustBe SEE_OTHER - } - - // ========================================================================= - // Verify invalid routes return 404 - // ========================================================================= - - "return 404 for invalid route" in { - val request = FakeRequest(GET, "/curator/change-sets/invalid-endpoint") - val result = route(app, request).get - - status(result) mustBe NOT_FOUND - } - } - - /** - * These tests verify that the URL routes are correctly mapped. - * They check that the routes exist and respond appropriately. 
- */ - "TreeVersioningCuratorController route mapping" should { - - "have GET route for /curator/change-sets" in { - val request = FakeRequest(GET, "/curator/change-sets") - val result = route(app, request) - result mustBe defined - } - - "have GET route for /curator/change-sets/fragment" in { - val request = FakeRequest(GET, "/curator/change-sets/fragment") - val result = route(app, request) - result mustBe defined - } - - "have GET route for /curator/change-sets/:id/panel" in { - val request = FakeRequest(GET, "/curator/change-sets/1/panel") - val result = route(app, request) - result mustBe defined - } - - "have GET route for /curator/change-sets/:id/changes/pending" in { - val request = FakeRequest(GET, "/curator/change-sets/1/changes/pending") - val result = route(app, request) - result mustBe defined - } - - "have POST route for /curator/change-sets/:id/start-review" in { - val request = FakeRequest(POST, "/curator/change-sets/1/start-review") - val result = route(app, request) - result mustBe defined - } - - "have POST route for /curator/change-sets/:id/apply" in { - val request = FakeRequest(POST, "/curator/change-sets/1/apply") - val result = route(app, request) - result mustBe defined - } - - "have POST route for /curator/change-sets/:id/discard" in { - val request = FakeRequest(POST, "/curator/change-sets/1/discard") - val result = route(app, request) - result mustBe defined - } - - "have POST route for /curator/change-sets/:id/approve-all" in { - val request = FakeRequest(POST, "/curator/change-sets/1/approve-all") - val result = route(app, request) - result mustBe defined - } - - "have POST route for /curator/change-sets/:changeSetId/changes/:changeId/review" in { - val request = FakeRequest(POST, "/curator/change-sets/1/changes/100/review") - val result = route(app, request) - result mustBe defined - } - - "have GET route for /curator/change-sets/:id/diff" in { - val request = FakeRequest(GET, "/curator/change-sets/1/diff") - val result = route(app, 
request) - result mustBe defined - } - - "have GET route for /curator/change-sets/:id/diff/fragment" in { - val request = FakeRequest(GET, "/curator/change-sets/1/diff/fragment") - val result = route(app, request) - result mustBe defined - } - - "have GET route for /curator/change-sets/:id/ambiguity-report" in { - val request = FakeRequest(GET, "/curator/change-sets/1/ambiguity-report") - val result = route(app, request) - result mustBe defined - } - - "have GET route for /curator/change-sets/:id/ambiguity-report/download" in { - val request = FakeRequest(GET, "/curator/change-sets/1/ambiguity-report/download") - val result = route(app, request) - result mustBe defined - } - } -} diff --git a/test/helpers/TestBase.scala b/test/helpers/TestBase.scala deleted file mode 100644 index 179200da..00000000 --- a/test/helpers/TestBase.scala +++ /dev/null @@ -1,61 +0,0 @@ -package helpers - -import org.scalatest.concurrent.ScalaFutures -import org.scalatest.time.{Millis, Seconds, Span} -import org.scalatest.BeforeAndAfterEach -import org.scalatestplus.mockito.MockitoSugar -import org.scalatestplus.play.PlaySpec -import org.scalatestplus.play.guice.GuiceOneAppPerSuite -import play.api.Application -import play.api.inject.guice.GuiceApplicationBuilder - -import scala.concurrent.ExecutionContext - -/** - * Base trait for service unit tests with mocked dependencies. - * Provides implicit ExecutionContext and ScalaFutures patience config. - */ -trait ServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures with BeforeAndAfterEach { - implicit val ec: ExecutionContext = ExecutionContext.global - implicit val patience: PatienceConfig = PatienceConfig( - timeout = Span(5, Seconds), - interval = Span(100, Millis) - ) -} - -/** - * Base trait for controller integration tests. - * Boots a Play application with H2 in-memory database and disabled startup services. - * Subclasses should override `additionalOverrides` to bind mock services. 
- */ -trait ControllerSpec extends PlaySpec - with GuiceOneAppPerSuite - with MockitoSugar - with ScalaFutures - with BeforeAndAfterEach { - - implicit val patience: PatienceConfig = PatienceConfig( - timeout = Span(5, Seconds), - interval = Span(100, Millis) - ) - - /** - * Override this in subclasses to provide additional Guice bindings (e.g., mock services). - */ - protected def additionalConfig: Map[String, Any] = Map.empty - protected def additionalOverrides: Seq[play.api.inject.Binding[?]] = Seq.empty - - override def fakeApplication(): Application = { - val builder = new GuiceApplicationBuilder() - .configure( - Map( - "config.resource" -> "application.test.conf", - "api.key.enabled" -> false - ) ++ additionalConfig - ) - - additionalOverrides.foldLeft(builder) { (b, binding) => - b.overrides(binding) - }.build() - } -} diff --git a/test/models/api/haplogroups/TreeMergeModelsSpec.scala b/test/models/api/haplogroups/TreeMergeModelsSpec.scala deleted file mode 100644 index 196a86ae..00000000 --- a/test/models/api/haplogroups/TreeMergeModelsSpec.scala +++ /dev/null @@ -1,577 +0,0 @@ -package models.api.haplogroups - -import models.HaplogroupType -import org.scalatest.funspec.AnyFunSpec -import org.scalatest.matchers.must.Matchers -import play.api.libs.json.{JsError, JsSuccess, Json} - -class TreeMergeModelsSpec extends AnyFunSpec with Matchers { - - describe("VariantInput") { - - describe("JSON serialization") { - - it("should deserialize a simple variant") { - val json = Json.parse("""{"name": "M207"}""") - json.validate[VariantInput] match { - case JsSuccess(v, _) => - v.name mustBe "M207" - v.aliases mustBe List.empty - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should deserialize a variant with aliases") { - val json = Json.parse("""{"name": "M207", "aliases": ["Page37", "UTY2"]}""") - json.validate[VariantInput] match { - case JsSuccess(v, _) => - v.name mustBe "M207" - v.aliases mustBe List("Page37", "UTY2") - case 
JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should serialize to JSON") { - val variant = VariantInput("M207", List("Page37", "UTY2")) - val json = Json.toJson(variant) - (json \ "name").as[String] mustBe "M207" - (json \ "aliases").as[List[String]] mustBe List("Page37", "UTY2") - } - } - } - - describe("PhyloNodeInput") { - - describe("JSON serialization") { - - it("should deserialize a simple node with variant objects") { - val json = Json.parse("""{ - "name": "R1b-L21", - "variants": [{"name": "L21"}, {"name": "S145"}] - }""") - - json.validate[PhyloNodeInput] match { - case JsSuccess(node, _) => - node.name mustBe "R1b-L21" - node.variants.map(_.name) mustBe List("L21", "S145") - node.children mustBe List.empty - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should deserialize a node with variant aliases") { - val json = Json.parse("""{ - "name": "R", - "variants": [{"name": "M207", "aliases": ["Page37", "UTY2"]}] - }""") - - json.validate[PhyloNodeInput] match { - case JsSuccess(node, _) => - node.variants must have size 1 - node.variants.head.name mustBe "M207" - node.variants.head.aliases mustBe List("Page37", "UTY2") - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should deserialize node with all age fields") { - val json = Json.parse("""{ - "name": "R1b-L21", - "variants": [{"name": "L21"}], - "formedYbp": 4500, - "formedYbpLower": 4200, - "formedYbpUpper": 4800, - "tmrcaYbp": 4000, - "tmrcaYbpLower": 3700, - "tmrcaYbpUpper": 4300 - }""") - - json.validate[PhyloNodeInput] match { - case JsSuccess(node, _) => - node.formedYbp mustBe Some(4500) - node.formedYbpLower mustBe Some(4200) - node.formedYbpUpper mustBe Some(4800) - node.tmrcaYbp mustBe Some(4000) - node.tmrcaYbpLower mustBe Some(3700) - node.tmrcaYbpUpper mustBe Some(4300) - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should deserialize nested children") { - val json = Json.parse("""{ - "name": 
"R1b-L21", - "variants": [{"name": "L21"}], - "children": [ - { - "name": "R1b-DF13", - "variants": [{"name": "DF13"}], - "children": [ - { - "name": "R1b-Z39589", - "variants": [{"name": "Z39589"}] - } - ] - } - ] - }""") - - json.validate[PhyloNodeInput] match { - case JsSuccess(node, _) => - node.name mustBe "R1b-L21" - node.children must have size 1 - node.children.head.name mustBe "R1b-DF13" - node.children.head.children must have size 1 - node.children.head.children.head.name mustBe "R1b-Z39589" - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should serialize to JSON") { - val node = PhyloNodeInput( - name = "R1b-L21", - variants = List(VariantInput("L21"), VariantInput("S145")), - formedYbp = Some(4500), - children = List( - PhyloNodeInput(name = "R1b-DF13", variants = List(VariantInput("DF13"))) - ) - ) - - val json = Json.toJson(node) - - (json \ "name").as[String] mustBe "R1b-L21" - (json \ "variants").as[List[VariantInput]].map(_.name) mustBe List("L21", "S145") - (json \ "formedYbp").as[Int] mustBe 4500 - (json \ "children").as[List[PhyloNodeInput]] must have size 1 - } - - it("should handle empty variants list") { - val json = Json.parse("""{"name": "Test"}""") - - json.validate[PhyloNodeInput] match { - case JsSuccess(node, _) => - node.variants mustBe List.empty - node.children mustBe List.empty - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - } - } - - describe("SourcePriorityConfig") { - - it("should deserialize with priority list") { - val json = Json.parse("""{ - "sourcePriorities": ["ISOGG", "ytree.net", "DecodingUs"], - "defaultPriority": 50 - }""") - - json.validate[SourcePriorityConfig] match { - case JsSuccess(config, _) => - config.sourcePriorities mustBe List("ISOGG", "ytree.net", "DecodingUs") - config.defaultPriority mustBe 50 - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should use default priority of 100") { - val json = Json.parse("""{ - "sourcePriorities": 
["ISOGG"] - }""") - - json.validate[SourcePriorityConfig] match { - case JsSuccess(config, _) => - config.defaultPriority mustBe 100 - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - } - - describe("ConflictStrategy") { - - it("should deserialize higher_priority_wins") { - val json = Json.parse("\"higher_priority_wins\"") - - json.validate[ConflictStrategy] match { - case JsSuccess(strategy, _) => - strategy mustBe ConflictStrategy.HigherPriorityWins - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should deserialize keep_existing") { - val json = Json.parse("\"keep_existing\"") - - json.validate[ConflictStrategy] match { - case JsSuccess(strategy, _) => - strategy mustBe ConflictStrategy.KeepExisting - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should deserialize always_update") { - val json = Json.parse("\"always_update\"") - - json.validate[ConflictStrategy] match { - case JsSuccess(strategy, _) => - strategy mustBe ConflictStrategy.AlwaysUpdate - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should fail for unknown strategy") { - val json = Json.parse("\"invalid_strategy\"") - - // The implementation throws an exception for invalid strategies - an[IllegalArgumentException] must be thrownBy { - json.as[ConflictStrategy] - } - } - - it("should serialize strategies correctly") { - Json.toJson[ConflictStrategy](ConflictStrategy.HigherPriorityWins).as[String] mustBe "higher_priority_wins" - Json.toJson[ConflictStrategy](ConflictStrategy.KeepExisting).as[String] mustBe "keep_existing" - Json.toJson[ConflictStrategy](ConflictStrategy.AlwaysUpdate).as[String] mustBe "always_update" - } - } - - describe("TreeMergeRequest") { - - it("should deserialize a full merge request") { - val json = Json.parse("""{ - "haplogroupType": "Y", - "sourceTree": { - "name": "R1b", - "variants": [{"name": "M269"}] - }, - "sourceName": "ytree.net", - "priorityConfig": { - "sourcePriorities": 
["ytree.net", "ISOGG"] - }, - "conflictStrategy": "higher_priority_wins", - "dryRun": true - }""") - - json.validate[TreeMergeRequest] match { - case JsSuccess(request, _) => - request.haplogroupType mustBe HaplogroupType.Y - request.sourceTree.name mustBe "R1b" - request.sourceName mustBe "ytree.net" - request.priorityConfig mustBe defined - request.conflictStrategy mustBe Some(ConflictStrategy.HigherPriorityWins) - request.dryRun mustBe true - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should deserialize minimal merge request") { - val json = Json.parse("""{ - "haplogroupType": "MT", - "sourceTree": {"name": "H"}, - "sourceName": "test" - }""") - - json.validate[TreeMergeRequest] match { - case JsSuccess(request, _) => - request.haplogroupType mustBe HaplogroupType.MT - request.priorityConfig mustBe None - request.conflictStrategy mustBe None - request.dryRun mustBe false - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should fail for invalid haplogroup type") { - val json = Json.parse("""{ - "haplogroupType": "INVALID", - "sourceTree": {"name": "Test"}, - "sourceName": "test" - }""") - - // The implementation throws an exception for invalid haplogroup types - an[IllegalArgumentException] must be thrownBy { - json.as[TreeMergeRequest] - } - } - } - - describe("SubtreeMergeRequest") { - - it("should deserialize a subtree merge request") { - val json = Json.parse("""{ - "haplogroupType": "Y", - "anchorHaplogroupName": "R1b", - "sourceTree": { - "name": "R1b-L21", - "variants": [{"name": "L21"}] - }, - "sourceName": "ytree.net", - "dryRun": false - }""") - - json.validate[SubtreeMergeRequest] match { - case JsSuccess(request, _) => - request.haplogroupType mustBe HaplogroupType.Y - request.anchorHaplogroupName mustBe "R1b" - request.sourceTree.name mustBe "R1b-L21" - request.sourceName mustBe "ytree.net" - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - } - - describe("MergePreviewRequest") { - - 
it("should deserialize with optional anchor") { - val json = Json.parse("""{ - "haplogroupType": "Y", - "anchorHaplogroupName": "R1b", - "sourceTree": {"name": "Test"}, - "sourceName": "test" - }""") - - json.validate[MergePreviewRequest] match { - case JsSuccess(request, _) => - request.anchorHaplogroupName mustBe Some("R1b") - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should deserialize without anchor") { - val json = Json.parse("""{ - "haplogroupType": "Y", - "sourceTree": {"name": "Test"}, - "sourceName": "test" - }""") - - json.validate[MergePreviewRequest] match { - case JsSuccess(request, _) => - request.anchorHaplogroupName mustBe None - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - } - - describe("MergeStatistics") { - - it("should serialize all fields") { - val stats = MergeStatistics( - nodesProcessed = 100, - nodesCreated = 50, - nodesUpdated = 30, - nodesUnchanged = 20, - variantsAdded = 200, - variantsUpdated = 50, - relationshipsCreated = 49, - relationshipsUpdated = 10, - splitOperations = 5 - ) - - val json = Json.toJson(stats) - - (json \ "nodesProcessed").as[Int] mustBe 100 - (json \ "nodesCreated").as[Int] mustBe 50 - (json \ "nodesUpdated").as[Int] mustBe 30 - (json \ "nodesUnchanged").as[Int] mustBe 20 - (json \ "variantsAdded").as[Int] mustBe 200 - (json \ "variantsUpdated").as[Int] mustBe 50 - (json \ "relationshipsCreated").as[Int] mustBe 49 - (json \ "relationshipsUpdated").as[Int] mustBe 10 - (json \ "splitOperations").as[Int] mustBe 5 - } - - it("should create empty statistics") { - val empty = MergeStatistics.empty - - empty.nodesProcessed mustBe 0 - empty.nodesCreated mustBe 0 - empty.nodesUpdated mustBe 0 - empty.nodesUnchanged mustBe 0 - } - - it("should combine statistics correctly") { - val stats1 = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0) - val stats2 = MergeStatistics(20, 10, 6, 4, 40, 10, 9, 2, 1) - - val combined = MergeStatistics.combine(stats1, stats2) - - 
combined.nodesProcessed mustBe 30 - combined.nodesCreated mustBe 15 - combined.nodesUpdated mustBe 9 - combined.nodesUnchanged mustBe 6 - combined.variantsAdded mustBe 60 - combined.variantsUpdated mustBe 15 - combined.relationshipsCreated mustBe 13 - combined.relationshipsUpdated mustBe 3 - combined.splitOperations mustBe 1 - } - } - - describe("MergeConflict") { - - it("should serialize conflict details") { - val conflict = MergeConflict( - haplogroupName = "R1b-L21", - field = "formedYbp", - existingValue = "4500", - newValue = "4800", - resolution = "updated", - existingSource = "ISOGG", - newSource = "ytree.net" - ) - - val json = Json.toJson(conflict) - - (json \ "haplogroupName").as[String] mustBe "R1b-L21" - (json \ "field").as[String] mustBe "formedYbp" - (json \ "existingValue").as[String] mustBe "4500" - (json \ "newValue").as[String] mustBe "4800" - (json \ "resolution").as[String] mustBe "updated" - (json \ "existingSource").as[String] mustBe "ISOGG" - (json \ "newSource").as[String] mustBe "ytree.net" - } - - it("should round-trip serialize") { - val original = MergeConflict( - haplogroupName = "Test", - field = "description", - existingValue = "old", - newValue = "new", - resolution = "kept_existing", - existingSource = "A", - newSource = "B" - ) - - val restored = Json.toJson(original).as[MergeConflict] - - restored mustBe original - } - } - - describe("SplitOperation") { - - it("should serialize split details") { - val split = SplitOperation( - parentName = "R1b-L21", - newIntermediateName = "R1b-L21a", - variantsRedistributed = List("V1", "V2"), - childrenReassigned = List("R1b-Z39589", "R1b-Z39590"), - source = "ytree.net" - ) - - val json = Json.toJson(split) - - (json \ "parentName").as[String] mustBe "R1b-L21" - (json \ "newIntermediateName").as[String] mustBe "R1b-L21a" - (json \ "variantsRedistributed").as[List[String]] mustBe List("V1", "V2") - (json \ "childrenReassigned").as[List[String]] mustBe List("R1b-Z39589", "R1b-Z39590") - (json \ 
"source").as[String] mustBe "ytree.net" - } - } - - describe("TreeMergeResponse") { - - it("should serialize successful response") { - val response = TreeMergeResponse( - success = true, - message = "Merge completed successfully", - statistics = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0), - conflicts = List.empty, - splits = List.empty, - errors = List.empty - ) - - val json = Json.toJson(response) - - (json \ "success").as[Boolean] mustBe true - (json \ "message").as[String] mustBe "Merge completed successfully" - (json \ "statistics" \ "nodesProcessed").as[Int] mustBe 10 - (json \ "conflicts").as[List[MergeConflict]] mustBe empty - } - - it("should create failure response") { - val response = TreeMergeResponse.failure( - "Merge failed due to validation error", - List("Error 1", "Error 2") - ) - - response.success mustBe false - response.message mustBe "Merge failed due to validation error" - response.errors mustBe List("Error 1", "Error 2") - response.statistics mustBe MergeStatistics.empty - } - - it("should serialize response with conflicts and errors") { - val response = TreeMergeResponse( - success = false, - message = "Completed with warnings", - statistics = MergeStatistics.empty, - conflicts = List( - MergeConflict("Node1", "field1", "old", "new", "kept", "A", "B") - ), - splits = List.empty, - errors = List("Warning: some nodes skipped") - ) - - val json = Json.toJson(response) - - (json \ "conflicts").as[List[MergeConflict]] must have size 1 - (json \ "errors").as[List[String]] must have size 1 - } - } - - describe("MergePreviewResponse") { - - it("should serialize preview with all details") { - val response = MergePreviewResponse( - statistics = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0), - conflicts = List( - MergeConflict("Node1", "formedYbp", "4500", "4800", "will_update", "A", "B") - ), - splits = List.empty, - ambiguities = List.empty, - newNodes = List("NewNode1", "NewNode2"), - updatedNodes = List("UpdatedNode1"), - unchangedNodes = 
List("UnchangedNode1", "UnchangedNode2") - ) - - val json = Json.toJson(response) - - (json \ "newNodes").as[List[String]] mustBe List("NewNode1", "NewNode2") - (json \ "updatedNodes").as[List[String]] mustBe List("UpdatedNode1") - (json \ "unchangedNodes").as[List[String]] mustBe List("UnchangedNode1", "UnchangedNode2") - (json \ "statistics" \ "nodesCreated").as[Int] mustBe 5 - } - } - - describe("HaplogroupType in requests") { - - it("should accept Y haplogroup type") { - val json = Json.parse("""{ - "haplogroupType": "Y", - "sourceTree": {"name": "R1b"}, - "sourceName": "test" - }""") - - json.validate[TreeMergeRequest] match { - case JsSuccess(request, _) => - request.haplogroupType mustBe HaplogroupType.Y - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - - it("should accept MT haplogroup type") { - val json = Json.parse("""{ - "haplogroupType": "MT", - "sourceTree": {"name": "H"}, - "sourceName": "test" - }""") - - json.validate[TreeMergeRequest] match { - case JsSuccess(request, _) => - request.haplogroupType mustBe HaplogroupType.MT - case JsError(errors) => fail(s"Parse failed: $errors") - } - } - } -} diff --git a/test/models/domain/genomics/EmbeddedCoverageSpec.scala b/test/models/domain/genomics/EmbeddedCoverageSpec.scala deleted file mode 100644 index 9b03586d..00000000 --- a/test/models/domain/genomics/EmbeddedCoverageSpec.scala +++ /dev/null @@ -1,98 +0,0 @@ -package models.domain.genomics - -import org.scalatestplus.play.PlaySpec -import play.api.libs.json.Json - -import java.time.LocalDateTime - -class EmbeddedCoverageSpec extends PlaySpec { - - "EmbeddedCoverage" should { - - "serialize to JSON" in { - val coverage = EmbeddedCoverage( - meanDepth = Some(30.5), - medianDepth = Some(29.0), - percentCoverageAt1x = Some(0.99), - percentCoverageAt5x = Some(0.97), - percentCoverageAt10x = Some(0.95), - percentCoverageAt20x = Some(0.90), - percentCoverageAt30x = Some(0.75), - basesNoCoverage = Some(1000L), - basesLowQualityMapping = 
Some(500L), - basesCallable = Some(50000000L), - meanMappingQuality = Some(59.5) - ) - - val json = Json.toJson(coverage) - (json \ "meanDepth").as[Double] mustBe 30.5 - (json \ "basesCallable").as[Long] mustBe 50000000L - (json \ "meanMappingQuality").as[Double] mustBe 59.5 - } - - "deserialize from JSON" in { - val json = Json.parse("""{ - "meanDepth": 30.5, - "medianDepth": 29.0, - "percentCoverageAt1x": 0.99, - "basesCallable": 50000000, - "meanMappingQuality": 59.5 - }""") - - val coverage = json.as[EmbeddedCoverage] - coverage.meanDepth mustBe Some(30.5) - coverage.basesCallable mustBe Some(50000000L) - coverage.percentCoverageAt5x mustBe None - } - - "handle all-None fields" in { - val coverage = EmbeddedCoverage() - val json = Json.toJson(coverage) - val roundTripped = json.as[EmbeddedCoverage] - roundTripped mustBe coverage - } - - "round-trip through JSON" in { - val coverage = EmbeddedCoverage( - meanDepth = Some(25.0), - percentCoverageAt10x = Some(0.93), - basesCallable = Some(48000000L) - ) - val roundTripped = Json.toJson(coverage).as[EmbeddedCoverage] - roundTripped mustBe coverage - } - } - - "AlignmentMetadata.embeddedCoverage" should { - - val baseMetadata = AlignmentMetadata( - id = Some(1L), - sequenceFileId = 100L, - genbankContigId = 1, - metricLevel = MetricLevel.CONTIG_OVERALL, - analysisTool = "mosdepth" - ) - - "return None when coverage is None" in { - baseMetadata.embeddedCoverage mustBe None - } - - "parse embedded coverage from JSONB" in { - val coverageJson = Json.parse("""{"meanDepth": 30.5, "basesCallable": 50000000}""") - val metadata = baseMetadata.copy(coverage = Some(coverageJson)) - - val ec = metadata.embeddedCoverage - ec mustBe defined - ec.get.meanDepth mustBe Some(30.5) - ec.get.basesCallable mustBe Some(50000000L) - } - - "set coverage via withCoverage" in { - val ec = EmbeddedCoverage(meanDepth = Some(30.5), basesCallable = Some(50000000L)) - val metadata = baseMetadata.withCoverage(ec) - - metadata.coverage mustBe 
defined - metadata.embeddedCoverage mustBe Some(ec) - } - } -} diff --git a/test/models/domain/genomics/OriginalHaplogroupEntrySpec.scala b/test/models/domain/genomics/OriginalHaplogroupEntrySpec.scala deleted file mode 100644 index 1f694e7d..00000000 --- a/test/models/domain/genomics/OriginalHaplogroupEntrySpec.scala +++ /dev/null @@ -1,106 +0,0 @@ -package models.domain.genomics - -import org.scalatestplus.play.PlaySpec -import play.api.libs.json.Json - -import java.util.UUID - -class OriginalHaplogroupEntrySpec extends PlaySpec { - - val hapResult: HaplogroupResult = HaplogroupResult("R-M269", 0.99, 100, 0, 50, 5, Seq("R", "R-M269")) - - "OriginalHaplogroupEntry" should { - - "serialize to JSON" in { - val entry = OriginalHaplogroupEntry( - publicationId = 42, - yHaplogroupResult = Some(hapResult), - mtHaplogroupResult = None, - notes = Some("From study X") - ) - val json = Json.toJson(entry) - (json \ "publicationId").as[Int] mustBe 42 - (json \ "yHaplogroupResult" \ "haplogroupName").as[String] mustBe "R-M269" - (json \ "notes").as[String] mustBe "From study X" - } - - "deserialize from JSON" in { - val json = Json.parse("""{"publicationId": 42, "yHaplogroupResult": {"haplogroupName": "R-M269", "score": 0.99, "matchingSnps": 100, "mismatchingSnps": 0, "ancestralMatches": 50, "treeDepth": 5, "lineagePath": ["R", "R-M269"]}}""") - val entry = json.as[OriginalHaplogroupEntry] - entry.publicationId mustBe 42 - entry.yHaplogroupResult mustBe defined - entry.mtHaplogroupResult mustBe None - } - - "round-trip through JSON" in { - val entry = OriginalHaplogroupEntry(10, Some(hapResult), Some(hapResult), Some("notes")) - val roundTripped = Json.toJson(entry).as[OriginalHaplogroupEntry] - roundTripped mustBe entry - } - } - - "Biosample.getOriginalHaplogroupEntries" should { - - val baseBiosample = Biosample( - id = Some(1), sampleGuid = UUID.randomUUID(), sampleAccession = "SAMEA001", - description = "Test", alias = None, centerName = "Center", - specimenDonorId = None 
- ) - - "return empty for no haplogroups" in { - baseBiosample.getOriginalHaplogroupEntries mustBe empty - } - - "return entries from JSONB" in { - val entries = Seq( - OriginalHaplogroupEntry(10, Some(hapResult), None, None), - OriginalHaplogroupEntry(20, None, Some(hapResult), Some("mt study")) - ) - val bs = baseBiosample.copy(originalHaplogroups = Some(Json.toJson(entries))) - bs.getOriginalHaplogroupEntries must have size 2 - } - - "find by publication ID" in { - val entries = Seq( - OriginalHaplogroupEntry(10, Some(hapResult), None, None), - OriginalHaplogroupEntry(20, None, Some(hapResult), None) - ) - val bs = baseBiosample.copy(originalHaplogroups = Some(Json.toJson(entries))) - bs.findHaplogroupByPublication(10) mustBe defined - bs.findHaplogroupByPublication(10).get.publicationId mustBe 10 - bs.findHaplogroupByPublication(99) mustBe None - } - - "add entry with withHaplogroupEntry" in { - val entry1 = OriginalHaplogroupEntry(10, Some(hapResult), None, None) - val bs = baseBiosample.withHaplogroupEntry(entry1) - bs.getOriginalHaplogroupEntries must have size 1 - - val entry2 = OriginalHaplogroupEntry(20, None, Some(hapResult), None) - val bs2 = bs.withHaplogroupEntry(entry2) - bs2.getOriginalHaplogroupEntries must have size 2 - } - - "replace entry for same publication" in { - val entry1 = OriginalHaplogroupEntry(10, Some(hapResult), None, None) - val bs = baseBiosample.withHaplogroupEntry(entry1) - - val updated = OriginalHaplogroupEntry(10, Some(hapResult), Some(hapResult), Some("updated")) - val bs2 = bs.withHaplogroupEntry(updated) - bs2.getOriginalHaplogroupEntries must have size 1 - bs2.findHaplogroupByPublication(10).get.notes mustBe Some("updated") - } - - "remove entry with withoutHaplogroupForPublication" in { - val entries = Seq( - OriginalHaplogroupEntry(10, Some(hapResult), None, None), - OriginalHaplogroupEntry(20, None, Some(hapResult), None) - ) - val bs = baseBiosample.copy(originalHaplogroups = Some(Json.toJson(entries))) - val bs2 = 
bs.withoutHaplogroupForPublication(10) - bs2.getOriginalHaplogroupEntries must have size 1 - bs2.findHaplogroupByPublication(10) mustBe None - bs2.findHaplogroupByPublication(20) mustBe defined - } - } -} diff --git a/test/models/domain/haplogroups/HaplogroupProvenanceSpec.scala b/test/models/domain/haplogroups/HaplogroupProvenanceSpec.scala deleted file mode 100644 index af88352d..00000000 --- a/test/models/domain/haplogroups/HaplogroupProvenanceSpec.scala +++ /dev/null @@ -1,338 +0,0 @@ -package models.domain.haplogroups - -import org.scalatest.funspec.AnyFunSpec -import org.scalatest.matchers.must.Matchers -import play.api.libs.json.Json - -import java.time.LocalDateTime - -class HaplogroupProvenanceSpec extends AnyFunSpec with Matchers { - - describe("HaplogroupProvenance") { - - describe("factory methods") { - - it("should create provenance for a new node with source") { - val provenance = HaplogroupProvenance.forNewNode("ytree.net", Seq("L21", "S145")) - - provenance.primaryCredit mustBe "ytree.net" - provenance.nodeProvenance mustBe Set("ytree.net") - provenance.variantProvenance mustBe Map( - "L21" -> Set("ytree.net"), - "S145" -> Set("ytree.net") - ) - provenance.lastMergedFrom mustBe Some("ytree.net") - provenance.lastMergedAt mustBe defined - } - - it("should create provenance for a new node without variants") { - val provenance = HaplogroupProvenance.forNewNode("ISOGG") - - provenance.primaryCredit mustBe "ISOGG" - provenance.nodeProvenance mustBe Set("ISOGG") - provenance.variantProvenance mustBe Map.empty - } - - it("should create empty provenance") { - val provenance = HaplogroupProvenance.empty - - provenance.primaryCredit mustBe "" - provenance.nodeProvenance mustBe Set.empty - provenance.variantProvenance mustBe Map.empty - provenance.lastMergedAt mustBe None - provenance.lastMergedFrom mustBe None - } - } - - describe("addNodeSource") { - - it("should add a new source to nodeProvenance") { - val provenance = 
HaplogroupProvenance.forNewNode("ISOGG") - val updated = provenance.addNodeSource("ytree.net") - - updated.nodeProvenance must contain allOf ("ISOGG", "ytree.net") - updated.primaryCredit mustBe "ISOGG" // Should not change - } - - it("should not duplicate existing sources") { - val provenance = HaplogroupProvenance.forNewNode("ISOGG") - val updated = provenance.addNodeSource("ISOGG") - - updated.nodeProvenance mustBe Set("ISOGG") - } - - it("should accumulate multiple sources") { - val provenance = HaplogroupProvenance.forNewNode("source1") - .addNodeSource("source2") - .addNodeSource("source3") - - provenance.nodeProvenance must have size 3 - provenance.nodeProvenance must contain allOf ("source1", "source2", "source3") - } - } - - describe("addVariantSource") { - - it("should add source attribution for a new variant") { - val provenance = HaplogroupProvenance.forNewNode("ISOGG") - val updated = provenance.addVariantSource("M269", "ytree.net") - - updated.variantProvenance must contain key "M269" - updated.variantProvenance("M269") must contain("ytree.net") - } - - it("should add additional sources to existing variants") { - val provenance = HaplogroupProvenance.forNewNode("ISOGG", Seq("L21")) - val updated = provenance.addVariantSource("L21", "ytree.net") - - updated.variantProvenance("L21") must contain allOf ("ISOGG", "ytree.net") - } - - it("should not duplicate sources for the same variant") { - val provenance = HaplogroupProvenance.forNewNode("ISOGG", Seq("L21")) - val updated = provenance.addVariantSource("L21", "ISOGG") - - updated.variantProvenance("L21") mustBe Set("ISOGG") - } - } - - describe("merge") { - - it("should combine nodeProvenance from both records") { - val prov1 = HaplogroupProvenance( - primaryCredit = "ISOGG", - nodeProvenance = Set("ISOGG", "DecodingUs") - ) - val prov2 = HaplogroupProvenance( - primaryCredit = "ytree.net", - nodeProvenance = Set("ytree.net", "researcher") - ) - - val merged = prov1.merge(prov2) - - 
merged.nodeProvenance must contain allOf ("ISOGG", "DecodingUs", "ytree.net", "researcher") - } - - it("should preserve primary credit from the first provenance") { - val prov1 = HaplogroupProvenance(primaryCredit = "ISOGG") - val prov2 = HaplogroupProvenance(primaryCredit = "ytree.net") - - val merged = prov1.merge(prov2) - - merged.primaryCredit mustBe "ISOGG" - } - - it("should combine variantProvenance") { - val prov1 = HaplogroupProvenance( - primaryCredit = "ISOGG", - variantProvenance = Map("L21" -> Set("ISOGG"), "M269" -> Set("ISOGG")) - ) - val prov2 = HaplogroupProvenance( - primaryCredit = "ytree.net", - variantProvenance = Map("L21" -> Set("ytree.net"), "DF13" -> Set("ytree.net")) - ) - - val merged = prov1.merge(prov2) - - merged.variantProvenance("L21") must contain allOf ("ISOGG", "ytree.net") - merged.variantProvenance("M269") mustBe Set("ISOGG") - merged.variantProvenance("DF13") mustBe Set("ytree.net") - } - - it("should take the most recent lastMergedAt timestamp") { - val earlier = LocalDateTime.now().minusDays(1) - val later = LocalDateTime.now() - - val prov1 = HaplogroupProvenance( - primaryCredit = "A", - lastMergedAt = Some(earlier) - ) - val prov2 = HaplogroupProvenance( - primaryCredit = "B", - lastMergedAt = Some(later) - ) - - val merged = prov1.merge(prov2) - - merged.lastMergedAt mustBe Some(later) - } - - it("should prefer lastMergedFrom from the second provenance") { - val prov1 = HaplogroupProvenance( - primaryCredit = "A", - lastMergedFrom = Some("source1") - ) - val prov2 = HaplogroupProvenance( - primaryCredit = "B", - lastMergedFrom = Some("source2") - ) - - val merged = prov1.merge(prov2) - - merged.lastMergedFrom mustBe Some("source2") - } - - it("should handle merging with empty provenance") { - val prov1 = HaplogroupProvenance.forNewNode("ISOGG", Seq("L21")) - val prov2 = HaplogroupProvenance.empty - - val merged = prov1.merge(prov2) - - merged.primaryCredit mustBe "ISOGG" - merged.nodeProvenance mustBe Set("ISOGG") - 
merged.variantProvenance mustBe Map("L21" -> Set("ISOGG")) - } - } - - describe("withMergeInfo") { - - it("should update merge timestamp and source") { - val provenance = HaplogroupProvenance.forNewNode("ISOGG") - val now = LocalDateTime.now() - val updated = provenance.withMergeInfo("ytree.net", now) - - updated.lastMergedAt mustBe Some(now) - updated.lastMergedFrom mustBe Some("ytree.net") - updated.primaryCredit mustBe "ISOGG" // Should not change - } - - it("should overwrite previous merge info") { - val earlier = LocalDateTime.now().minusHours(1) - val later = LocalDateTime.now() - - val provenance = HaplogroupProvenance.forNewNode("ISOGG") - .withMergeInfo("source1", earlier) - .withMergeInfo("source2", later) - - provenance.lastMergedAt mustBe Some(later) - provenance.lastMergedFrom mustBe Some("source2") - } - } - - describe("shouldPreserveCredit") { - - it("should return true for ISOGG credit") { - HaplogroupProvenance.shouldPreserveCredit("ISOGG") mustBe true - } - - it("should be case-insensitive for ISOGG") { - HaplogroupProvenance.shouldPreserveCredit("isogg") mustBe true - HaplogroupProvenance.shouldPreserveCredit("IsoGG") mustBe true - HaplogroupProvenance.shouldPreserveCredit("Isogg") mustBe true - } - - it("should return false for non-ISOGG sources") { - HaplogroupProvenance.shouldPreserveCredit("ytree.net") mustBe false - HaplogroupProvenance.shouldPreserveCredit("DecodingUs") mustBe false - HaplogroupProvenance.shouldPreserveCredit("researcher") mustBe false - } - - it("should return false for empty string") { - HaplogroupProvenance.shouldPreserveCredit("") mustBe false - } - } - - describe("JSON serialization") { - - it("should serialize to JSON correctly") { - val provenance = HaplogroupProvenance( - primaryCredit = "ISOGG", - nodeProvenance = Set("ISOGG", "ytree.net"), - variantProvenance = Map("L21" -> Set("ISOGG", "ytree.net")), - lastMergedAt = Some(LocalDateTime.of(2025, 12, 12, 10, 30, 0)), - lastMergedFrom = Some("ytree.net") - ) - - val 
json = Json.toJson(provenance) - - (json \ "primaryCredit").as[String] mustBe "ISOGG" - (json \ "nodeProvenance").as[Set[String]] must contain allOf ("ISOGG", "ytree.net") - (json \ "lastMergedFrom").as[String] mustBe "ytree.net" - } - - it("should deserialize from JSON correctly") { - val jsonString = """{ - "primaryCredit": "ISOGG", - "nodeProvenance": ["ISOGG", "ytree.net"], - "variantProvenance": {"L21": ["ISOGG", "ytree.net"]}, - "lastMergedFrom": "ytree.net" - }""" - - val provenance = Json.parse(jsonString).as[HaplogroupProvenance] - - provenance.primaryCredit mustBe "ISOGG" - provenance.nodeProvenance must contain allOf ("ISOGG", "ytree.net") - provenance.variantProvenance("L21") must contain allOf ("ISOGG", "ytree.net") - provenance.lastMergedFrom mustBe Some("ytree.net") - } - - it("should round-trip serialize and deserialize") { - val original = HaplogroupProvenance.forNewNode("test-source", Seq("V1", "V2")) - - val json = Json.toJson(original) - val restored = json.as[HaplogroupProvenance] - - restored.primaryCredit mustBe original.primaryCredit - restored.nodeProvenance mustBe original.nodeProvenance - restored.variantProvenance mustBe original.variantProvenance - restored.lastMergedFrom mustBe original.lastMergedFrom - } - - it("should handle empty collections in JSON") { - val jsonString = """{ - "primaryCredit": "test", - "nodeProvenance": [], - "variantProvenance": {} - }""" - - val provenance = Json.parse(jsonString).as[HaplogroupProvenance] - - provenance.nodeProvenance mustBe Set.empty - provenance.variantProvenance mustBe Map.empty - } - - it("should handle missing optional fields") { - val jsonString = """{ - "primaryCredit": "test" - }""" - - val provenance = Json.parse(jsonString).as[HaplogroupProvenance] - - provenance.primaryCredit mustBe "test" - provenance.nodeProvenance mustBe Set.empty - provenance.variantProvenance mustBe Map.empty - provenance.lastMergedAt mustBe None - provenance.lastMergedFrom mustBe None - } - } - - 
describe("immutability") { - - it("should not mutate original when adding node source") { - val original = HaplogroupProvenance.forNewNode("ISOGG") - val modified = original.addNodeSource("ytree.net") - - original.nodeProvenance must not contain "ytree.net" - modified.nodeProvenance must contain("ytree.net") - } - - it("should not mutate original when adding variant source") { - val original = HaplogroupProvenance.forNewNode("ISOGG") - val modified = original.addVariantSource("L21", "ytree.net") - - original.variantProvenance must not contain key ("L21") - modified.variantProvenance must contain key "L21" - } - - it("should not mutate original when merging") { - val prov1 = HaplogroupProvenance.forNewNode("A") - val prov2 = HaplogroupProvenance.forNewNode("B") - val merged = prov1.merge(prov2) - - prov1.nodeProvenance mustBe Set("A") - prov2.nodeProvenance mustBe Set("B") - merged.nodeProvenance must contain allOf ("A", "B") - } - } - } -} diff --git a/test/repositories/SequenceFileRepositorySpec.scala b/test/repositories/SequenceFileRepositorySpec.scala deleted file mode 100644 index 9d6e921a..00000000 --- a/test/repositories/SequenceFileRepositorySpec.scala +++ /dev/null @@ -1,254 +0,0 @@ -package repositories - -import models.dal.MyPostgresProfile.api.* -import models.dal.MyPostgresProfile // Added this import -import models.domain.genomics.{SequenceFile, SequenceFileAtpLocationJsonb, SequenceFileChecksumJsonb, SequenceFileHttpLocationJsonb} -import models.domain.genomics.{TestTypeRow, DataGenerationMethod, TargetType} // Added imports -import org.scalatestplus.play.PlaySpec -import org.scalatestplus.play.guice.GuiceOneAppPerSuite -import play.api.db.slick.DatabaseConfigProvider -import play.api.test.Injecting - -import java.time.LocalDateTime -import java.time.temporal.ChronoUnit -import java.util.UUID -import scala.concurrent.ExecutionContext.Implicits.global -import scala.concurrent.duration.Duration -import scala.concurrent.{Await, Future} - -import 
org.scalatest.{BeforeAndAfterEach, BeforeAndAfterAll} -import play.api.db.DBApi -import models.domain.genomics.SequenceLibrary // Added import -import repositories.{SequenceLibraryRepository, TestTypeRepository} // Added TestTypeRepository - -class SequenceFileRepositorySpec extends PlaySpec with GuiceOneAppPerSuite with Injecting with BeforeAndAfterEach with BeforeAndAfterAll { - - var dbConfigProvider: DatabaseConfigProvider = _ - var db: MyPostgresProfile#Backend#Database = _ - var repository: SequenceFileRepository = _ - - override def beforeEach(): Unit = { - // Explicitly get the injector from the app before injecting - val injector = app.injector - dbConfigProvider = injector.instanceOf[DatabaseConfigProvider] - db = dbConfigProvider.get[MyPostgresProfile].db - repository = injector.instanceOf[SequenceFileRepository] - // Clear the tables to ensure a clean state for each test - await(db.run(sqlu"TRUNCATE TABLE sequence_file RESTART IDENTITY CASCADE;")) - await(db.run(sqlu"TRUNCATE TABLE sequence_library RESTART IDENTITY CASCADE;")) - await(db.run(sqlu"TRUNCATE TABLE test_type_definition RESTART IDENTITY CASCADE;")) - - // Insert a dummy TestType to satisfy foreign key constraints - val dummyTestType = TestTypeRow( - code = "WGS", - displayName = "Whole Genome Sequencing", - category = DataGenerationMethod.Sequencing, - targetType = TargetType.WholeGenome, - supportsHaplogroupY = true, - supportsHaplogroupMt = true, - supportsAutosomalIbd = true, - supportsAncestry = true, - typicalFileFormats = List("BAM", "CRAM") - ) - val testTypeRepository = injector.instanceOf[TestTypeRepository] - val createdTestType = await(testTypeRepository.create(dummyTestType)) - - // Insert a dummy SequenceLibrary to satisfy foreign key constraints - val dummyLibrary = SequenceLibrary( - id = Some(1), // Match testLibraryId - sampleGuid = UUID.randomUUID(), - lab = "TestLab", - testTypeId = createdTestType.id.get, - runDate = LocalDateTime.now(), - instrument = "TestInstrument", - 
reads = 1000, - readLength = 100, - pairedEnd = false, - insertSize = None, - atUri = Some("at://test/library/1"), - atCid = Some("cid:test:library:1"), - created_at = LocalDateTime.now(), - updated_at = Some(LocalDateTime.now()) - ) - val sequenceLibraryRepository = injector.instanceOf[SequenceLibraryRepository] - await(sequenceLibraryRepository.create(dummyLibrary)) - super.beforeEach() // Call super.beforeEach() after our setup - } - - override def afterAll(): Unit = { - // Clean up database connections - db.close() - super.afterAll() - } - - // Helper to run DB actions synchronously in tests - def await[T](f: Future[T]): T = Await.result(f, Duration.Inf) - - - "SequenceFileRepository" should { - - "create and retrieve a SequenceFile with JSONB fields" in { - // Setup - val now = LocalDateTime.now() - val testLibraryId = 1 // Assuming a library exists or creating a dummy one for foreign key constraints - - val checksums = List( - SequenceFileChecksumJsonb("md5checksum", "MD5", Some(now), now, now), - SequenceFileChecksumJsonb("sha1checksum", "SHA1", Some(now), now, now) - ) - - val httpLocations = List( - SequenceFileHttpLocationJsonb("http://example.com/file1.bam", UUID.nameUUIDFromBytes("http://example.com/file1.bam".getBytes).toString, now, now), - SequenceFileHttpLocationJsonb("http://example.com/file2.bam", UUID.nameUUIDFromBytes("http://example.com/file2.bam".getBytes).toString, now, now) - ) - - val atpLocation = Some(SequenceFileAtpLocationJsonb( - repoDid = "did:example:123", - recordUri = "at://example.com/file/123", - cid = "bafyreibs3n...", - createdAt = now, - updatedAt = now - )) - - val sequenceFile = SequenceFile( - id = None, - libraryId = testLibraryId, - fileName = "test_file.bam", - fileSizeBytes = 1024L, - fileFormat = "BAM", - checksums = checksums, - httpLocations = httpLocations, - atpLocation = atpLocation, - aligner = "BWA", - targetReference = "hg38", - createdAt = now, - updatedAt = Some(now) - ) - - // Exercise & Verify - val 
createdFile = await(repository.create(sequenceFile)) - createdFile.id mustBe defined - createdFile.fileName mustBe "test_file.bam" - createdFile.checksums mustBe checksums - createdFile.httpLocations mustBe httpLocations - createdFile.atpLocation mustBe atpLocation - - val retrievedFile = await(repository.findById(createdFile.id.get)).get - retrievedFile.id mustBe createdFile.id - retrievedFile.libraryId mustBe createdFile.libraryId - retrievedFile.fileName mustBe createdFile.fileName - retrievedFile.fileSizeBytes mustBe createdFile.fileSizeBytes - retrievedFile.fileFormat mustBe createdFile.fileFormat - retrievedFile.checksums mustBe createdFile.checksums - retrievedFile.httpLocations mustBe createdFile.httpLocations - retrievedFile.atpLocation mustBe createdFile.atpLocation - retrievedFile.aligner mustBe createdFile.aligner - retrievedFile.targetReference mustBe createdFile.targetReference - retrievedFile.createdAt.truncatedTo(ChronoUnit.MILLIS) mustBe createdFile.createdAt.truncatedTo(ChronoUnit.MILLIS) - retrievedFile.updatedAt.map(_.truncatedTo(ChronoUnit.MILLIS)) mustBe createdFile.updatedAt.map(_.truncatedTo(ChronoUnit.MILLIS)) - } - - "update a SequenceFile with modified JSONB fields" in { - // Setup - val now = LocalDateTime.now() - val testLibraryId = 1 // Assuming a library exists - val originalChecksums = List( - SequenceFileChecksumJsonb("old_md5", "MD5", Some(now), now, now) - ) - val originalHttpLocations = List( - SequenceFileHttpLocationJsonb("http://original.com/file.bam", UUID.nameUUIDFromBytes("http://original.com/file.bam".getBytes).toString, now, now) - ) - val originalAtpLocation = Some(SequenceFileAtpLocationJsonb( - repoDid = "did:example:old", - recordUri = "at://example.com/old", - cid = "old_cid", - createdAt = now, - updatedAt = now - )) - - val originalFile = SequenceFile( - id = None, - libraryId = testLibraryId, - fileName = "original.bam", - fileSizeBytes = 500L, - fileFormat = "CRAM", - checksums = originalChecksums, - 
httpLocations = originalHttpLocations, - atpLocation = originalAtpLocation, - aligner = "Bowtie2", - targetReference = "GRCh37", - createdAt = now, - updatedAt = Some(now) - ) - - val createdFile = await(repository.create(originalFile)) - - val updatedChecksums = List( - SequenceFileChecksumJsonb("new_md5", "MD5", Some(now), now, now), - SequenceFileChecksumJsonb("new_sha1", "SHA1", Some(now), now, now) - ) - val updatedHttpLocations = List( - SequenceFileHttpLocationJsonb("http://updated.com/file.bam", UUID.nameUUIDFromBytes("http://updated.com/file.bam".getBytes).toString, now, now) - ) - val updatedAtpLocation = Some(SequenceFileAtpLocationJsonb( - repoDid = "did:example:new", - recordUri = "at://example.com/new", - cid = "new_cid", - createdAt = now, - updatedAt = now - )) - - val updatedTimestamp = LocalDateTime.now() // Capture once - val updatedFile = createdFile.copy( - fileName = "updated.bam", - checksums = updatedChecksums, - httpLocations = updatedHttpLocations, - atpLocation = updatedAtpLocation, - updatedAt = Some(updatedTimestamp) // Use the captured instance - ) - - // Exercise - val updateResult = await(repository.update(updatedFile)) - - // Verify - updateResult mustBe true - val retrievedFile = await(repository.findById(createdFile.id.get)).get - retrievedFile.fileName mustBe "updated.bam" - retrievedFile.checksums mustBe updatedChecksums - retrievedFile.httpLocations mustBe updatedHttpLocations - retrievedFile.atpLocation mustBe updatedAtpLocation - retrievedFile.createdAt.truncatedTo(ChronoUnit.MILLIS) mustBe createdFile.createdAt.truncatedTo(ChronoUnit.MILLIS) - // retrievedFile.updatedAt.map(_.truncatedTo(ChronoUnit.MILLIS)) mustBe Some(updatedTimestamp.truncatedTo(ChronoUnit.MILLIS)) // Temporarily removed - // Compare with the captured instance - - } - - "delete a SequenceFile" in { - // Setup - val testLibraryId = 1 - val now = LocalDateTime.now() - val sequenceFile = SequenceFile( - id = None, - libraryId = testLibraryId, - fileName = 
"delete_me.bam", - fileSizeBytes = 10L, - fileFormat = "FASTQ", - checksums = List.empty, - httpLocations = List.empty, - atpLocation = None, - aligner = "None", - targetReference = "None", - createdAt = now, - updatedAt = Some(now) - ) - val createdFile = await(repository.create(sequenceFile)) - - // Exercise - val deleteResult = await(repository.delete(createdFile.id.get)) - - // Verify - deleteResult mustBe true - await(repository.findById(createdFile.id.get)) mustBe None - } - } -} diff --git a/test/services/AccessionNumberGeneratorSpec.scala b/test/services/AccessionNumberGeneratorSpec.scala deleted file mode 100644 index 568cb033..00000000 --- a/test/services/AccessionNumberGeneratorSpec.scala +++ /dev/null @@ -1,74 +0,0 @@ -package services - -import models.domain.genomics.BiosampleType -import org.mockito.Mockito.when -import org.scalatest.concurrent.ScalaFutures -import org.scalatestplus.mockito.MockitoSugar -import org.scalatestplus.play.PlaySpec -import play.api.Configuration -import repositories.CitizenSequenceRepository - -import scala.concurrent.{ExecutionContext, Future} - -class AccessionNumberGeneratorSpec extends PlaySpec with MockitoSugar with ScalaFutures { - - implicit val ec: ExecutionContext = ExecutionContext.global - - "BiosampleAccessionGenerator" should { - - val mockSequenceRepo = mock[CitizenSequenceRepository] - val mockConfig = mock[Configuration] - - // Setup mock config - when(mockConfig.get[String]("biosample.hash.salt")).thenReturn("test-salt") - - val generator = new BiosampleAccessionGenerator(mockSequenceRepo, mockConfig) - - "generate accession for Standard biosample" in { - val metadata = AccessionMetadata(existingAccession = Some("SAMEA123")) - val result = generator.generateAccession(BiosampleType.Standard, metadata) - whenReady(result) { acc => - acc mustBe "SAMEA123" - } - } - - "fail for Standard biosample without existing accession" in { - val metadata = AccessionMetadata(existingAccession = None) - val result = 
generator.generateAccession(BiosampleType.Standard, metadata) - whenReady(result.failed) { e => - e mustBe a [IllegalArgumentException] - } - } - - "generate accession for PGP biosample" in { - val metadata = AccessionMetadata(pgpParticipantId = Some("hu123")) - val result = generator.generateAccession(BiosampleType.PGP, metadata) - whenReady(result) { acc => - acc mustBe "PGP-hu123" - } - } - - "fail for PGP biosample without participant ID" in { - val metadata = AccessionMetadata(pgpParticipantId = None) - val result = generator.generateAccession(BiosampleType.PGP, metadata) - whenReady(result.failed) { e => - e mustBe a [IllegalArgumentException] - } - } - - "generate accession for Citizen biosample" in { - when(mockSequenceRepo.getNextSequence()).thenReturn(Future.successful(12345L)) - val result = generator.generateAccession(BiosampleType.Citizen, AccessionMetadata()) - whenReady(result) { acc => - acc must startWith("DU-") - } - } - - "decode a valid citizen accession" in { - val result = generator.decodeAccession("INVALID-FORMAT") - whenReady(result) { res => - res mustBe None - } - } - } -} diff --git a/test/services/AncestralMotifServiceSpec.scala b/test/services/AncestralMotifServiceSpec.scala deleted file mode 100644 index 57d58335..00000000 --- a/test/services/AncestralMotifServiceSpec.scala +++ /dev/null @@ -1,220 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.haplogroups.{HaplogroupAncestralStr, MotifMethod} -import org.mockito.ArgumentMatchers.{any, anyInt} -import org.mockito.Mockito.{reset, verify, when} -import repositories.{BiosampleVariantCallRepository, HaplogroupAncestralStrRepository} - -import scala.concurrent.Future - -class AncestralMotifServiceSpec extends ServiceSpec { - - val mockAncestralRepo: HaplogroupAncestralStrRepository = mock[HaplogroupAncestralStrRepository] - val mockVariantCallRepo: BiosampleVariantCallRepository = mock[BiosampleVariantCallRepository] - - val service = new 
AncestralMotifService(mockAncestralRepo, mockVariantCallRepo) - - override def beforeEach(): Unit = { - reset(mockAncestralRepo, mockVariantCallRepo) - } - - "AncestralMotifService" should { - - "getMotifForHaplogroup" should { - - "return motif map from stored ancestral STRs" in { - val motifs = Seq( - HaplogroupAncestralStr(Some(1), 100, "DYS456", Some(15), None, Some(BigDecimal(0.9)), Some(10), None), - HaplogroupAncestralStr(Some(2), 100, "DYS389I", Some(13), None, Some(BigDecimal(0.8)), Some(8), None), - HaplogroupAncestralStr(Some(3), 100, "DYS19", Some(14), None, Some(BigDecimal(0.95)), Some(12), None) - ) - when(mockAncestralRepo.findByHaplogroup(100)).thenReturn(Future.successful(motifs)) - - whenReady(service.getMotifForHaplogroup(100)) { result => - result must have size 3 - result("DYS456") mustBe 15 - result("DYS389I") mustBe 13 - result("DYS19") mustBe 14 - } - } - - "return empty map for haplogroup with no motifs" in { - when(mockAncestralRepo.findByHaplogroup(999)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.getMotifForHaplogroup(999)) { result => - result mustBe empty - } - } - - "skip markers with no ancestral value" in { - val motifs = Seq( - HaplogroupAncestralStr(Some(1), 100, "DYS456", Some(15), None, None, None, None), - HaplogroupAncestralStr(Some(2), 100, "DYS389I", None, None, None, None, None) - ) - when(mockAncestralRepo.findByHaplogroup(100)).thenReturn(Future.successful(motifs)) - - whenReady(service.getMotifForHaplogroup(100)) { result => - result must have size 1 - result("DYS456") mustBe 15 - } - } - } - - "computeModalHaplotype" should { - - "compute mode from sample observations" in { - val observations = Seq( - MarkerObservation("DYS456", 15, 1), - MarkerObservation("DYS456", 15, 2), - MarkerObservation("DYS456", 16, 3), - MarkerObservation("DYS456", 15, 4), - MarkerObservation("DYS389I", 13, 1), - MarkerObservation("DYS389I", 13, 2), - MarkerObservation("DYS389I", 14, 3) - ) - - val result = 
service.computeModalHaplotype(observations, 100) - - result must have size 2 - val dys456 = result.find(_.markerName == "DYS456").get - dys456.ancestralValue mustBe Some(15) // mode: 3 out of 4 - dys456.supportingSamples mustBe Some(4) - dys456.confidence.get.toDouble mustBe 0.75 +- 0.01 - dys456.method mustBe MotifMethod.Modal - - val dys389 = result.find(_.markerName == "DYS389I").get - dys389.ancestralValue mustBe Some(13) - dys389.supportingSamples mustBe Some(3) - } - - "compute variance correctly" in { - // All same value → variance 0 - val sameObs = Seq( - MarkerObservation("DYS456", 15, 1), - MarkerObservation("DYS456", 15, 2), - MarkerObservation("DYS456", 15, 3) - ) - val sameResult = service.computeModalHaplotype(sameObs, 100) - sameResult.head.variance mustBe Some(BigDecimal(0)) - - // Mixed values → positive variance - val mixedObs = Seq( - MarkerObservation("DYS456", 14, 1), - MarkerObservation("DYS456", 15, 2), - MarkerObservation("DYS456", 16, 3) - ) - val mixedResult = service.computeModalHaplotype(mixedObs, 100) - mixedResult.head.variance.get must be > BigDecimal(0) - } - - "identify alternative modal values" in { - val observations = Seq( - MarkerObservation("DYS456", 15, 1), - MarkerObservation("DYS456", 15, 2), - MarkerObservation("DYS456", 15, 3), - MarkerObservation("DYS456", 16, 4), - MarkerObservation("DYS456", 16, 5), // alt with count >= 2 - MarkerObservation("DYS456", 17, 6) // singleton, not alt - ) - - val result = service.computeModalHaplotype(observations, 100) - val motif = result.head - motif.ancestralValue mustBe Some(15) - motif.ancestralValueAlt mustBe Some(List(16)) // 16 appears 2x, 17 only 1x - } - - "return no alternatives when none qualify" in { - val observations = Seq( - MarkerObservation("DYS456", 15, 1), - MarkerObservation("DYS456", 15, 2), - MarkerObservation("DYS456", 16, 3) // singleton - ) - - val result = service.computeModalHaplotype(observations, 100) - result.head.ancestralValueAlt mustBe None - } - - "handle 
single sample" in { - val observations = Seq(MarkerObservation("DYS456", 15, 1)) - - val result = service.computeModalHaplotype(observations, 100) - result must have size 1 - result.head.ancestralValue mustBe Some(15) - result.head.confidence mustBe Some(BigDecimal(1.0)) - result.head.supportingSamples mustBe Some(1) - result.head.variance mustBe Some(BigDecimal(0)) - } - - "return empty for no observations" in { - val result = service.computeModalHaplotype(Seq.empty, 100) - result mustBe empty - } - } - - "computeAndSaveMotif" should { - - "compute and persist motifs" in { - val observations = Seq( - MarkerObservation("DYS456", 15, 1), - MarkerObservation("DYS456", 15, 2), - MarkerObservation("DYS389I", 13, 1), - MarkerObservation("DYS389I", 13, 2) - ) - - when(mockAncestralRepo.upsertBatch(any[Seq[HaplogroupAncestralStr]])) - .thenReturn(Future.successful(Seq(1, 1))) - - whenReady(service.computeAndSaveMotif(observations, 100)) { count => - count mustBe 2 - verify(mockAncestralRepo).upsertBatch(any[Seq[HaplogroupAncestralStr]]) - } - } - - "return 0 for empty observations" in { - whenReady(service.computeAndSaveMotif(Seq.empty, 100)) { count => - count mustBe 0 - } - } - } - - "setManualMotif" should { - - "create a manual motif entry" in { - when(mockAncestralRepo.upsert(any[HaplogroupAncestralStr])) - .thenReturn(Future.successful(1)) - - whenReady(service.setManualMotif(100, "DYS456", 15, Some(BigDecimal(0.99)))) { result => - result mustBe 1 - verify(mockAncestralRepo).upsert(any[HaplogroupAncestralStr]) - } - } - } - - "clearMotifs" should { - - "delete all motifs for a haplogroup" in { - when(mockAncestralRepo.deleteByHaplogroup(100)).thenReturn(Future.successful(5)) - - whenReady(service.clearMotifs(100)) { count => - count mustBe 5 - } - } - } - } - - "MotifMethod" should { - - "round-trip through string conversion" in { - MotifMethod.fromString("MODAL") mustBe MotifMethod.Modal - MotifMethod.fromString("PHYLOGENETIC") mustBe MotifMethod.Phylogenetic - 
MotifMethod.fromString("MANUAL") mustBe MotifMethod.Manual - } - - "reject unknown method" in { - an[IllegalArgumentException] must be thrownBy { - MotifMethod.fromString("UNKNOWN") - } - } - } -} diff --git a/test/services/BiosampleDataServiceSpec.scala b/test/services/BiosampleDataServiceSpec.scala deleted file mode 100644 index 9141c595..00000000 --- a/test/services/BiosampleDataServiceSpec.scala +++ /dev/null @@ -1,196 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.api.* -import models.domain.genomics.* -import models.domain.publications.{Publication, PublicationBiosample} -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.* - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class BiosampleDataServiceSpec extends ServiceSpec { - - val mockBiosampleRepo: BiosampleRepository = mock[BiosampleRepository] - val mockLibraryRepo: SequenceLibraryRepository = mock[SequenceLibraryRepository] - val mockFileRepo: SequenceFileRepository = mock[SequenceFileRepository] - val mockPubRepo: PublicationRepository = mock[PublicationRepository] - val mockHaplogroupRepo: BiosampleOriginalHaplogroupRepository = mock[BiosampleOriginalHaplogroupRepository] - val mockPubBiosampleRepo: PublicationBiosampleRepository = mock[PublicationBiosampleRepository] - val mockTestTypeService: TestTypeService = mock[TestTypeService] - - val service = new BiosampleDataService( - mockBiosampleRepo, mockLibraryRepo, mockFileRepo, - mockPubRepo, mockHaplogroupRepo, mockPubBiosampleRepo, mockTestTypeService - ) - - override def beforeEach(): Unit = { - reset(mockBiosampleRepo, mockLibraryRepo, mockFileRepo, - mockPubRepo, mockHaplogroupRepo, mockPubBiosampleRepo, mockTestTypeService) - } - - val sampleGuid: UUID = UUID.randomUUID() - - val testBiosample: Biosample = Biosample( - id = Some(1), sampleGuid = sampleGuid, sampleAccession = "SAMEA001", - 
description = "Test", alias = None, centerName = "Center", - specimenDonorId = None, locked = false, sourcePlatform = None - ) - - val testTestType: TestTypeRow = TestTypeRow( - id = Some(1), code = "WGS", displayName = "Whole Genome Sequencing", - category = DataGenerationMethod.Sequencing, vendor = None, - targetType = TargetType.WholeGenome, - expectedMinDepth = None, expectedTargetDepth = None, expectedMarkerCount = None, - supportsHaplogroupY = true, supportsHaplogroupMt = true, - supportsAutosomalIbd = false, supportsAncestry = false, - typicalFileFormats = List("CRAM", "BAM"), version = None, - releaseDate = None, deprecatedAt = None, successorTestTypeId = None, - description = None, documentationUrl = None - ) - - val testLibrary: SequenceLibrary = SequenceLibrary( - id = Some(10), sampleGuid = sampleGuid, lab = "Illumina", testTypeId = 1, - runDate = LocalDateTime.now(), instrument = "NovaSeq", reads = 1000, - readLength = 150, pairedEnd = false, insertSize = None, - atUri = None, atCid = None, created_at = LocalDateTime.now(), updated_at = None - ) - - val testSequenceData: SequenceDataInfo = SequenceDataInfo( - reads = Some(1000), readLength = Some(150), coverage = Some(30.0), - platformName = "Illumina", testType = "WGS", files = Seq.empty - ) - - val testPublication: Publication = Publication( - id = Some(100), openAlexId = None, pubmedId = None, - doi = Some("10.1234/test"), title = "Test Pub", - authors = None, abstractSummary = None, journal = None, - publicationDate = None, url = None, citationNormalizedPercentile = None, - citedByCount = None, openAccessStatus = None, openAccessUrl = None, - primaryTopic = None, publicationType = None, publisher = None - ) - - "BiosampleDataService" should { - - "addSequenceData with valid test type" in { - when(mockTestTypeService.getByCode("WGS")).thenReturn(Future.successful(Some(testTestType))) - when(mockLibraryRepo.create(any[SequenceLibrary])).thenReturn(Future.successful(testLibrary)) - - 
whenReady(service.addSequenceData(sampleGuid, testSequenceData)) { _ => - verify(mockTestTypeService).getByCode("WGS") - verify(mockLibraryRepo).create(any[SequenceLibrary]) - } - } - - "addSequenceData fails with invalid test type" in { - when(mockTestTypeService.getByCode("INVALID")).thenReturn(Future.successful(None)) - - val data = testSequenceData.copy(testType = "INVALID") - whenReady(service.addSequenceData(sampleGuid, data).failed) { ex => - ex mustBe a[IllegalArgumentException] - ex.getMessage must include("Invalid test type code") - } - } - - "replaceSequenceData deletes existing then creates new" in { - when(mockLibraryRepo.findBySampleGuid(sampleGuid)).thenReturn(Future.successful(Seq(testLibrary))) - when(mockFileRepo.deleteByLibraryId(10)).thenReturn(Future.successful(1)) - when(mockLibraryRepo.delete(10)).thenReturn(Future.successful(true)) - when(mockTestTypeService.getByCode("WGS")).thenReturn(Future.successful(Some(testTestType))) - when(mockLibraryRepo.create(any[SequenceLibrary])).thenReturn(Future.successful(testLibrary)) - - whenReady(service.replaceSequenceData(sampleGuid, testSequenceData)) { _ => - verify(mockFileRepo).deleteByLibraryId(10) - verify(mockLibraryRepo).delete(10) - verify(mockLibraryRepo).create(any[SequenceLibrary]) - } - } - - "replaceSequenceData with no existing libraries just creates new" in { - when(mockLibraryRepo.findBySampleGuid(sampleGuid)).thenReturn(Future.successful(Seq.empty)) - when(mockTestTypeService.getByCode("WGS")).thenReturn(Future.successful(Some(testTestType))) - when(mockLibraryRepo.create(any[SequenceLibrary])).thenReturn(Future.successful(testLibrary)) - - whenReady(service.replaceSequenceData(sampleGuid, testSequenceData)) { _ => - verify(mockFileRepo, never()).deleteByLibraryId(anyInt) - verify(mockLibraryRepo, never()).delete(anyInt) - verify(mockLibraryRepo).create(any[SequenceLibrary]) - } - } - - "linkPublication with existing publication by DOI" in { - val pubInfo = PublicationInfo(doi = 
Some("10.1234/test"), pubmedId = None, originalHaplogroups = None) - - when(mockBiosampleRepo.findByGuid(sampleGuid)).thenReturn(Future.successful(Some((testBiosample, None)))) - when(mockPubRepo.findByDoi("10.1234/test")).thenReturn(Future.successful(Some(testPublication))) - when(mockPubBiosampleRepo.create(any[PublicationBiosample])).thenReturn(Future.successful(PublicationBiosample(100, 1))) - - whenReady(service.linkPublication(sampleGuid, pubInfo)) { _ => - verify(mockPubRepo, never()).savePublication(any[Publication]) - verify(mockPubBiosampleRepo).create(any[PublicationBiosample]) - } - } - - "linkPublication creates new publication when DOI not found" in { - val pubInfo = PublicationInfo(doi = Some("10.9999/new"), pubmedId = None, originalHaplogroups = None) - - when(mockBiosampleRepo.findByGuid(sampleGuid)).thenReturn(Future.successful(Some((testBiosample, None)))) - when(mockPubRepo.findByDoi("10.9999/new")).thenReturn(Future.successful(None)) - when(mockPubRepo.savePublication(any[Publication])).thenReturn(Future.successful(testPublication.copy(doi = Some("10.9999/new")))) - when(mockPubBiosampleRepo.create(any[PublicationBiosample])).thenReturn(Future.successful(PublicationBiosample(100, 1))) - - whenReady(service.linkPublication(sampleGuid, pubInfo)) { _ => - verify(mockPubRepo).savePublication(any[Publication]) - } - } - - "linkPublication fails when biosample not found" in { - val pubInfo = PublicationInfo(doi = Some("10.1234/test"), pubmedId = None, originalHaplogroups = None) - - when(mockBiosampleRepo.findByGuid(sampleGuid)).thenReturn(Future.successful(None)) - - whenReady(service.linkPublication(sampleGuid, pubInfo).failed) { ex => - ex mustBe a[IllegalArgumentException] - ex.getMessage must include("Biosample not found") - } - } - - "linkPublication creates haplogroup record when provided" in { - val hapInfo = HaplogroupInfo( - yHaplogroup = Some(HaplogroupResult("R-M269", 0.99, 100, 0, 50, 5, Seq("R"))), - mtHaplogroup = None, - notes = 
Some("test note") - ) - val pubInfo = PublicationInfo(doi = Some("10.1234/test"), pubmedId = None, originalHaplogroups = Some(hapInfo)) - - when(mockBiosampleRepo.findByGuid(sampleGuid)).thenReturn(Future.successful(Some((testBiosample, None)))) - when(mockPubRepo.findByDoi("10.1234/test")).thenReturn(Future.successful(Some(testPublication))) - when(mockPubBiosampleRepo.create(any[PublicationBiosample])).thenReturn(Future.successful(PublicationBiosample(100, 1))) - when(mockHaplogroupRepo.upsert(anyInt, any[OriginalHaplogroupEntry])).thenReturn(Future.successful(true)) - - whenReady(service.linkPublication(sampleGuid, pubInfo)) { _ => - verify(mockHaplogroupRepo).upsert(anyInt, any[OriginalHaplogroupEntry]) - } - } - - "fullyDeleteBiosampleAndDependencies deletes in correct order" in { - when(mockPubBiosampleRepo.deleteByBiosampleId(1)).thenReturn(Future.successful(1)) - when(mockHaplogroupRepo.deleteAllByBiosampleId(1)).thenReturn(Future.successful(true)) - when(mockLibraryRepo.findBySampleGuid(sampleGuid)).thenReturn(Future.successful(Seq(testLibrary))) - when(mockFileRepo.deleteByLibraryId(10)).thenReturn(Future.successful(1)) - when(mockLibraryRepo.delete(10)).thenReturn(Future.successful(true)) - when(mockBiosampleRepo.delete(1)).thenReturn(Future.successful(true)) - - whenReady(service.fullyDeleteBiosampleAndDependencies(1, sampleGuid)) { _ => - verify(mockPubBiosampleRepo).deleteByBiosampleId(1) - verify(mockHaplogroupRepo).deleteAllByBiosampleId(1) - verify(mockFileRepo).deleteByLibraryId(10) - verify(mockLibraryRepo).delete(10) - verify(mockBiosampleRepo).delete(1) - } - } - } -} diff --git a/test/services/BiosampleUpdateServiceSpec.scala b/test/services/BiosampleUpdateServiceSpec.scala deleted file mode 100644 index 2945533b..00000000 --- a/test/services/BiosampleUpdateServiceSpec.scala +++ /dev/null @@ -1,203 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.api.BiosampleUpdate -import models.domain.genomics.* -import 
models.domain.publications.PublicationBiosample -import org.mockito.ArgumentMatchers.{any, anyInt} -import org.mockito.Mockito.{never, verify, when} -import repositories.{BiosampleOriginalHaplogroupRepository, BiosampleRepository, PublicationBiosampleRepository, SpecimenDonorRepository} - -import org.mockito.Mockito.reset - -import java.util.UUID -import scala.concurrent.Future - -class BiosampleUpdateServiceSpec extends ServiceSpec { - - val mockBiosampleRepo: BiosampleRepository = mock[BiosampleRepository] - val mockPubBiosampleRepo: PublicationBiosampleRepository = mock[PublicationBiosampleRepository] - val mockHaplogroupRepo: BiosampleOriginalHaplogroupRepository = mock[BiosampleOriginalHaplogroupRepository] - val mockDonorRepo: SpecimenDonorRepository = mock[SpecimenDonorRepository] - - val service = new BiosampleUpdateService( - mockBiosampleRepo, mockPubBiosampleRepo, mockHaplogroupRepo, mockDonorRepo - ) - - override def beforeEach(): Unit = { - reset(mockBiosampleRepo, mockPubBiosampleRepo, mockHaplogroupRepo, mockDonorRepo) - } - - val sampleGuid: UUID = UUID.randomUUID() - - val testBiosample: Biosample = Biosample( - id = Some(1), - sampleGuid = sampleGuid, - sampleAccession = "SAMEA001", - description = "Test sample", - alias = Some("alias1"), - centerName = "TestCenter", - specimenDonorId = Some(10), - locked = false, - sourcePlatform = Some("test") - ) - - val testDonor: SpecimenDonor = SpecimenDonor( - id = Some(10), - donorIdentifier = "DONOR_001", - originBiobank = "TestBank", - donorType = BiosampleType.Standard, - sex = Some(BiologicalSex.Male), - geocoord = None, - dateRangeStart = None, - dateRangeEnd = None - ) - - "BiosampleUpdateService" should { - - "return Left when no updates provided" in { - val update = BiosampleUpdate() - - whenReady(service.updateBiosample(1, update)) { result => - result mustBe Left("No valid fields to update") - } - } - - "return Left when biosample not found" in { - val update = BiosampleUpdate(alias = 
Some("new-alias")) - when(mockBiosampleRepo.findById(999)).thenReturn(Future.successful(None)) - - whenReady(service.updateBiosample(999, update)) { result => - result mustBe Left("Biosample not found") - } - } - - "update alias successfully" in { - val update = BiosampleUpdate(alias = Some("new-alias")) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((testBiosample, Some(testDonor))))) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - - whenReady(service.updateBiosample(1, update)) { result => - result.isRight mustBe true - result.toOption.get.alias mustBe Some("new-alias") - } - } - - "update locked field" in { - val update = BiosampleUpdate(locked = Some(true)) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((testBiosample, Some(testDonor))))) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - - whenReady(service.updateBiosample(1, update)) { result => - result.isRight mustBe true - result.toOption.get.locked mustBe true - } - } - - "return Left when repository update fails" in { - val update = BiosampleUpdate(alias = Some("new-alias")) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((testBiosample, Some(testDonor))))) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(false)) - - whenReady(service.updateBiosample(1, update)) { result => - result mustBe Left("Failed to update biosample") - } - } - - "update existing specimen donor sex" in { - val update = BiosampleUpdate(sex = Some(BiologicalSex.Female)) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((testBiosample, Some(testDonor))))) - when(mockDonorRepo.update(any[SpecimenDonor])).thenReturn(Future.successful(true)) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - - whenReady(service.updateBiosample(1, update)) { result => - result.isRight mustBe true - 
verify(mockDonorRepo).update(any[SpecimenDonor]) - } - } - - "set donor type to Ancient when date range provided" in { - val update = BiosampleUpdate(dateRangeStart = Some(-3000)) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((testBiosample, Some(testDonor))))) - when(mockDonorRepo.update(any[SpecimenDonor])).thenReturn(Future.successful(true)) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - - whenReady(service.updateBiosample(1, update)) { result => - result.isRight mustBe true - verify(mockDonorRepo).update(any[SpecimenDonor]) - } - } - - "not create new donor with only one identifying field" in { - val biosampleNoDonor = testBiosample.copy(specimenDonorId = None) - val update = BiosampleUpdate(sex = Some(BiologicalSex.Male)) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((biosampleNoDonor, None)))) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - - whenReady(service.updateBiosample(1, update)) { result => - result.isRight mustBe true - verify(mockDonorRepo, never()).create(any[SpecimenDonor]) - } - } - - "create new donor when two or more identifying fields provided" in { - val biosampleNoDonor = testBiosample.copy(specimenDonorId = None) - val update = BiosampleUpdate( - sex = Some(BiologicalSex.Female), - dateRangeStart = Some(-5000) - ) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((biosampleNoDonor, None)))) - when(mockDonorRepo.create(any[SpecimenDonor])).thenReturn(Future.successful(testDonor)) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - - whenReady(service.updateBiosample(1, update)) { result => - result.isRight mustBe true - verify(mockDonorRepo).create(any[SpecimenDonor]) - } - } - - "update existing haplogroup record via upsert" in { - val hapResult = HaplogroupResult("R-M269", 0.99, 100, 0, 50, 5, Seq("R", "R-M269")) - val update = 
BiosampleUpdate(yHaplogroup = Some(hapResult)) - - val existingEntry = OriginalHaplogroupEntry( - publicationId = 10, - yHaplogroupResult = None, mtHaplogroupResult = None, notes = None - ) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((testBiosample, Some(testDonor))))) - when(mockPubBiosampleRepo.findByBiosampleId(1)).thenReturn(Future.successful(Seq(PublicationBiosample(10, 1)))) - when(mockHaplogroupRepo.findByBiosampleId(1)).thenReturn(Future.successful(Seq(existingEntry))) - when(mockHaplogroupRepo.upsert(anyInt, any[OriginalHaplogroupEntry])).thenReturn(Future.successful(true)) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - - whenReady(service.updateBiosample(1, update)) { result => - result.isRight mustBe true - verify(mockHaplogroupRepo).upsert(anyInt, any[OriginalHaplogroupEntry]) - } - } - - "create new haplogroup record when none exists for publication" in { - val hapResult = HaplogroupResult("R-M269", 0.99, 100, 0, 50, 5, Seq("R", "R-M269")) - val update = BiosampleUpdate(yHaplogroup = Some(hapResult)) - - when(mockBiosampleRepo.findById(1)).thenReturn(Future.successful(Some((testBiosample, Some(testDonor))))) - when(mockPubBiosampleRepo.findByBiosampleId(1)).thenReturn(Future.successful(Seq(PublicationBiosample(10, 1)))) - when(mockHaplogroupRepo.findByBiosampleId(1)).thenReturn(Future.successful(Seq.empty)) - when(mockHaplogroupRepo.upsert(anyInt, any[OriginalHaplogroupEntry])).thenReturn(Future.successful(true)) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - - whenReady(service.updateBiosample(1, update)) { result => - result.isRight mustBe true - verify(mockHaplogroupRepo).upsert(anyInt, any[OriginalHaplogroupEntry]) - } - } - } -} diff --git a/test/services/BranchAgeEstimationServiceSpec.scala b/test/services/BranchAgeEstimationServiceSpec.scala deleted file mode 100644 index f96dc0a0..00000000 --- 
a/test/services/BranchAgeEstimationServiceSpec.scala +++ /dev/null @@ -1,311 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.domain.genomics.{BiosampleCallableLoci, MutationType, VariantV2} -import models.domain.haplogroups.{AgeEstimate, Haplogroup} -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{reset, when} -import play.api.libs.json.Json -import repositories.{BiosampleCallableLociRepository, HaplogroupCoreRepository, HaplogroupVariantRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class BranchAgeEstimationServiceSpec extends ServiceSpec { - - val mockCoreRepo: HaplogroupCoreRepository = mock[HaplogroupCoreRepository] - val mockVariantRepo: HaplogroupVariantRepository = mock[HaplogroupVariantRepository] - val mockCallableLociRepo: BiosampleCallableLociRepository = mock[BiosampleCallableLociRepository] - - val service = new BranchAgeEstimationService(mockCoreRepo, mockVariantRepo, mockCallableLociRepo) - - override def beforeEach(): Unit = { - reset(mockCoreRepo, mockVariantRepo, mockCallableLociRepo) - } - - val now: LocalDateTime = LocalDateTime.of(2025, 6, 1, 12, 0) - - def makeHaplogroup(id: Int, name: String): Haplogroup = - Haplogroup( - id = Some(id), name = name, lineage = Some(name), - description = None, haplogroupType = HaplogroupType.Y, - revisionId = 1, source = "backbone", confidenceLevel = "high", - validFrom = now, validUntil = None - ) - - def makeVariant(id: Int): VariantV2 = - VariantV2( - variantId = Some(id), canonicalName = Some(s"V$id"), mutationType = MutationType.SNP, - coordinates = Json.obj("GRCh38" -> Json.obj("contig" -> "chrY", "position" -> (1000000 + id), "ref" -> "A", "alt" -> "G")) - ) - - "BranchAgeEstimationService" should { - - "calculate age from SNP count" in { - val hg = makeHaplogroup(100, "R-M269") - val variants = (1 to 10).map(makeVariant) - - 
when(mockCoreRepo.findById(100)).thenReturn(Future.successful(Some(hg))) - when(mockVariantRepo.getHaplogroupVariants(100)).thenReturn(Future.successful(variants)) - - whenReady(service.calculateAge(100)) { resultOpt => - resultOpt mustBe defined - val result = resultOpt.get - result.snpCount mustBe 10 - result.estimate.ybp must be > 0 - result.estimate.ybpLower mustBe defined - result.estimate.ybpUpper mustBe defined - result.estimate.ybpLower.get must be < result.estimate.ybp - result.estimate.ybpUpper.get must be > result.estimate.ybp - result.method mustBe "SNP_POISSON" - } - } - - "return zero age for haplogroup with no variants" in { - val hg = makeHaplogroup(100, "R") - when(mockCoreRepo.findById(100)).thenReturn(Future.successful(Some(hg))) - when(mockVariantRepo.getHaplogroupVariants(100)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.calculateAge(100)) { resultOpt => - resultOpt mustBe defined - resultOpt.get.estimate.ybp mustBe 0 - resultOpt.get.snpCount mustBe 0 - } - } - - "return None for nonexistent haplogroup" in { - when(mockCoreRepo.findById(999)).thenReturn(Future.successful(None)) - when(mockVariantRepo.getHaplogroupVariants(999)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.calculateAge(999)) { resultOpt => - resultOpt mustBe empty - } - } - - "produce narrower CIs with more SNPs" in { - // With 5 SNPs - val result5 = service.calculateFromSnpCount(5, 15_000_000L, 8.33e-10) - // With 50 SNPs - val result50 = service.calculateFromSnpCount(50, 15_000_000L, 8.33e-10) - - // Relative CI width should be narrower for 50 SNPs - val relWidth5 = (result5.estimate.ybpUpper.get - result5.estimate.ybpLower.get).toDouble / result5.estimate.ybp - val relWidth50 = (result50.estimate.ybpUpper.get - result50.estimate.ybpLower.get).toDouble / result50.estimate.ybp - relWidth50 must be < relWidth5 - } - - "scale linearly with callable loci" in { - val result15m = service.calculateFromSnpCount(10, 15_000_000L, 8.33e-10) - val 
result30m = service.calculateFromSnpCount(10, 30_000_000L, 8.33e-10) - - // Same SNP count, double the loci → half the age - result30m.estimate.ybp mustBe (result15m.estimate.ybp / 2) +- 1 - } - - "calculate TMRCA from two siblings" in { - val variants1 = (1 to 5).map(makeVariant) - val variants2 = (6 to 12).map(makeVariant) - - when(mockVariantRepo.getHaplogroupVariants(10)).thenReturn(Future.successful(variants1)) - when(mockVariantRepo.getHaplogroupVariants(11)).thenReturn(Future.successful(variants2)) - - whenReady(service.calculateTmrca(10, 11)) { resultOpt => - resultOpt mustBe defined - val result = resultOpt.get - result.snpCount mustBe 12 // 5 + 7 - result.method mustBe "SNP_POISSON_TMRCA" - result.estimate.ybp must be > 0 - } - } - - "return None for TMRCA when both children have zero variants" in { - when(mockVariantRepo.getHaplogroupVariants(10)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(11)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.calculateTmrca(10, 11)) { resultOpt => - resultOpt mustBe empty - } - } - - "recalculate subtree bottom-up" in { - val root = makeHaplogroup(1, "R") - val child1 = makeHaplogroup(2, "R-M269") - val child2 = makeHaplogroup(3, "R-M420") - - // Tree: R -> {R-M269, R-M420} - when(mockCoreRepo.getDirectChildren(1)).thenReturn(Future.successful(Seq(child1, child2))) - when(mockCoreRepo.getDirectChildren(2)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(3)).thenReturn(Future.successful(Seq.empty)) - - when(mockCoreRepo.findById(1)).thenReturn(Future.successful(Some(root))) - when(mockCoreRepo.findById(2)).thenReturn(Future.successful(Some(child1))) - when(mockCoreRepo.findById(3)).thenReturn(Future.successful(Some(child2))) - - // Root has 5 SNPs, children have 3 and 4 - when(mockVariantRepo.getHaplogroupVariants(1)).thenReturn(Future.successful((1 to 5).map(makeVariant))) - 
when(mockVariantRepo.getHaplogroupVariants(2)).thenReturn(Future.successful((6 to 8).map(makeVariant))) - when(mockVariantRepo.getHaplogroupVariants(3)).thenReturn(Future.successful((9 to 12).map(makeVariant))) - - whenReady(service.recalculateSubtree(1)) { results => - results must have size 3 // root + 2 children - val rootResult = results.find(_.haplogroupId == 1) - val child1Result = results.find(_.haplogroupId == 2) - val child2Result = results.find(_.haplogroupId == 3) - - rootResult mustBe defined - child1Result mustBe defined - child2Result mustBe defined - - // Root must be at least as old as oldest child - rootResult.get.newEstimate.ybp must be >= child1Result.get.newEstimate.ybp - rootResult.get.newEstimate.ybp must be >= child2Result.get.newEstimate.ybp - } - } - } - - "poissonConfidenceInterval" should { - "return (0, >0) for zero mutations" in { - val (lower, upper) = service.poissonConfidenceInterval(0, 0.95) - lower mustBe 0.0 - upper must be > 0.0 - } - - "return symmetric-ish interval for large counts" in { - val (lower, upper) = service.poissonConfidenceInterval(100, 0.95) - lower must be > 0.0 - upper must be > 100.0 - lower must be < 100.0 - // Should be roughly symmetric for large N - val width = upper - lower - val midpoint = (upper + lower) / 2 - midpoint mustBe 100.0 +- 5.0 - } - - "widen with confidence level" in { - val (l90, u90) = service.poissonConfidenceInterval(50, 0.90) - val (l95, u95) = service.poissonConfidenceInterval(50, 0.95) - // 95% CI should be wider than 90% (currently both use z=1.96, but testing the shape) - (u95 - l95) must be >= (u90 - l90) - } - } - - "temporalResolution" should { - "return ~83 years/SNP for 15 Mbp coverage" in { - val res = service.temporalResolution(15_000_000L) - res mustBe 80.0 +- 5.0 // ~80 years per SNP - } - - "return ~40 years/SNP for 30 Mbp coverage" in { - val res = service.temporalResolution(30_000_000L) - res mustBe 40.0 +- 3.0 - } - } - - "getCallableLociForSample" should { - val 
sampleGuid = UUID.randomUUID() - - "return per-sample callable loci when available" in { - val loci = BiosampleCallableLoci( - id = Some(1), sampleType = "citizen", sampleId = 42, - sampleGuid = Some(sampleGuid), chromosome = "chrY", - totalCallableBp = 23_000_000L, regionCount = Some(150), - bedFileHash = Some("abc123"), computedAt = now, - sourceTestTypeId = Some(1), - yXdegenCallableBp = Some(10_000_000L), - yAmpliconicCallableBp = Some(8_000_000L), - yPalindromicCallableBp = Some(5_000_000L) - ) - when(mockCallableLociRepo.findBySampleGuid(sampleGuid, "chrY")) - .thenReturn(Future.successful(Some(loci))) - - whenReady(service.getCallableLociForSample(sampleGuid)) { result => - result mustBe 23_000_000L - } - } - - "fall back to default when no per-sample data exists" in { - when(mockCallableLociRepo.findBySampleGuid(sampleGuid, "chrY")) - .thenReturn(Future.successful(None)) - - whenReady(service.getCallableLociForSample(sampleGuid)) { result => - result mustBe service.DefaultCallableLoci - } - } - } - - "calculateAgeForSample" should { - val sampleGuid = UUID.randomUUID() - - "use per-sample callable loci for more accurate estimate" in { - val hg = makeHaplogroup(100, "R-M269") - val variants = (1 to 10).map(makeVariant) - - // Sample has 23 Mbp callable loci (Y Elite test) - val loci = BiosampleCallableLoci( - id = Some(1), sampleType = "citizen", sampleId = 42, - sampleGuid = Some(sampleGuid), chromosome = "chrY", - totalCallableBp = 23_000_000L, regionCount = None, - bedFileHash = None, computedAt = now, - sourceTestTypeId = None, - yXdegenCallableBp = None, yAmpliconicCallableBp = None, yPalindromicCallableBp = None - ) - when(mockCallableLociRepo.findBySampleGuid(sampleGuid, "chrY")) - .thenReturn(Future.successful(Some(loci))) - when(mockCoreRepo.findById(100)).thenReturn(Future.successful(Some(hg))) - when(mockVariantRepo.getHaplogroupVariants(100)).thenReturn(Future.successful(variants)) - - whenReady(service.calculateAgeForSample(100, sampleGuid)) { 
resultOpt => - resultOpt mustBe defined - val result = resultOpt.get - result.callableLoci mustBe 23_000_000L - result.snpCount mustBe 10 - result.estimate.ybp must be > 0 - } - } - - "produce different estimate than default when callable loci differs" in { - val hg = makeHaplogroup(100, "R-M269") - val variants = (1 to 10).map(makeVariant) - - // With 23 Mbp (more than default 15 Mbp) → younger estimate - val loci = BiosampleCallableLoci( - id = Some(1), sampleType = "citizen", sampleId = 42, - sampleGuid = Some(sampleGuid), chromosome = "chrY", - totalCallableBp = 23_000_000L, regionCount = None, - bedFileHash = None, computedAt = now, - sourceTestTypeId = None, - yXdegenCallableBp = None, yAmpliconicCallableBp = None, yPalindromicCallableBp = None - ) - when(mockCallableLociRepo.findBySampleGuid(sampleGuid, "chrY")) - .thenReturn(Future.successful(Some(loci))) - when(mockCoreRepo.findById(100)).thenReturn(Future.successful(Some(hg))) - when(mockVariantRepo.getHaplogroupVariants(100)).thenReturn(Future.successful(variants)) - - val sampleResult = service.calculateAgeForSample(100, sampleGuid).futureValue.get - val defaultResult = service.calculateFromSnpCount(10, service.DefaultCallableLoci, service.DefaultMutationRate) - - // More callable loci → younger age (same SNPs over larger search space) - sampleResult.estimate.ybp must be < defaultResult.estimate.ybp - } - } - - "AgeEstimate" should { - "convert YBP to calendar year correctly" in { - AgeEstimate(2000).toCalendarYear mustBe -50 // 50 BC - AgeEstimate(100).toCalendarYear mustBe 1850 // 1850 AD - AgeEstimate(1950).toCalendarYear mustBe 0 // Year 0 - } - - "format as human-readable string" in { - AgeEstimate(2000).formatted mustBe "50 BC" - AgeEstimate(100).formatted mustBe "1850 AD" - } - - "format with range" in { - val est = AgeEstimate(1000, Some(800), Some(1200)) - est.formattedWithRange mustBe "950 AD (750 AD – 1150 AD)" - } - } -} diff --git a/test/services/ChipDataRegistrationServiceSpec.scala 
b/test/services/ChipDataRegistrationServiceSpec.scala deleted file mode 100644 index 43f37863..00000000 --- a/test/services/ChipDataRegistrationServiceSpec.scala +++ /dev/null @@ -1,274 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.atmosphere.{HaplogroupAssignments, PrivateVariantData, VariantCall, HaplogroupResult as AtmoHaplogroupResult} -import models.domain.discovery.BiosamplePrivateVariant -import models.domain.genomics.* -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.GenotypeDataRepository - -import java.util.UUID -import scala.concurrent.Future - -class ChipDataRegistrationServiceSpec extends ServiceSpec { - - val mockTestTypeService: TestTypeService = mock[TestTypeService] - val mockGenotypeRepo: GenotypeDataRepository = mock[GenotypeDataRepository] - val mockPvService: PrivateVariantExtractionService = mock[PrivateVariantExtractionService] - - val service = new ChipDataRegistrationService( - mockTestTypeService, mockGenotypeRepo, mockPvService - ) - - override def beforeEach(): Unit = { - reset(mockTestTypeService, mockGenotypeRepo, mockPvService) - } - - val chip23andMe: TestTypeRow = TestTypeRow( - id = Some(5), code = "SNP_ARRAY_23ANDME", displayName = "23andMe v5 Chip", - category = DataGenerationMethod.Genotyping, vendor = Some("23andMe"), - targetType = TargetType.Mixed, - expectedMarkerCount = Some(640000), - supportsHaplogroupY = true, supportsHaplogroupMt = true, - supportsAutosomalIbd = true, supportsAncestry = true, - typicalFileFormats = List("TXT", "CSV") - ) - - val goodMetrics: GenotypeMetrics = GenotypeMetrics( - totalMarkersCalled = Some(625000), - totalMarkersPossible = Some(640000), - callRate = Some(0.977), - noCallRate = Some(0.023), - yMarkersCalled = Some(2100), - yMarkersTotal = Some(2200), - mtMarkersCalled = Some(3100), - mtMarkersTotal = Some(3200) - ) - - val sampleGuid: UUID = 
UUID.randomUUID() - - "ChipDataRegistrationService.assessQuality" should { - - "rate HIGH quality for good metrics" in { - val result = service.assessQuality(goodMetrics, Some(chip23andMe)) - result.overallQuality mustBe "HIGH" - result.noCallRateAcceptable mustBe true - result.yDnaCoverage mustBe Some("SUFFICIENT") - result.mtDnaCoverage mustBe Some("SUFFICIENT") - result.warnings mustBe empty - } - - "rate LOW quality for high no-call rate" in { - val badMetrics = goodMetrics.copy(noCallRate = Some(0.10)) - val result = service.assessQuality(badMetrics, Some(chip23andMe)) - result.overallQuality mustBe "LOW" - result.noCallRateAcceptable mustBe false - result.warnings.exists(_.contains("No-call rate")) mustBe true - } - - "rate LOW quality for insufficient markers" in { - val fewMarkers = goodMetrics.copy(totalMarkersCalled = Some(50000)) - val result = service.assessQuality(fewMarkers, Some(chip23andMe)) - result.overallQuality mustBe "LOW" - result.warnings.exists(_.contains("below minimum")) mustBe true - } - - "warn when markers below expected for chip type" in { - val lowMarkers = goodMetrics.copy(totalMarkersCalled = Some(500000)) - val result = service.assessQuality(lowMarkers, Some(chip23andMe)) - result.overallQuality mustBe "MEDIUM" - result.warnings.exists(_.contains("% of expected")) mustBe true - } - - "report LIMITED Y-DNA coverage for few markers" in { - val fewY = goodMetrics.copy(yMarkersCalled = Some(20)) - val result = service.assessQuality(fewY, Some(chip23andMe)) - result.yDnaCoverage mustBe Some("LIMITED") - } - - "report NONE Y-DNA coverage for zero markers" in { - val noY = goodMetrics.copy(yMarkersCalled = Some(0)) - val result = service.assessQuality(noY, Some(chip23andMe)) - result.yDnaCoverage mustBe Some("NONE") - } - } - - "ChipDataRegistrationService.validateChipData" should { - - "accept valid chip data" in { - when(mockTestTypeService.getByCode("SNP_ARRAY_23ANDME")) - .thenReturn(Future.successful(Some(chip23andMe))) - - 
whenReady(service.validateChipData("SNP_ARRAY_23ANDME", "23andMe", goodMetrics)) { result => - result mustBe a[Right[?, ?]] - result.toOption.get.code mustBe "SNP_ARRAY_23ANDME" - } - } - - "reject unknown test type" in { - when(mockTestTypeService.getByCode("UNKNOWN")) - .thenReturn(Future.successful(None)) - - whenReady(service.validateChipData("UNKNOWN", "Foo", goodMetrics)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get.head must include("Unknown test type") - } - } - - "reject non-genotyping test type" in { - val seqType = chip23andMe.copy(category = DataGenerationMethod.Sequencing) - when(mockTestTypeService.getByCode("WGS")) - .thenReturn(Future.successful(Some(seqType))) - - whenReady(service.validateChipData("WGS", "Illumina", goodMetrics)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get.exists(_.contains("not a genotyping test")) mustBe true - } - } - - "reject blank provider" in { - when(mockTestTypeService.getByCode("SNP_ARRAY_23ANDME")) - .thenReturn(Future.successful(Some(chip23andMe))) - - whenReady(service.validateChipData("SNP_ARRAY_23ANDME", "", goodMetrics)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get.exists(_.contains("Provider")) mustBe true - } - } - - "reject missing marker count" in { - when(mockTestTypeService.getByCode("SNP_ARRAY_23ANDME")) - .thenReturn(Future.successful(Some(chip23andMe))) - val noMarkers = goodMetrics.copy(totalMarkersCalled = None) - - whenReady(service.validateChipData("SNP_ARRAY_23ANDME", "23andMe", noMarkers)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get.exists(_.contains("markers called is required")) mustBe true - } - } - } - - "ChipDataRegistrationService.extractPrivateVariantsFromChip" should { - - "extract Y and mtDNA private variants" in { - val yVariants = Seq( - VariantCall("Y", 12345, "A", "G", Some("rs123"), Some("M269"), None, None, None), - VariantCall("Y", 23456, "C", "T", None, None, None, None, None) - ) - val 
mtVariants = Seq( - VariantCall("MT", 1234, "A", "G", None, None, None, None, None) - ) - - val assignments = HaplogroupAssignments( - yDna = Some(AtmoHaplogroupResult( - haplogroupName = "R-M269", - score = 0.95, - matchingSnps = Some(50), - mismatchingSnps = Some(2), - ancestralMatches = None, - treeDepth = Some(10), - lineagePath = Some(Seq("R", "R1", "R1b", "R-M269")), - privateVariants = Some(PrivateVariantData(Some(yVariants), None, None)) - )), - mtDna = Some(AtmoHaplogroupResult( - haplogroupName = "H2a", - score = 0.99, - matchingSnps = Some(30), - mismatchingSnps = Some(0), - ancestralMatches = None, - treeDepth = Some(8), - lineagePath = Some(Seq("H", "H2", "H2a")), - privateVariants = Some(PrivateVariantData(Some(mtVariants), None, None)) - )) - ) - - when(mockPvService.extractFromCitizenBiosample( - meq(1), meq(sampleGuid), meq("R-M269"), meq(HaplogroupType.Y), any[Seq[VariantCall]] - )).thenReturn(Future.successful(Seq.fill(2)(mock[BiosamplePrivateVariant]))) - - when(mockPvService.extractFromCitizenBiosample( - meq(1), meq(sampleGuid), meq("H2a"), meq(HaplogroupType.MT), any[Seq[VariantCall]] - )).thenReturn(Future.successful(Seq.fill(1)(mock[BiosamplePrivateVariant]))) - - whenReady(service.extractPrivateVariantsFromChip(1, sampleGuid, Some(assignments))) { result => - result.yPrivateVariants mustBe 2 - result.mtPrivateVariants mustBe 1 - result.total mustBe 3 - } - } - - "return zero when no assignments" in { - whenReady(service.extractPrivateVariantsFromChip(1, sampleGuid, None)) { result => - result.yPrivateVariants mustBe 0 - result.mtPrivateVariants mustBe 0 - verify(mockPvService, never()).extractFromCitizenBiosample( - any[Int], any[UUID], any[String], any[HaplogroupType], any[Seq[VariantCall]] - ) - } - } - - "return zero when no private variants in assignments" in { - val assignments = HaplogroupAssignments( - yDna = Some(AtmoHaplogroupResult( - "R-M269", 0.95, Some(50), Some(2), None, Some(10), - Some(Seq("R", "R-M269")), privateVariants = 
None - )), - mtDna = None - ) - - whenReady(service.extractPrivateVariantsFromChip(1, sampleGuid, Some(assignments))) { result => - result.yPrivateVariants mustBe 0 - result.mtPrivateVariants mustBe 0 - } - } - - "handle extraction failure gracefully" in { - val variants = Seq(VariantCall("Y", 12345, "A", "G", None, None, None, None, None)) - val assignments = HaplogroupAssignments( - yDna = Some(AtmoHaplogroupResult( - "R-M269", 0.95, Some(50), Some(2), None, Some(10), - Some(Seq("R", "R-M269")), - privateVariants = Some(PrivateVariantData(Some(variants), None, None)) - )), - mtDna = None - ) - - when(mockPvService.extractFromCitizenBiosample( - any[Int], any[UUID], any[String], any[HaplogroupType], any[Seq[VariantCall]] - )).thenReturn(Future.failed(new RuntimeException("Tree lookup failed"))) - - whenReady(service.extractPrivateVariantsFromChip(1, sampleGuid, Some(assignments))) { result => - result.yPrivateVariants mustBe 0 - result.mtPrivateVariants mustBe 0 - } - } - } - - "ChipDataRegistrationService.findExistingByHash" should { - - "detect duplicate by source file hash" in { - val existing = GenotypeData( - id = Some(1), sampleGuid = sampleGuid, - sourceFileHash = Some("abc123hash") - ) - when(mockGenotypeRepo.findBySourceFileHash("abc123hash")) - .thenReturn(Future.successful(Some(existing))) - - whenReady(service.findExistingByHash("abc123hash")) { result => - result mustBe defined - result.get.id mustBe Some(1) - } - } - - "return None when no duplicate" in { - when(mockGenotypeRepo.findBySourceFileHash("newhash")) - .thenReturn(Future.successful(None)) - - whenReady(service.findExistingByHash("newhash")) { result => - result mustBe None - } - } - } -} diff --git a/test/services/CoverageExpectationServiceSpec.scala b/test/services/CoverageExpectationServiceSpec.scala deleted file mode 100644 index 60a45d79..00000000 --- a/test/services/CoverageExpectationServiceSpec.scala +++ /dev/null @@ -1,378 +0,0 @@ -package services - -import helpers.ServiceSpec 
-import models.domain.genomics.* -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{reset, when} -import repositories.{CoverageExpectationProfileRepository, TestTypeRepository} - -import scala.concurrent.Future - -class CoverageExpectationServiceSpec extends ServiceSpec { - - val mockProfileRepo: CoverageExpectationProfileRepository = mock[CoverageExpectationProfileRepository] - val mockTestTypeRepo: TestTypeRepository = mock[TestTypeRepository] - - val service = new CoverageExpectationService(mockProfileRepo, mockTestTypeRepo) - - override def beforeEach(): Unit = { - reset(mockProfileRepo, mockTestTypeRepo) - } - - val wgsTestType: TestTypeRow = TestTypeRow( - id = Some(1), code = "WGS", displayName = "Whole Genome Sequencing", - category = DataGenerationMethod.Sequencing, vendor = Some("Illumina"), - targetType = TargetType.WholeGenome, - supportsHaplogroupY = true, supportsHaplogroupMt = true, - supportsAutosomalIbd = true, supportsAncestry = true, - typicalFileFormats = List("BAM", "CRAM") - ) - - val chipTestType: TestTypeRow = TestTypeRow( - id = Some(5), code = "SNP_ARRAY_23ANDME", displayName = "23andMe v5 Chip", - category = DataGenerationMethod.Genotyping, vendor = Some("23andMe"), - targetType = TargetType.Mixed, - expectedMarkerCount = Some(640000), - supportsHaplogroupY = true, supportsHaplogroupMt = true, - supportsAutosomalIbd = true, supportsAncestry = true, - typicalFileFormats = List("TXT", "CSV") - ) - - val wgsYSnpProfile: CoverageExpectationProfile = CoverageExpectationProfile( - id = Some(1), testTypeId = 1, contigName = "Y", variantClass = "SNP", - minDepthHigh = 20.0, minDepthMedium = 10.0, minDepthLow = 5.0, - minCoveragePct = Some(0.95), minMappingQuality = Some(30.0), minCallablePct = Some(0.90) - ) - - val wgsMtSnpProfile: CoverageExpectationProfile = CoverageExpectationProfile( - id = Some(2), testTypeId = 1, contigName = "MT", variantClass = "SNP", - minDepthHigh = 100.0, minDepthMedium = 50.0, 
minDepthLow = 20.0, - minCoveragePct = Some(0.99), minMappingQuality = Some(30.0), minCallablePct = Some(0.95) - ) - - val wgsYStrProfile: CoverageExpectationProfile = CoverageExpectationProfile( - id = Some(3), testTypeId = 1, contigName = "Y", variantClass = "STR", - minDepthHigh = 30.0, minDepthMedium = 15.0, minDepthLow = 8.0, - minCoveragePct = Some(0.90), minMappingQuality = Some(20.0) - ) - - val chipYSnpProfile: CoverageExpectationProfile = CoverageExpectationProfile( - id = Some(10), testTypeId = 5, contigName = "Y", variantClass = "SNP", - minDepthHigh = 0.0, minDepthMedium = 0.0, minDepthLow = 0.0, - minCoveragePct = Some(0.0) - ) - - "CoverageExpectationService.assessVariantCallingConfidence" should { - - "return HIGH confidence for WGS with good depth" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq(wgsYSnpProfile))) - - val metrics = CoverageMetricsInput( - meanDepth = Some(25.0), coveragePctAt1x = Some(0.98), - meanMappingQuality = Some(40.0), callablePct = Some(0.95) - ) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - result mustBe defined - val assessment = result.get - assessment.testTypeCode mustBe "WGS" - assessment.isChipBased mustBe false - assessment.overallConfidence mustBe "high" - assessment.confidences.head.depthConfidence mustBe "high" - assessment.confidences.head.coverageAdequate mustBe true - assessment.confidences.head.mappingQualityAdequate mustBe true - assessment.confidences.head.callableBasesAdequate mustBe true - } - } - - "return MEDIUM confidence for WGS with moderate depth" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq(wgsYSnpProfile))) - - val metrics = CoverageMetricsInput( - meanDepth = Some(12.0), coveragePctAt1x = Some(0.96), - 
meanMappingQuality = Some(35.0), callablePct = Some(0.92) - ) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - result.get.overallConfidence mustBe "medium" - result.get.confidences.head.depthConfidence mustBe "medium" - } - } - - "return LOW confidence for WGS with low depth" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq(wgsYSnpProfile))) - - val metrics = CoverageMetricsInput( - meanDepth = Some(6.0), coveragePctAt1x = Some(0.96), - meanMappingQuality = Some(35.0), callablePct = Some(0.92) - ) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - result.get.overallConfidence mustBe "low" - } - } - - "return INSUFFICIENT for WGS with very low depth" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq(wgsYSnpProfile))) - - val metrics = CoverageMetricsInput( - meanDepth = Some(2.0), coveragePctAt1x = Some(0.96), - meanMappingQuality = Some(35.0), callablePct = Some(0.92) - ) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - result.get.overallConfidence mustBe "insufficient" - } - } - - "downgrade confidence when coverage is below threshold" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq(wgsYSnpProfile))) - - val metrics = CoverageMetricsInput( - meanDepth = Some(25.0), coveragePctAt1x = Some(0.80), - meanMappingQuality = Some(40.0), callablePct = Some(0.95) - ) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - result.get.confidences.head.depthConfidence mustBe "high" - result.get.confidences.head.coverageAdequate mustBe false - result.get.overallConfidence mustBe 
"medium" - } - } - - "downgrade confidence when mapping quality is low" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq(wgsYSnpProfile))) - - val metrics = CoverageMetricsInput( - meanDepth = Some(25.0), coveragePctAt1x = Some(0.98), - meanMappingQuality = Some(15.0), callablePct = Some(0.95) - ) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - result.get.confidences.head.mappingQualityAdequate mustBe false - result.get.overallConfidence mustBe "medium" - } - } - - "downgrade twice when multiple thresholds fail" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq(wgsYSnpProfile))) - - val metrics = CoverageMetricsInput( - meanDepth = Some(25.0), coveragePctAt1x = Some(0.80), - meanMappingQuality = Some(15.0), callablePct = Some(0.95) - ) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - result.get.overallConfidence mustBe "low" - } - } - - "use lowest confidence across multiple profiles" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn( - Future.successful(Seq(wgsYSnpProfile, wgsMtSnpProfile)) - ) - - val metrics = CoverageMetricsInput( - meanDepth = Some(25.0), coveragePctAt1x = Some(0.995), - meanMappingQuality = Some(40.0), callablePct = Some(0.96) - ) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - val assessment = result.get - assessment.confidences.size mustBe 2 - val yConf = assessment.confidences.find(_.contigName == "Y").get - val mtConf = assessment.confidences.find(_.contigName == "MT").get - yConf.depthConfidence mustBe "high" - mtConf.depthConfidence mustBe "low" - assessment.overallConfidence mustBe 
"low" - } - } - - "return None for unknown test type" in { - when(mockTestTypeRepo.findByCode("UNKNOWN")).thenReturn(Future.successful(None)) - - val metrics = CoverageMetricsInput(meanDepth = Some(25.0)) - - whenReady(service.assessVariantCallingConfidence("UNKNOWN", metrics)) { result => - result mustBe None - } - } - - "return None when no profiles exist" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq.empty)) - - val metrics = CoverageMetricsInput(meanDepth = Some(25.0)) - - whenReady(service.assessVariantCallingConfidence("WGS", metrics)) { result => - result mustBe None - } - } - - "handle chip-based test type with marker count" in { - when(mockTestTypeRepo.findByCode("SNP_ARRAY_23ANDME")).thenReturn(Future.successful(Some(chipTestType))) - when(mockProfileRepo.findByTestTypeId(5)).thenReturn(Future.successful(Seq(chipYSnpProfile))) - - val metrics = CoverageMetricsInput(markerCount = Some(2100), noCallRate = Some(0.02)) - - whenReady(service.assessVariantCallingConfidence("SNP_ARRAY_23ANDME", metrics)) { result => - val assessment = result.get - assessment.isChipBased mustBe true - assessment.overallConfidence mustBe "high" - } - } - - "handle chip with low marker count" in { - when(mockTestTypeRepo.findByCode("SNP_ARRAY_23ANDME")).thenReturn(Future.successful(Some(chipTestType))) - when(mockProfileRepo.findByTestTypeId(5)).thenReturn(Future.successful(Seq(chipYSnpProfile))) - - val metrics = CoverageMetricsInput(markerCount = Some(50), noCallRate = Some(0.02)) - - whenReady(service.assessVariantCallingConfidence("SNP_ARRAY_23ANDME", metrics)) { result => - result.get.overallConfidence mustBe "low" - } - } - - "downgrade chip confidence for high no-call rate" in { - when(mockTestTypeRepo.findByCode("SNP_ARRAY_23ANDME")).thenReturn(Future.successful(Some(chipTestType))) - 
when(mockProfileRepo.findByTestTypeId(5)).thenReturn(Future.successful(Seq(chipYSnpProfile))) - - val metrics = CoverageMetricsInput(markerCount = Some(2100), noCallRate = Some(0.10)) - - whenReady(service.assessVariantCallingConfidence("SNP_ARRAY_23ANDME", metrics)) { result => - result.get.overallConfidence mustBe "medium" - } - } - } - - "CoverageExpectationService.getConfidenceForVariant" should { - - "return confidence level for specific variant type" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeContigAndClass(1, "Y", "SNP")) - .thenReturn(Future.successful(Some(wgsYSnpProfile))) - - whenReady(service.getConfidenceForVariant("WGS", "Y", "SNP", 25.0)) { result => - result mustBe Some("high") - } - } - - "return None when no profile matches" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeContigAndClass(1, "X", "SNP")) - .thenReturn(Future.successful(None)) - - whenReady(service.getConfidenceForVariant("WGS", "X", "SNP", 25.0)) { result => - result mustBe None - } - } - } - - "CoverageExpectationService.getProfilesForTestType" should { - - "return profiles for known test type" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgsTestType))) - when(mockProfileRepo.findByTestTypeId(1)).thenReturn( - Future.successful(Seq(wgsYSnpProfile, wgsMtSnpProfile, wgsYStrProfile)) - ) - - whenReady(service.getProfilesForTestType("WGS")) { result => - result.size mustBe 3 - } - } - - "return empty for unknown test type" in { - when(mockTestTypeRepo.findByCode("UNKNOWN")).thenReturn(Future.successful(None)) - - whenReady(service.getProfilesForTestType("UNKNOWN")) { result => - result mustBe empty - } - } - } - - "CoverageExpectationProfile.confidenceForDepth" should { - - "return high for depth above high threshold" in { - wgsYSnpProfile.confidenceForDepth(25.0) mustBe 
"high" - } - - "return high at exact high threshold" in { - wgsYSnpProfile.confidenceForDepth(20.0) mustBe "high" - } - - "return medium for depth between medium and high" in { - wgsYSnpProfile.confidenceForDepth(15.0) mustBe "medium" - } - - "return low for depth between low and medium" in { - wgsYSnpProfile.confidenceForDepth(7.0) mustBe "low" - } - - "return insufficient for depth below low threshold" in { - wgsYSnpProfile.confidenceForDepth(3.0) mustBe "insufficient" - } - } - - "CoverageMetricsInput.fromEmbeddedCoverage" should { - - "create metrics from embedded coverage" in { - val ec = EmbeddedCoverage( - meanDepth = Some(25.0), - percentCoverageAt1x = Some(0.98), - meanMappingQuality = Some(40.0), - basesCallable = Some(900000L), - basesNoCoverage = Some(50000L), - basesLowQualityMapping = Some(50000L) - ) - - val input = CoverageMetricsInput.fromEmbeddedCoverage(ec) - input.meanDepth mustBe Some(25.0) - input.coveragePctAt1x mustBe Some(0.98) - input.meanMappingQuality mustBe Some(40.0) - input.callablePct mustBe defined - input.callablePct.get mustBe 0.9 +- 0.01 - } - } - - "CoverageMetricsInput.fromGenotypeMetrics" should { - - "create metrics from genotype metrics" in { - val gm = GenotypeMetrics( - totalMarkersCalled = Some(625000), - noCallRate = Some(0.023) - ) - - val input = CoverageMetricsInput.fromGenotypeMetrics(gm) - input.markerCount mustBe Some(625000) - input.noCallRate mustBe Some(0.023) - input.meanDepth mustBe None - } - } - - "CoverageExpectationService.downgrade" should { - - "downgrade high to medium" in { - service.downgrade("high") mustBe "medium" - } - - "downgrade medium to low" in { - service.downgrade("medium") mustBe "low" - } - - "downgrade low to insufficient" in { - service.downgrade("low") mustBe "insufficient" - } - - "keep insufficient as insufficient" in { - service.downgrade("insufficient") mustBe "insufficient" - } - } -} diff --git a/test/services/DiscoveryProposalServiceSpec.scala 
b/test/services/DiscoveryProposalServiceSpec.scala deleted file mode 100644 index c0c07341..00000000 --- a/test/services/DiscoveryProposalServiceSpec.scala +++ /dev/null @@ -1,267 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.domain.discovery.* -import org.mockito.ArgumentMatchers.{any, anyInt} -import org.mockito.Mockito.{never, reset, verify, when} -import play.api.libs.json.Json -import repositories.{CuratorActionRepository, ProposedBranchRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class DiscoveryProposalServiceSpec extends ServiceSpec { - - val mockProposedBranchRepo: ProposedBranchRepository = mock[ProposedBranchRepository] - val mockCuratorActionRepo: CuratorActionRepository = mock[CuratorActionRepository] - - val service = new DiscoveryProposalService(mockProposedBranchRepo, mockCuratorActionRepo) - - override def beforeEach(): Unit = { - reset(mockProposedBranchRepo, mockCuratorActionRepo) - } - - val now: LocalDateTime = LocalDateTime.of(2025, 6, 1, 12, 0) - val curatorId = "curator@decodingus.org" - - def makeProposal( - id: Int, - status: ProposedBranchStatus = ProposedBranchStatus.Pending, - consensusCount: Int = 1 - ): ProposedBranch = - ProposedBranch( - id = Some(id), - parentHaplogroupId = 100, - haplogroupType = HaplogroupType.Y, - status = status, - consensusCount = consensusCount, - createdAt = now, - updatedAt = now - ) - - "DiscoveryProposalService" should { - - "list proposals by status" in { - val proposals = Seq(makeProposal(1, ProposedBranchStatus.ReadyForReview)) - when(mockProposedBranchRepo.findByStatus(ProposedBranchStatus.ReadyForReview, Some(HaplogroupType.Y))) - .thenReturn(Future.successful(proposals)) - - whenReady(service.listProposals(Some(HaplogroupType.Y), Some(ProposedBranchStatus.ReadyForReview))) { result => - result must have size 1 - result.head.id mustBe Some(1) - } - } - - "list all active proposals when no 
status filter" in { - when(mockProposedBranchRepo.findByStatus(ProposedBranchStatus.Pending, None)) - .thenReturn(Future.successful(Seq(makeProposal(1)))) - when(mockProposedBranchRepo.findByStatus(ProposedBranchStatus.ReadyForReview, None)) - .thenReturn(Future.successful(Seq(makeProposal(2, ProposedBranchStatus.ReadyForReview)))) - when(mockProposedBranchRepo.findByStatus(ProposedBranchStatus.UnderReview, None)) - .thenReturn(Future.successful(Seq.empty)) - when(mockProposedBranchRepo.findByStatus(ProposedBranchStatus.Accepted, None)) - .thenReturn(Future.successful(Seq.empty)) - - whenReady(service.listProposals(None, None)) { result => - result must have size 2 - } - } - - "get proposal details with variants, evidence, and audit trail" in { - val proposal = makeProposal(10, ProposedBranchStatus.ReadyForReview, 3) - val variants = Seq(ProposedBranchVariant(Some(1), 10, 42, true, 3, now, now)) - val evidence = Seq(ProposedBranchEvidence(Some(1), 10, BiosampleSourceType.External, 1, UUID.randomUUID())) - val actions = Seq(CuratorAction(Some(1), curatorId, CuratorActionType.Review, CuratorTargetType.ProposedBranch, 10)) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - when(mockProposedBranchRepo.getVariants(10)).thenReturn(Future.successful(variants)) - when(mockProposedBranchRepo.getEvidence(10)).thenReturn(Future.successful(evidence)) - when(mockCuratorActionRepo.findByTarget(CuratorTargetType.ProposedBranch, 10)) - .thenReturn(Future.successful(actions)) - - whenReady(service.getProposalDetails(10)) { result => - result mustBe defined - val details = result.get - details.proposal.id mustBe Some(10) - details.variants must have size 1 - details.evidence must have size 1 - details.auditTrail must have size 1 - } - } - - "return None for nonexistent proposal details" in { - when(mockProposedBranchRepo.findById(999)).thenReturn(Future.successful(None)) - - whenReady(service.getProposalDetails(999)) { result => - result 
mustBe empty - } - } - - "accept a proposal under review" in { - val proposal = makeProposal(10, ProposedBranchStatus.UnderReview, 5) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - when(mockProposedBranchRepo.update(any[ProposedBranch])).thenReturn(Future.successful(true)) - when(mockCuratorActionRepo.create(any[CuratorAction])).thenAnswer { invocation => - val a = invocation.getArgument[CuratorAction](0) - Future.successful(a.copy(id = Some(1))) - } - - whenReady(service.acceptProposal(10, curatorId, "R-Z1234", Some("Strong evidence"))) { result => - result.status mustBe ProposedBranchStatus.Accepted - result.proposedName mustBe Some("R-Z1234") - result.reviewedBy mustBe Some(curatorId) - verify(mockCuratorActionRepo).create(any[CuratorAction]) - } - } - - "reject accepting a proposal in Pending status" in { - val proposal = makeProposal(10, ProposedBranchStatus.Pending) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - - whenReady(service.acceptProposal(10, curatorId, "R-Z1234", None).failed) { ex => - ex mustBe a[IllegalStateException] - ex.getMessage must include("Cannot transition") - verify(mockProposedBranchRepo, never()).update(any[ProposedBranch]) - } - } - - "reject a proposal" in { - val proposal = makeProposal(10, ProposedBranchStatus.UnderReview, 3) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - when(mockProposedBranchRepo.update(any[ProposedBranch])).thenReturn(Future.successful(true)) - when(mockCuratorActionRepo.create(any[CuratorAction])).thenAnswer { invocation => - val a = invocation.getArgument[CuratorAction](0) - Future.successful(a.copy(id = Some(1))) - } - - whenReady(service.rejectProposal(10, curatorId, "Insufficient evidence")) { result => - result.status mustBe ProposedBranchStatus.Rejected - result.notes mustBe Some("Insufficient evidence") - result.reviewedBy mustBe Some(curatorId) - 
verify(mockCuratorActionRepo).create(any[CuratorAction]) - } - } - - "reject a proposal from any reviewable status" in { - // Can reject from Pending, ReadyForReview, UnderReview, or Accepted - for (status <- Seq(ProposedBranchStatus.Pending, ProposedBranchStatus.ReadyForReview, - ProposedBranchStatus.UnderReview, ProposedBranchStatus.Accepted)) { - reset(mockProposedBranchRepo, mockCuratorActionRepo) - val proposal = makeProposal(10, status) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - when(mockProposedBranchRepo.update(any[ProposedBranch])).thenReturn(Future.successful(true)) - when(mockCuratorActionRepo.create(any[CuratorAction])).thenAnswer { invocation => - val a = invocation.getArgument[CuratorAction](0) - Future.successful(a.copy(id = Some(1))) - } - - whenReady(service.rejectProposal(10, curatorId, "Rejected")) { result => - result.status mustBe ProposedBranchStatus.Rejected - } - } - } - - "fail to reject an already promoted proposal" in { - val proposal = makeProposal(10, ProposedBranchStatus.Promoted) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - - whenReady(service.rejectProposal(10, curatorId, "Too late").failed) { ex => - ex mustBe a[IllegalStateException] - ex.getMessage must include("Cannot transition") - } - } - - "start review of a ReadyForReview proposal" in { - val proposal = makeProposal(10, ProposedBranchStatus.ReadyForReview, 3) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - when(mockProposedBranchRepo.update(any[ProposedBranch])).thenReturn(Future.successful(true)) - when(mockCuratorActionRepo.create(any[CuratorAction])).thenAnswer { invocation => - val a = invocation.getArgument[CuratorAction](0) - Future.successful(a.copy(id = Some(1))) - } - - whenReady(service.startReview(10, curatorId)) { result => - result.status mustBe ProposedBranchStatus.UnderReview - result.reviewedBy mustBe Some(curatorId) 
- } - } - - "fail to start review of a Pending proposal" in { - val proposal = makeProposal(10, ProposedBranchStatus.Pending) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - - whenReady(service.startReview(10, curatorId).failed) { ex => - ex mustBe a[IllegalStateException] - } - } - - "fail when proposal not found" in { - when(mockProposedBranchRepo.findById(999)).thenReturn(Future.successful(None)) - - whenReady(service.acceptProposal(999, curatorId, "R-X1", None).failed) { ex => - ex mustBe a[NoSuchElementException] - ex.getMessage must include("not found") - } - } - - "get audit trail" in { - val actions = Seq( - CuratorAction(Some(1), curatorId, CuratorActionType.Review, CuratorTargetType.ProposedBranch, 10), - CuratorAction(Some(2), curatorId, CuratorActionType.Accept, CuratorTargetType.ProposedBranch, 10) - ) - when(mockCuratorActionRepo.findByTarget(CuratorTargetType.ProposedBranch, 10)) - .thenReturn(Future.successful(actions)) - - whenReady(service.getAuditTrail(10)) { result => - result must have size 2 - } - } - } - - "validateStatusTransition" should { - "allow Pending -> ReadyForReview" in { - noException should be thrownBy { - service.validateStatusTransition(ProposedBranchStatus.Pending, ProposedBranchStatus.ReadyForReview) - } - } - - "allow ReadyForReview -> UnderReview" in { - noException should be thrownBy { - service.validateStatusTransition(ProposedBranchStatus.ReadyForReview, ProposedBranchStatus.UnderReview) - } - } - - "allow UnderReview -> Accepted" in { - noException should be thrownBy { - service.validateStatusTransition(ProposedBranchStatus.UnderReview, ProposedBranchStatus.Accepted) - } - } - - "allow Accepted -> Promoted" in { - noException should be thrownBy { - service.validateStatusTransition(ProposedBranchStatus.Accepted, ProposedBranchStatus.Promoted) - } - } - - "reject Pending -> Accepted" in { - an[IllegalStateException] should be thrownBy { - 
service.validateStatusTransition(ProposedBranchStatus.Pending, ProposedBranchStatus.Accepted) - } - } - - "reject Promoted -> anything" in { - an[IllegalStateException] should be thrownBy { - service.validateStatusTransition(ProposedBranchStatus.Promoted, ProposedBranchStatus.Rejected) - } - } - } -} diff --git a/test/services/ExternalBiosampleServiceSpec.scala b/test/services/ExternalBiosampleServiceSpec.scala deleted file mode 100644 index be4983a9..00000000 --- a/test/services/ExternalBiosampleServiceSpec.scala +++ /dev/null @@ -1,210 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.api.* -import models.domain.genomics.{Biosample, BiosampleType, SpecimenDonor} -import org.mockito.ArgumentMatchers.{any, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.BiosampleRepository - -import java.util.UUID -import scala.concurrent.Future - -class ExternalBiosampleServiceSpec extends ServiceSpec { - - val mockBiosampleRepo: BiosampleRepository = mock[BiosampleRepository] - val mockDataService: BiosampleDataService = mock[BiosampleDataService] - val mockBiosampleService: BiosampleService = mock[BiosampleService] - - val service = new ExternalBiosampleService(mockBiosampleRepo, mockDataService, mockBiosampleService) - - override def beforeEach(): Unit = { - reset(mockBiosampleRepo, mockDataService, mockBiosampleService) - } - - val testSequenceData: SequenceDataInfo = SequenceDataInfo( - reads = Some(1000), - readLength = Some(150), - coverage = Some(30.0), - platformName = "Illumina", - testType = "WGS", - files = Seq.empty - ) - - def makeRequest( - accession: String = "SAMEA001", - publication: Option[PublicationInfo] = None, - donorIdentifier: Option[String] = Some("DONOR_1"), - citizenDid: Option[String] = None - ): ExternalBiosampleRequest = ExternalBiosampleRequest( - sampleAccession = accession, - sourceSystem = "test", - description = "Test sample", - alias = Some("alias"), - centerName = "TestCenter", - 
sex = None, - latitude = None, - longitude = None, - citizenDid = citizenDid, - atUri = None, - donorIdentifier = donorIdentifier, - donorType = None, - publication = publication, - haplogroups = None, - sequenceData = testSequenceData - ) - - val testGuid: UUID = UUID.randomUUID() - - val testBiosample: Biosample = Biosample( - id = Some(1), - sampleGuid = testGuid, - sampleAccession = "SAMEA001", - description = "Test", - alias = Some("alias"), - centerName = "TestCenter", - specimenDonorId = None, - locked = false, - sourcePlatform = Some("test") - ) - - "ExternalBiosampleService" should { - - "create new biosample with data" in { - val request = makeRequest() - - when(mockBiosampleService.createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any)) - .thenReturn(Future.successful(Some(1))) - when(mockBiosampleRepo.findByAccession("SAMEA001")).thenReturn(Future.successful(None)) - when(mockBiosampleService.createBiosample(any[UUID], anyString, anyString, any, anyString, any, any)) - .thenReturn(Future.successful(testBiosample)) - when(mockDataService.addSequenceData(any[UUID], any[SequenceDataInfo])) - .thenReturn(Future.successful(())) - - whenReady(service.createBiosampleWithData(request)) { guid => - guid mustBe a[UUID] - verify(mockBiosampleService).createBiosample(any[UUID], anyString, anyString, any, anyString, any, any) - verify(mockDataService).addSequenceData(any[UUID], any[SequenceDataInfo]) - } - } - - "update existing biosample when accession found" in { - val request = makeRequest() - - when(mockBiosampleService.createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any)) - .thenReturn(Future.successful(Some(1))) - when(mockBiosampleRepo.findByAccession("SAMEA001")).thenReturn(Future.successful(Some((testBiosample, None)))) - when(mockBiosampleRepo.update(any[Biosample])).thenReturn(Future.successful(true)) - when(mockDataService.replaceSequenceData(any[UUID], 
any[SequenceDataInfo])) - .thenReturn(Future.successful(())) - - whenReady(service.createBiosampleWithData(request)) { guid => - guid mustBe testGuid - verify(mockBiosampleRepo).update(any[Biosample]) - verify(mockDataService).replaceSequenceData(any[UUID], any[SequenceDataInfo]) - } - } - - "skip publication linkage when none provided" in { - val request = makeRequest(publication = None) - - when(mockBiosampleService.createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any)) - .thenReturn(Future.successful(None)) - when(mockBiosampleRepo.findByAccession("SAMEA001")).thenReturn(Future.successful(None)) - when(mockBiosampleService.createBiosample(any[UUID], anyString, anyString, any, anyString, any, any)) - .thenReturn(Future.successful(testBiosample)) - when(mockDataService.addSequenceData(any[UUID], any[SequenceDataInfo])) - .thenReturn(Future.successful(())) - - whenReady(service.createBiosampleWithData(request)) { guid => - guid mustBe a[UUID] - verify(mockDataService, never()).linkPublication(any[UUID], any[PublicationInfo]) - } - } - - "wrap publication linkage failure in PublicationLinkageException" in { - val pubInfo = PublicationInfo(doi = Some("10.1234/test"), pubmedId = None, originalHaplogroups = None) - val request = makeRequest(publication = Some(pubInfo)) - - when(mockBiosampleService.createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any)) - .thenReturn(Future.successful(None)) - when(mockBiosampleRepo.findByAccession("SAMEA001")).thenReturn(Future.successful(None)) - when(mockBiosampleService.createBiosample(any[UUID], anyString, anyString, any, anyString, any, any)) - .thenReturn(Future.successful(testBiosample)) - when(mockDataService.linkPublication(any[UUID], any[PublicationInfo])) - .thenReturn(Future.failed(new RuntimeException("DOI not found"))) - - whenReady(service.createBiosampleWithData(request).failed) { ex => - ex mustBe a[PublicationLinkageException] - } - } - 
- "wrap sequence data failure in SequenceDataValidationException" in { - val request = makeRequest() - - when(mockBiosampleService.createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any)) - .thenReturn(Future.successful(None)) - when(mockBiosampleRepo.findByAccession("SAMEA001")).thenReturn(Future.successful(None)) - when(mockBiosampleService.createBiosample(any[UUID], anyString, anyString, any, anyString, any, any)) - .thenReturn(Future.successful(testBiosample)) - when(mockDataService.addSequenceData(any[UUID], any[SequenceDataInfo])) - .thenReturn(Future.failed(new RuntimeException("Invalid format"))) - - whenReady(service.createBiosampleWithData(request).failed) { ex => - ex mustBe a[SequenceDataValidationException] - } - } - - "wrap unexpected exceptions in RuntimeException" in { - val request = makeRequest() - - when(mockBiosampleService.createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any)) - .thenReturn(Future.failed(new NullPointerException("unexpected"))) - - whenReady(service.createBiosampleWithData(request).failed) { ex => - ex mustBe a[RuntimeException] - ex.getMessage must include("Failed to process biosample") - } - } - - "delete biosample when found and owned by DID" in { - val donor = SpecimenDonor( - id = Some(1), donorIdentifier = "D1", originBiobank = "Bank", - donorType = BiosampleType.Standard, sex = None, geocoord = None, - atUri = Some("did:plc:abc123") - ) - - when(mockBiosampleRepo.findByAccession("SAMEA001")).thenReturn(Future.successful(Some((testBiosample, Some(donor))))) - when(mockDataService.fullyDeleteBiosampleAndDependencies(any[Int], any[UUID])) - .thenReturn(Future.successful(())) - - whenReady(service.deleteBiosample("SAMEA001", "did:plc:abc123")) { result => - result mustBe true - verify(mockDataService).fullyDeleteBiosampleAndDependencies(any[Int], any[UUID]) - } - } - - "return false when biosample not found" in { - 
when(mockBiosampleRepo.findByAccession("SAMEA999")).thenReturn(Future.successful(None)) - - whenReady(service.deleteBiosample("SAMEA999", "did:plc:abc123")) { result => - result mustBe false - } - } - - "return false when DID does not match" in { - val donor = SpecimenDonor( - id = Some(1), donorIdentifier = "D1", originBiobank = "Bank", - donorType = BiosampleType.Standard, sex = None, geocoord = None, - atUri = Some("did:plc:other") - ) - - when(mockBiosampleRepo.findByAccession("SAMEA001")).thenReturn(Future.successful(Some((testBiosample, Some(donor))))) - - whenReady(service.deleteBiosample("SAMEA001", "did:plc:abc123")) { result => - result mustBe false - verify(mockDataService, never()).fullyDeleteBiosampleAndDependencies(any[Int], any[UUID]) - } - } - } -} diff --git a/test/services/GenealogicalAnchorServiceSpec.scala b/test/services/GenealogicalAnchorServiceSpec.scala deleted file mode 100644 index 90f11155..00000000 --- a/test/services/GenealogicalAnchorServiceSpec.scala +++ /dev/null @@ -1,235 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.haplogroups.{AgeEstimate, AnchorType, GenealogicalAnchor} -import org.mockito.ArgumentMatchers.{any, anyInt} -import org.mockito.Mockito.{reset, verify, when} -import repositories.GenealogicalAnchorRepository - -import java.time.LocalDateTime -import scala.concurrent.Future - -class GenealogicalAnchorServiceSpec extends ServiceSpec { - - val mockRepo: GenealogicalAnchorRepository = mock[GenealogicalAnchorRepository] - val service = new GenealogicalAnchorService(mockRepo) - - override def beforeEach(): Unit = { - reset(mockRepo) - } - - val now: LocalDateTime = LocalDateTime.of(2025, 6, 1, 12, 0) - - def makeAnchor( - id: Int, - haplogroupId: Int, - anchorType: AnchorType, - dateCe: Int, - uncertainty: Int = 50, - confidence: Double = 0.8, - carbonDateBp: Option[Int] = None, - carbonDateSigma: Option[Int] = None - ): GenealogicalAnchor = - GenealogicalAnchor( - id = Some(id), haplogroupId 
= haplogroupId, anchorType = anchorType, - dateCe = dateCe, dateUncertaintyYears = Some(uncertainty), - confidence = Some(BigDecimal(confidence)), - description = Some("Test anchor"), source = Some("Test"), - carbonDateBp = carbonDateBp, carbonDateSigma = carbonDateSigma, - createdBy = Some("curator@test"), createdAt = now - ) - - "GenealogicalAnchorService" should { - - "getAnchorsForHaplogroup" should { - - "return anchors sorted by confidence descending" in { - val anchors = Seq( - makeAnchor(1, 100, AnchorType.Mdka, 1200, confidence = 0.5), - makeAnchor(2, 100, AnchorType.KnownMrca, 1400, confidence = 0.9), - makeAnchor(3, 100, AnchorType.AncientDna, -500, confidence = 0.7, - carbonDateBp = Some(2500), carbonDateSigma = Some(50)) - ) - when(mockRepo.findByHaplogroup(100)).thenReturn(Future.successful(anchors)) - - whenReady(service.getAnchorsForHaplogroup(100)) { result => - result must have size 3 - result.head.id mustBe Some(2) // highest confidence - result.last.id mustBe Some(1) // lowest confidence - } - } - - "return empty for haplogroup with no anchors" in { - when(mockRepo.findByHaplogroup(999)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.getAnchorsForHaplogroup(999)) { result => - result mustBe empty - } - } - } - - "createAnchor" should { - - "create a valid anchor" in { - val anchor = makeAnchor(0, 100, AnchorType.KnownMrca, 1400).copy(id = None) - when(mockRepo.create(any[GenealogicalAnchor])) - .thenReturn(Future.successful(anchor.copy(id = Some(1)))) - - whenReady(service.createAnchor(anchor)) { result => - result.id mustBe Some(1) - verify(mockRepo).create(any[GenealogicalAnchor]) - } - } - - "reject anchor with invalid confidence" in { - val anchor = makeAnchor(0, 100, AnchorType.KnownMrca, 1400, confidence = 1.5) - an[IllegalArgumentException] must be thrownBy { - service.createAnchor(anchor) - } - } - - "reject ancient DNA anchor without carbon date or valid date_ce" in { - val anchor = GenealogicalAnchor( - haplogroupId = 
100, anchorType = AnchorType.AncientDna, - dateCe = 0, dateUncertaintyYears = None, - confidence = None, description = None, source = None, - carbonDateBp = None, carbonDateSigma = None, - createdBy = None - ) - an[IllegalArgumentException] must be thrownBy { - service.createAnchor(anchor) - } - } - } - - "deleteAnchor" should { - - "delete an existing anchor" in { - when(mockRepo.delete(1)).thenReturn(Future.successful(true)) - - whenReady(service.deleteAnchor(1)) { result => - result mustBe true - } - } - } - } - - "applyAnchorConstraints" should { - - "return unconstrained estimate when no anchors exist" in { - when(mockRepo.findByHaplogroup(100)).thenReturn(Future.successful(Seq.empty)) - - val estimate = AgeEstimate(1000, Some(700), Some(1300)) - whenReady(service.applyAnchorConstraints(100, estimate)) { result => - result.constrained mustBe false - result.estimate mustBe estimate - } - } - - "apply KNOWN_MRCA as lower bound on age" in { - // Known MRCA from 1400 CE = 550 YBP, uncertainty ±50 → lower bound 500 YBP - val anchor = makeAnchor(1, 100, AnchorType.KnownMrca, 1400, uncertainty = 50) - when(mockRepo.findByHaplogroup(100)).thenReturn(Future.successful(Seq(anchor))) - - // Estimate is too young (300 YBP) - val estimate = AgeEstimate(300, Some(200), Some(400)) - whenReady(service.applyAnchorConstraints(100, estimate)) { result => - result.constrained mustBe true - result.estimate.ybp must be >= 500 // adjusted to at least anchor lower bound - result.estimate.ybpLower.get must be >= 500 - } - } - - "not constrain when estimate already satisfies anchor" in { - // Known MRCA from 1400 CE = 550 YBP - val anchor = makeAnchor(1, 100, AnchorType.KnownMrca, 1400, uncertainty = 50) - when(mockRepo.findByHaplogroup(100)).thenReturn(Future.successful(Seq(anchor))) - - // Estimate is already older (2000 YBP) than anchor - val estimate = AgeEstimate(2000, Some(1500), Some(2500)) - whenReady(service.applyAnchorConstraints(100, estimate)) { result => - 
result.constrained mustBe false - result.estimate.ybp mustBe 2000 - } - } - - "apply MDKA as lower bound" in { - // MDKA from 1700 CE = 250 YBP, uncertainty ±30 → lower bound 220 YBP - val anchor = makeAnchor(1, 100, AnchorType.Mdka, 1700, uncertainty = 30) - when(mockRepo.findByHaplogroup(100)).thenReturn(Future.successful(Seq(anchor))) - - val estimate = AgeEstimate(100, Some(50), Some(150)) - whenReady(service.applyAnchorConstraints(100, estimate)) { result => - result.constrained mustBe true - result.estimate.ybp must be >= 220 - } - } - - "apply ANCIENT_DNA with carbon dating as hard lower bound" in { - // Carbon date: 4500 BP ± 50 → 2-sigma lower bound = 4400 YBP - val anchor = makeAnchor(1, 100, AnchorType.AncientDna, -2550, - carbonDateBp = Some(4500), carbonDateSigma = Some(50)) - when(mockRepo.findByHaplogroup(100)).thenReturn(Future.successful(Seq(anchor))) - - val estimate = AgeEstimate(3000, Some(2500), Some(3500)) - whenReady(service.applyAnchorConstraints(100, estimate)) { result => - result.constrained mustBe true - result.estimate.ybp must be >= 4400 - result.estimate.ybpLower.get must be >= 4400 - } - } - - "apply multiple anchors taking the strictest constraint" in { - val anchor1 = makeAnchor(1, 100, AnchorType.KnownMrca, 1400, uncertainty = 50) // 500 YBP lower - val anchor2 = makeAnchor(2, 100, AnchorType.Mdka, 1200, uncertainty = 50) // 700 YBP lower - when(mockRepo.findByHaplogroup(100)).thenReturn(Future.successful(Seq(anchor1, anchor2))) - - val estimate = AgeEstimate(300, Some(200), Some(400)) - whenReady(service.applyAnchorConstraints(100, estimate)) { result => - result.constrained mustBe true - // The MDKA at 700 YBP is stricter than KNOWN_MRCA at 500 YBP - result.estimate.ybp must be >= 700 - result.estimate.ybpLower.get must be >= 700 - } - } - } - - "GenealogicalAnchor" should { - - "convert date_ce to YBP correctly" in { - makeAnchor(1, 100, AnchorType.KnownMrca, 1400).toYbp mustBe 550 - makeAnchor(1, 100, AnchorType.KnownMrca, 
-500).toYbp mustBe 2450 - makeAnchor(1, 100, AnchorType.KnownMrca, 1950).toYbp mustBe 0 - } - - "convert to AgeEstimate" in { - val anchor = makeAnchor(1, 100, AnchorType.KnownMrca, 1400, uncertainty = 50) - val est = anchor.toAgeEstimate - est.ybp mustBe 550 - est.ybpLower mustBe Some(500) - est.ybpUpper mustBe Some(600) - } - } - - "validateAnchor" should { - - "accept valid KNOWN_MRCA anchor" in { - noException must be thrownBy { - service.validateAnchor(makeAnchor(0, 100, AnchorType.KnownMrca, 1400)) - } - } - - "accept valid ANCIENT_DNA with carbon date" in { - noException must be thrownBy { - service.validateAnchor(makeAnchor(0, 100, AnchorType.AncientDna, -2500, - carbonDateBp = Some(4500), carbonDateSigma = Some(50))) - } - } - - "reject zero haplogroup ID" in { - an[IllegalArgumentException] must be thrownBy { - service.validateAnchor(makeAnchor(0, 0, AnchorType.KnownMrca, 1400)) - } - } - } -} diff --git a/test/services/GenomeRegionsServiceSpec.scala b/test/services/GenomeRegionsServiceSpec.scala deleted file mode 100644 index 6723b0c8..00000000 --- a/test/services/GenomeRegionsServiceSpec.scala +++ /dev/null @@ -1,49 +0,0 @@ -package services - -import org.scalatest.funspec.AnyFunSpec -import org.scalatest.matchers.must.Matchers - -class GenomeRegionsServiceSpec extends AnyFunSpec with Matchers { - - describe("GenomeRegionsService") { - - describe("ETag generation") { - it("should generate consistent ETags for same input") { - val etag1 = generateTestETag("GRCh38", "2024.12.1") - val etag2 = generateTestETag("GRCh38", "2024.12.1") - - etag1 mustBe etag2 - } - - it("should generate different ETags for different builds") { - val etagGrch38 = generateTestETag("GRCh38", "2024.12.1") - val etagGrch37 = generateTestETag("GRCh37", "2024.12.1") - - etagGrch38 must not be etagGrch37 - } - - it("should generate different ETags for different versions") { - val etag1 = generateTestETag("GRCh38", "2024.12.1") - val etag2 = generateTestETag("GRCh38", "2024.12.2") - - 
etag1 must not be etag2 - } - - it("should wrap ETag in quotes") { - val etag = generateTestETag("GRCh38", "2024.12.1") - - etag must startWith("\"") - etag must endWith("\"") - } - } - } - - // Helper method to test ETag generation independently of the service - private def generateTestETag(buildName: String, dataVersion: String): String = { - import java.security.MessageDigest - val input = s"$buildName:$dataVersion" - val md5 = MessageDigest.getInstance("MD5") - val hash = md5.digest(input.getBytes("UTF-8")).map("%02x".format(_)).mkString - s""""$hash"""" - } -} diff --git a/test/services/GroupProjectServiceSpec.scala b/test/services/GroupProjectServiceSpec.scala deleted file mode 100644 index 0e22e356..00000000 --- a/test/services/GroupProjectServiceSpec.scala +++ /dev/null @@ -1,350 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.{GroupProject, GroupProjectMember, MemberVisibility} -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.{GroupProjectMemberRepository, GroupProjectRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class GroupProjectServiceSpec extends ServiceSpec { - - val mockProjectRepo: GroupProjectRepository = mock[GroupProjectRepository] - val mockMemberRepo: GroupProjectMemberRepository = mock[GroupProjectMemberRepository] - - val service = new GroupProjectService(mockProjectRepo, mockMemberRepo) - - override def beforeEach(): Unit = { - reset(mockProjectRepo, mockMemberRepo) - } - - val testProject: GroupProject = GroupProject( - id = Some(1), - projectGuid = UUID.randomUUID(), - projectName = "R-CTS4466 Project", - projectType = "HAPLOGROUP", - targetHaplogroup = Some("R-CTS4466"), - targetLineage = Some("Y_DNA"), - joinPolicy = "APPROVAL_REQUIRED", - ownerDid = "did:plc:admin1" - ) - - val openProject: GroupProject = testProject.copy(id = Some(2), joinPolicy = "OPEN") - - val 
adminMember: GroupProjectMember = GroupProjectMember( - id = Some(1), groupProjectId = 1, citizenDid = "did:plc:admin1", - role = "ADMIN", status = "ACTIVE", joinedAt = Some(LocalDateTime.now()) - ) - - val coAdminMember: GroupProjectMember = GroupProjectMember( - id = Some(2), groupProjectId = 1, citizenDid = "did:plc:coadmin1", - role = "CO_ADMIN", status = "ACTIVE", joinedAt = Some(LocalDateTime.now()) - ) - - val regularMember: GroupProjectMember = GroupProjectMember( - id = Some(3), groupProjectId = 1, citizenDid = "did:plc:member1", - role = "MEMBER", status = "ACTIVE", joinedAt = Some(LocalDateTime.now()) - ) - - val pendingMember: GroupProjectMember = GroupProjectMember( - id = Some(4), groupProjectId = 1, citizenDid = "did:plc:pending1", - role = "MEMBER", status = "PENDING_APPROVAL" - ) - - "GroupProjectService.createProject" should { - - "create project and add creator as admin" in { - val newProject = testProject.copy(id = None) - val createdProject = testProject.copy(id = Some(1)) - - when(mockProjectRepo.create(any[GroupProject])).thenReturn(Future.successful(createdProject)) - when(mockMemberRepo.create(any[GroupProjectMember])).thenReturn(Future.successful(adminMember)) - - whenReady(service.createProject(newProject, "did:plc:admin1")) { result => - result mustBe a[Right[?, ?]] - result.toOption.get.id mustBe Some(1) - verify(mockMemberRepo).create(any[GroupProjectMember]) - } - } - - "reject invalid project type" in { - val invalid = testProject.copy(id = None, projectType = "INVALID") - - whenReady(service.createProject(invalid, "did:plc:admin1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Invalid project type") - } - } - - "reject short project name" in { - val invalid = testProject.copy(id = None, projectName = "AB") - - whenReady(service.createProject(invalid, "did:plc:admin1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("at least 3 characters") - } - } - - "reject 
HAPLOGROUP_VERIFIED without requirement" in { - val invalid = testProject.copy(id = None, joinPolicy = "HAPLOGROUP_VERIFIED", haplogroupRequirement = None) - - whenReady(service.createProject(invalid, "did:plc:admin1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Haplogroup requirement") - } - } - } - - "GroupProjectService.requestMembership" should { - - "create pending membership for approval-required project" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:new1")).thenReturn(Future.successful(None)) - when(mockMemberRepo.create(any[GroupProjectMember])).thenReturn(Future.successful(pendingMember)) - - whenReady(service.requestMembership(1, "did:plc:new1")) { result => - result mustBe a[Right[?, ?]] - } - } - - "auto-activate for open project" in { - when(mockProjectRepo.findById(2)).thenReturn(Future.successful(Some(openProject))) - when(mockMemberRepo.findByProjectAndCitizen(2, "did:plc:new1")).thenReturn(Future.successful(None)) - when(mockMemberRepo.create(any[GroupProjectMember])).thenAnswer { inv => - val m = inv.getArgument[GroupProjectMember](0) - Future.successful(m.copy(id = Some(10))) - } - - whenReady(service.requestMembership(2, "did:plc:new1")) { result => - result mustBe a[Right[?, ?]] - result.toOption.get.status mustBe "ACTIVE" - } - } - - "reject if already active member" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:member1")) - .thenReturn(Future.successful(Some(regularMember))) - - whenReady(service.requestMembership(1, "did:plc:member1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Already a member") - } - } - - "reject if previously removed" in { - val removed = regularMember.copy(status = "REMOVED") - 
when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:member1")) - .thenReturn(Future.successful(Some(removed))) - - whenReady(service.requestMembership(1, "did:plc:member1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("revoked") - } - } - - "return error for non-existent project" in { - when(mockProjectRepo.findById(99)).thenReturn(Future.successful(None)) - - whenReady(service.requestMembership(99, "did:plc:new1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("not found") - } - } - } - - "GroupProjectService.approveMembership" should { - - "approve pending membership when admin" in { - when(mockMemberRepo.findById(4)).thenReturn(Future.successful(Some(pendingMember))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - when(mockMemberRepo.update(any[GroupProjectMember])).thenReturn(Future.successful(true)) - - whenReady(service.approveMembership(4, "did:plc:admin1")) { result => - result mustBe a[Right[?, ?]] - result.toOption.get.status mustBe "ACTIVE" - } - } - - "reject if not admin" in { - when(mockMemberRepo.findById(4)).thenReturn(Future.successful(Some(pendingMember))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:member1")) - .thenReturn(Future.successful(Some(regularMember))) - - whenReady(service.approveMembership(4, "did:plc:member1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("permissions") - } - } - - "reject if membership is not pending" in { - when(mockMemberRepo.findById(3)).thenReturn(Future.successful(Some(regularMember))) - - whenReady(service.approveMembership(3, "did:plc:admin1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Cannot approve") - } - } - } - - "GroupProjectService.leaveProject" should { - - "allow regular member 
to leave" in { - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:member1")) - .thenReturn(Future.successful(Some(regularMember))) - when(mockMemberRepo.updateStatus(3, "LEFT")).thenReturn(Future.successful(true)) - - whenReady(service.leaveProject(1, "did:plc:member1")) { result => - result mustBe a[Right[?, ?]] - } - } - - "prevent admin from leaving" in { - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - - whenReady(service.leaveProject(1, "did:plc:admin1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Admin cannot leave") - } - } - } - - "GroupProjectService.removeMember" should { - - "allow admin to remove member" in { - when(mockMemberRepo.findById(3)).thenReturn(Future.successful(Some(regularMember))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - when(mockMemberRepo.updateStatus(3, "REMOVED")).thenReturn(Future.successful(true)) - - whenReady(service.removeMember(3, "did:plc:admin1")) { result => - result mustBe a[Right[?, ?]] - } - } - - "prevent removing the admin" in { - when(mockMemberRepo.findById(1)).thenReturn(Future.successful(Some(adminMember))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - - whenReady(service.removeMember(1, "did:plc:admin1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Cannot remove the project admin") - } - } - } - - "GroupProjectService.assignRole" should { - - "allow admin to assign co-admin role" in { - when(mockMemberRepo.findById(3)).thenReturn(Future.successful(Some(regularMember))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - when(mockMemberRepo.updateRole(3, "CO_ADMIN")).thenReturn(Future.successful(true)) - - whenReady(service.assignRole(3, 
"CO_ADMIN", "did:plc:admin1")) { result => - result mustBe a[Right[?, ?]] - } - } - - "reject assigning ADMIN role" in { - when(mockMemberRepo.findById(3)).thenReturn(Future.successful(Some(regularMember))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - - whenReady(service.assignRole(3, "ADMIN", "did:plc:admin1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Cannot assign ADMIN") - } - } - - "reject invalid role" in { - whenReady(service.assignRole(3, "SUPERUSER", "did:plc:admin1")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Invalid role") - } - } - } - - "GroupProjectService.getProjectMembers" should { - - "return members when requester has access" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:member1")) - .thenReturn(Future.successful(Some(regularMember))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(adminMember, regularMember))) - - whenReady(service.getProjectMembers(1, "did:plc:member1")) { result => - result mustBe a[Right[?, ?]] - result.toOption.get.size mustBe 2 - } - } - - "deny non-member access to MEMBERS_ONLY list" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:outsider")) - .thenReturn(Future.successful(None)) - - whenReady(service.getProjectMembers(1, "did:plc:outsider")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("permissions") - } - } - } - - "GroupProjectService.getPendingRequests" should { - - "return pending members when admin" in { - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - when(mockMemberRepo.findByProjectAndStatus(1, 
"PENDING_APPROVAL")) - .thenReturn(Future.successful(Seq(pendingMember))) - - whenReady(service.getPendingRequests(1, "did:plc:admin1")) { result => - result mustBe a[Right[?, ?]] - result.toOption.get.size mustBe 1 - } - } - - "deny regular member access to pending requests" in { - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:member1")) - .thenReturn(Future.successful(Some(regularMember))) - - whenReady(service.getPendingRequests(1, "did:plc:member1")) { result => - result mustBe a[Left[?, ?]] - } - } - } - - "GroupProjectService.hasPermission" should { - - "grant all permissions to admin" in { - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - - whenReady(service.hasPermission(1, "did:plc:admin1", "APPROVE_MEMBERS")) { _ mustBe true } - } - - "grant limited permissions to co-admin" in { - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:coadmin1")) - .thenReturn(Future.successful(Some(coAdminMember))) - - whenReady(service.hasPermission(1, "did:plc:coadmin1", "APPROVE_MEMBERS")) { _ mustBe true } - whenReady(service.hasPermission(1, "did:plc:coadmin1", "MANAGE_ROLES")) { _ mustBe false } - } - - "deny permissions to regular member" in { - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:member1")) - .thenReturn(Future.successful(Some(regularMember))) - - whenReady(service.hasPermission(1, "did:plc:member1", "APPROVE_MEMBERS")) { _ mustBe false } - } - } -} diff --git a/test/services/HaplogroupTreeMergeServiceSpec.scala b/test/services/HaplogroupTreeMergeServiceSpec.scala deleted file mode 100644 index 647900e7..00000000 --- a/test/services/HaplogroupTreeMergeServiceSpec.scala +++ /dev/null @@ -1,901 +0,0 @@ -package services - -import models.HaplogroupType -import models.api.haplogroups.* -import models.domain.genomics.VariantV2 -import models.domain.haplogroups.{ChangeSet, ChangeSetStatus, Haplogroup, HaplogroupProvenance, RelationshipRevisionMetadata} -import 
org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import org.scalatest.BeforeAndAfterEach -import org.scalatest.concurrent.ScalaFutures -import org.scalatest.time.{Millis, Seconds, Span} -import org.scalatestplus.mockito.MockitoSugar -import org.scalatestplus.play.PlaySpec -import play.api.libs.json.Json -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantV2Repository, HaplogroupRevisionMetadataRepository, HaplogroupVariantMetadataRepository, WipTreeRepository} -import services.tree.{TreeMergeAlgorithmService, TreeMergePreviewService, TreeMergeProvenanceService, VariantMatchingService} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -class HaplogroupTreeMergeServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures with BeforeAndAfterEach { - - implicit val ec: ExecutionContext = ExecutionContext.global - implicit val patience: PatienceConfig = PatienceConfig(timeout = Span(5, Seconds), interval = Span(100, Millis)) - - // Mocks - var mockHaplogroupRepo: HaplogroupCoreRepository = _ - var mockVariantRepo: HaplogroupVariantRepository = _ - var mockVariantV2Repository: VariantV2Repository = _ - var mockHaplogroupRevisionMetadataRepo: HaplogroupRevisionMetadataRepository = _ - var mockHaplogroupVariantMetadataRepo: HaplogroupVariantMetadataRepository = _ - var mockTreeVersioningService: TreeVersioningService = _ - var mockWipTreeRepository: WipTreeRepository = _ - var stagingHelper: TreeMergeStagingHelper = _ - var provenanceService: TreeMergeProvenanceService = _ - var variantMatchingService: VariantMatchingService = _ - var previewService: TreeMergePreviewService = _ - var algorithmService: TreeMergeAlgorithmService = _ - var service: HaplogroupTreeMergeService = _ - - // Test fixtures - val now: LocalDateTime = LocalDateTime.now() - - def createHaplogroup( - id: Int, - name: String, - haplogroupType: HaplogroupType = 
HaplogroupType.Y, - source: String = "ISOGG", - provenance: Option[HaplogroupProvenance] = None - ): Haplogroup = Haplogroup( - id = Some(id), - name = name, - lineage = None, - description = None, - haplogroupType = haplogroupType, - revisionId = 1, - source = source, - confidenceLevel = "high", - validFrom = now.minusDays(30), - validUntil = None, - provenance = provenance - ) - - def createPhyloNode( - name: String, - variants: List[String] = List.empty, - children: List[PhyloNodeInput] = List.empty, - formedYbp: Option[Int] = None - ): PhyloNodeInput = PhyloNodeInput( - name = name, - variants = variants.map(v => VariantInput(v)), // Convert strings to VariantInput - children = children, - formedYbp = formedYbp - ) - - override def beforeEach(): Unit = { - mockHaplogroupRepo = mock[HaplogroupCoreRepository] - mockVariantRepo = mock[HaplogroupVariantRepository] - mockVariantV2Repository = mock[VariantV2Repository] - mockHaplogroupRevisionMetadataRepo = mock[HaplogroupRevisionMetadataRepository] - mockHaplogroupVariantMetadataRepo = mock[HaplogroupVariantMetadataRepository] - mockTreeVersioningService = mock[TreeVersioningService] - mockWipTreeRepository = mock[WipTreeRepository] - stagingHelper = new TreeMergeStagingHelper( - mockHaplogroupRepo, - mockVariantRepo, - mockWipTreeRepository - ) - provenanceService = new TreeMergeProvenanceService(mockHaplogroupRepo) - variantMatchingService = new VariantMatchingService(mockHaplogroupRepo, mockVariantRepo) - previewService = new TreeMergePreviewService(variantMatchingService, provenanceService) - algorithmService = new TreeMergeAlgorithmService( - mockHaplogroupRepo, - mockVariantRepo, - mockVariantV2Repository, - stagingHelper, - provenanceService, - variantMatchingService - ) - service = new HaplogroupTreeMergeService( - mockTreeVersioningService, - algorithmService, - provenanceService, - variantMatchingService, - previewService - ) - - // Default mock behaviors for new metadata repositories - 
when(mockHaplogroupRevisionMetadataRepo.addRelationshipRevisionMetadata(any())) - .thenReturn(Future.successful(1)) - when(mockHaplogroupVariantMetadataRepo.addVariantRevisionMetadata(any())) - .thenReturn(Future.successful(1)) - - // Default mock behaviors for tree versioning service - // By default, return a failed Future to skip change tracking (simulating no active change set allowed) - when(mockTreeVersioningService.createChangeSet(any(), anyString(), any(), anyString())) - .thenReturn(Future.failed(new IllegalStateException("Change set creation disabled in tests"))) - when(mockTreeVersioningService.finalizeChangeSet(anyInt(), any(), any())) - .thenReturn(Future.successful(true)) - when(mockTreeVersioningService.recordCreate(anyInt(), anyString(), any(), any(), any())) - .thenReturn(Future.successful(1)) - when(mockTreeVersioningService.recordReparent(anyInt(), anyInt(), any(), anyInt(), any(), any())) - .thenReturn(Future.successful(1)) - } - - "HaplogroupTreeMergeService" should { - - // ========================================================================= - // Preview Tests - // ========================================================================= - - "preview a simple tree merge with no existing haplogroups" in { - // Setup: Empty existing tree - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21", "S145"), - children = List( - createPhyloNode("R1b-DF13", variants = List("DF13")) - ) - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net" - ) - - whenReady(service.previewMerge(request)) { result => - result.statistics.nodesProcessed mustBe 2 - result.statistics.nodesCreated mustBe 2 - result.statistics.nodesUnchanged mustBe 0 - result.newNodes must contain allOf ("R1b-L21", "R1b-DF13") - result.conflicts mustBe empty - } - } - - 
"preview identifies existing nodes for update" in { - // Setup: Existing tree with R1b-L21 - val existingHaplogroup = createHaplogroup(1, "R1b-L21", source = "DecodingUs") - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21", "S145")) - ))) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21", "S145"), - children = List( - createPhyloNode("R1b-DF13", variants = List("DF13")) - ) - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net", - priorityConfig = Some(SourcePriorityConfig(List("ytree.net", "DecodingUs"))) - ) - - whenReady(service.previewMerge(request)) { result => - result.statistics.nodesProcessed mustBe 2 - result.statistics.nodesCreated mustBe 1 // DF13 is new - result.newNodes must contain("R1b-DF13") - // R1b-L21 exists but ytree.net has higher priority, so it might be marked for update - // depending on whether there are differences - } - } - - "preview detects age estimate conflicts" in { - // Setup: Existing tree with different age estimate - val existingHaplogroup = createHaplogroup(1, "R1b-L21").copy(formedYbp = Some(4500)) - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21")) - ))) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21"), - formedYbp = Some(4800) // Different from existing - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net", - priorityConfig = Some(SourcePriorityConfig(List("ytree.net", "ISOGG"))) - ) - - whenReady(service.previewMerge(request)) { result => - result.conflicts.size mustBe 1 - result.conflicts.head.field mustBe "formedYbp" - result.conflicts.head.existingValue mustBe "4500" - result.conflicts.head.newValue mustBe "4800" - } - } - - // 
========================================================================= - // Variant-Based Matching Tests - // ========================================================================= - - "match nodes by variants, not names" in { - // Setup: Existing "R-L21" should match incoming "R1b-L21" by variant - val existingHaplogroup = createHaplogroup(1, "R-L21") // Different name - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21")) // Same variant - ))) - - val sourceTree = createPhyloNode( - name = "R1b-L21", // Different name but same variant - variants = List("L21") - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net" - ) - - whenReady(service.previewMerge(request)) { result => - // Should recognize as existing node (unchanged), not new - result.statistics.nodesCreated mustBe 0 - result.unchangedNodes must contain("R-L21") - } - } - - "fall back to name matching when no variant match found" in { - // Setup: Existing node with same name but no variants - val existingHaplogroup = createHaplogroup(1, "R1b-L21") - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq.empty) // No variants - ))) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21", "S145") // Has variants but no match in DB - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net" - ) - - whenReady(service.previewMerge(request)) { result => - // Should match by name - result.statistics.nodesCreated mustBe 0 - result.unchangedNodes must contain("R1b-L21") - } - } - - // ========================================================================= - // Credit Assignment Tests - // ========================================================================= - - "preserve 
ISOGG credit on existing nodes" in { - // Setup: Existing node with ISOGG provenance - val isoggProvenance = HaplogroupProvenance( - primaryCredit = "ISOGG", - nodeProvenance = Set("ISOGG") - ) - val existingHaplogroup = createHaplogroup(1, "R1b-L21", provenance = Some(isoggProvenance)) - - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21")) - ))) - when(mockHaplogroupRepo.updateProvenance(anyInt(), any[HaplogroupProvenance])) - .thenReturn(Future.successful(true)) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21") - ) - - val request = TreeMergeRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net", - dryRun = true // Use dry run for this test - ) - - whenReady(service.mergeFullTree(request)) { result => - result.success mustBe true - // ISOGG credit should be preserved (verified via mock) - } - } - - "assign incoming source credit for new nodes" in { - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val sourceTree = createPhyloNode( - name = "R1b-NEW", - variants = List("NEW123") - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net" - ) - - whenReady(service.previewMerge(request)) { result => - result.newNodes must contain("R1b-NEW") - // New nodes get incoming source credit (ytree.net) - } - } - - "update primary credit when incoming source has higher priority and existing is not ISOGG" in { - val existingProvenance = HaplogroupProvenance(primaryCredit = "DecodingUs", nodeProvenance = Set("DecodingUs")) - val existingHaplogroup = createHaplogroup(1, "R1b-L21", provenance = Some(existingProvenance)) - - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21")) - ))) - 
when(mockHaplogroupRepo.updateProvenance(anyInt(), any[HaplogroupProvenance])) - .thenReturn(Future.successful(true)) - when(mockHaplogroupRepo.getParent(anyInt())) - .thenReturn(Future.successful(None)) - when(mockVariantRepo.getHaplogroupVariantIds(anyInt())) - .thenReturn(Future.successful(Seq.empty)) - when(mockVariantV2Repository.searchByName(anyString())) - .thenReturn(Future.successful(Seq.empty)) - when(mockVariantV2Repository.searchByNames(any[Seq[String]])) - .thenReturn(Future.successful(Map.empty[String, Seq[VariantV2]])) - when(mockHaplogroupRepo.getAllRelationships(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21") - ) - - val request = TreeMergeRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ISOGG", - priorityConfig = Some(SourcePriorityConfig(List("ISOGG", "DecodingUs"))), // ISOGG higher - dryRun = false - ) - - whenReady(service.mergeFullTree(request)) { result => - result.success mustBe true - verify(mockHaplogroupRepo).updateProvenance(anyInt(), org.mockito.ArgumentMatchers.argThat { (p: HaplogroupProvenance) => - p.primaryCredit == "ISOGG" - }) - } - } - - // ========================================================================= - // Priority Configuration Tests - // ========================================================================= - - "respect source priority for conflict resolution" in { - val existingHaplogroup = createHaplogroup(1, "R1b-L21", source = "DecodingUs") - .copy(formedYbp = Some(4500)) - - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21")) - ))) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21"), - formedYbp = Some(4800) - ) - - // Higher priority = lower index. 
ytree.net at index 0 beats DecodingUs at index 1 - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net", - priorityConfig = Some(SourcePriorityConfig(List("ytree.net", "DecodingUs"))) - ) - - whenReady(service.previewMerge(request)) { result => - result.conflicts.head.resolution mustBe "will_update" - } - } - - "keep existing values when existing source has higher priority" in { - val existingProvenance = HaplogroupProvenance(primaryCredit = "ISOGG", nodeProvenance = Set("ISOGG")) - val existingHaplogroup = createHaplogroup(1, "R1b-L21", provenance = Some(existingProvenance)) - .copy(formedYbp = Some(4500)) - - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21")) - ))) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21"), - formedYbp = Some(4800) - ) - - // ISOGG at index 0 beats ytree.net at index 1 - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net", - priorityConfig = Some(SourcePriorityConfig(List("ISOGG", "ytree.net"))) - ) - - whenReady(service.previewMerge(request)) { result => - result.conflicts.head.resolution mustBe "will_keep_existing" - } - } - - // ========================================================================= - // Subtree Merge Tests - // ========================================================================= - - "merge subtree under specified anchor" in { - val anchorHaplogroup = createHaplogroup(100, "R1b") - - when(mockHaplogroupRepo.getHaplogroupByName("R1b", HaplogroupType.Y)) - .thenReturn(Future.successful(Some(anchorHaplogroup))) - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (anchorHaplogroup, Seq("M269")) - ))) - when(mockHaplogroupRepo.createWithParent(any[Haplogroup], any[Option[Int]], anyString())) - 
.thenReturn(Future.successful((101, None))) - when(mockHaplogroupRepo.updateProvenance(anyInt(), any[HaplogroupProvenance])) - .thenReturn(Future.successful(true)) - when(mockVariantV2Repository.searchByName(anyString())) - .thenReturn(Future.successful(Seq.empty)) - when(mockVariantV2Repository.searchByNames(any[Seq[String]])) - .thenReturn(Future.successful(Map.empty[String, Seq[VariantV2]])) - - // Mocks for context loading - when(mockHaplogroupRepo.getDescendants(anyInt())) - .thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getVariantsForHaplogroups(any[Seq[Int]])) - .thenReturn(Future.successful(Seq.empty)) - when(mockHaplogroupRepo.getAllRelationships(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21") - ) - - val request = SubtreeMergeRequest( - haplogroupType = HaplogroupType.Y, - anchorHaplogroupName = "R1b", - sourceTree = sourceTree, - sourceName = "ytree.net" - ) - - whenReady(service.mergeSubtree(request)) { result => - result.success mustBe true - result.statistics.nodesCreated mustBe 1 - verify(mockHaplogroupRepo).createWithParent(any[Haplogroup], any[Option[Int]], anyString()) - } - } - - "fail subtree merge when anchor not found" in { - // The mergeSubtree now first builds a variant index to find the anchor - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) // No haplogroups, so anchor won't be found - - val sourceTree = createPhyloNode(name = "Test") - - val request = SubtreeMergeRequest( - haplogroupType = HaplogroupType.Y, - anchorHaplogroupName = "NONEXISTENT", - sourceTree = sourceTree, - sourceName = "ytree.net" - ) - - whenReady(service.mergeSubtree(request).failed) { ex => - ex mustBe a[IllegalArgumentException] - ex.getMessage must include("not found") - } - } - - // ========================================================================= - // Dry Run Tests - // 
========================================================================= - - "not modify database on dry run" in { - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val sourceTree = createPhyloNode( - name = "R1b-NEW", - variants = List("NEW123") - ) - - val request = TreeMergeRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net", - dryRun = true - ) - - whenReady(service.mergeFullTree(request)) { result => - result.success mustBe true - // Verify no write operations were called - verify(mockHaplogroupRepo, never()).createWithParent(any[Haplogroup], any[Option[Int]], anyString()) - verify(mockHaplogroupRepo, never()).update(any[Haplogroup]) - verify(mockHaplogroupRepo, never()).updateProvenance(anyInt(), any[HaplogroupProvenance]) - } - } - - // ========================================================================= - // Recursive Tree Processing Tests - // ========================================================================= - - "process deeply nested tree structures" in { - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - // Create a 4-level deep tree - val deepTree = createPhyloNode( - name = "Level1", - variants = List("V1"), - children = List( - createPhyloNode( - name = "Level2", - variants = List("V2"), - children = List( - createPhyloNode( - name = "Level3", - variants = List("V3"), - children = List( - createPhyloNode("Level4", variants = List("V4")) - ) - ) - ) - ) - ) - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = deepTree, - sourceName = "test" - ) - - whenReady(service.previewMerge(request)) { result => - result.statistics.nodesProcessed mustBe 4 - result.statistics.nodesCreated mustBe 4 - result.newNodes must have size 4 - } - } - - "process tree with multiple children at each level" in { - 
when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val wideTree = createPhyloNode( - name = "Parent", - variants = List("P1"), - children = List( - createPhyloNode("Child1", variants = List("C1")), - createPhyloNode("Child2", variants = List("C2")), - createPhyloNode("Child3", variants = List("C3")) - ) - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = wideTree, - sourceName = "test" - ) - - whenReady(service.previewMerge(request)) { result => - result.statistics.nodesProcessed mustBe 4 - result.statistics.nodesCreated mustBe 4 - } - } - - // ========================================================================= - // MT DNA Tests - // ========================================================================= - - "handle MT DNA haplogroup type" in { - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.MT)) - .thenReturn(Future.successful(Seq.empty)) - - val sourceTree = createPhyloNode( - name = "H1", - variants = List("H1-defining") - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.MT, - sourceTree = sourceTree, - sourceName = "mtDNA-tree" - ) - - whenReady(service.previewMerge(request)) { result => - result.statistics.nodesCreated mustBe 1 - verify(mockHaplogroupRepo).getAllWithVariantNames(HaplogroupType.MT) - } - } - - // ========================================================================= - // Conflict Strategy Tests - // ========================================================================= - - "apply KeepExisting conflict strategy" in { - val existingHaplogroup = createHaplogroup(1, "R1b-L21") - .copy(formedYbp = Some(4500)) - - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21")) - ))) - when(mockHaplogroupRepo.updateProvenance(anyInt(), any[HaplogroupProvenance])) - .thenReturn(Future.successful(true)) - - val sourceTree 
= createPhyloNode( - name = "R1b-L21", - variants = List("L21"), - formedYbp = Some(4800) - ) - - val request = TreeMergeRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net", - conflictStrategy = Some(ConflictStrategy.KeepExisting), - dryRun = true - ) - - whenReady(service.mergeFullTree(request)) { result => - result.success mustBe true - // With KeepExisting, should not update even with conflicts - result.statistics.nodesUpdated mustBe 0 - } - } - - "apply AlwaysUpdate conflict strategy" in { - val existingHaplogroup = createHaplogroup(1, "R1b-L21", source = "low-priority") - .copy(formedYbp = Some(4500)) - - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("L21")) - ))) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21"), - formedYbp = Some(4800) - ) - - // With AlwaysUpdate, should update regardless of priority - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "ytree.net", - priorityConfig = Some(SourcePriorityConfig(List("low-priority", "ytree.net"))) // ytree.net is lower priority - ) - - whenReady(service.previewMerge(request)) { result => - // Preview shows conflict would be kept (default strategy) - result.conflicts.nonEmpty mustBe true - } - } - - // ========================================================================= - // Edge Cases - // ========================================================================= - - "handle empty source tree gracefully" in { - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val emptyTree = createPhyloNode(name = "SingleNode") - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = emptyTree, - sourceName = "test" - ) - - whenReady(service.previewMerge(request)) { result => - 
result.statistics.nodesProcessed mustBe 1 - } - } - - "handle nodes with no variants" in { - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val noVariantsTree = createPhyloNode( - name = "NoVariants", - variants = List.empty - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = noVariantsTree, - sourceName = "test" - ) - - whenReady(service.previewMerge(request)) { result => - result.statistics.nodesCreated mustBe 1 - } - } - - "handle case-insensitive variant matching" in { - val existingHaplogroup = createHaplogroup(1, "R1b-L21") - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (existingHaplogroup, Seq("l21")) // lowercase - ))) - - val sourceTree = createPhyloNode( - name = "R1b-L21", - variants = List("L21") // uppercase - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "test" - ) - - whenReady(service.previewMerge(request)) { result => - // Should match despite case difference - result.statistics.nodesCreated mustBe 0 - result.unchangedNodes must contain("R1b-L21") - } - } - - // ========================================================================= - // Statistics Accuracy Tests - // ========================================================================= - - "accurately count variant additions for new nodes" in { - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val sourceTree = createPhyloNode( - name = "Test", - variants = List("V1", "V2", "V3") // 3 variants - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "test" - ) - - whenReady(service.previewMerge(request)) { result => - result.statistics.variantsAdded mustBe 3 - } - } - - "count relationship creations correctly" in { - 
when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val treeWithChildren = createPhyloNode( - name = "Parent", - children = List( - createPhyloNode("Child1"), - createPhyloNode("Child2") - ) - ) - - val request = MergePreviewRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = treeWithChildren, - sourceName = "test" - ) - - whenReady(service.previewMerge(request)) { result => - // Parent has 1 relationship (to anchor or none) - // Child1 and Child2 each have 1 relationship to Parent - result.statistics.relationshipsCreated mustBe 3 - } - } - - "allow reparenting of existing node when incoming source has priority" in { - // 1. Setup existing nodes: Root(1) -> Child(2) - val root = createHaplogroup(1, "Root", source = "LowPrioritySource") - val child = createHaplogroup(2, "Child", source = "LowPrioritySource") - - // Mocks for existing state - when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - (root, Seq("RootVar")), - (child, Seq("ChildVar")) - ))) - - // Mock getParent for Child(2) -> Root(1) - when(mockHaplogroupRepo.getParent(2)).thenReturn(Future.successful(Some(root))) - // Mock getParent for Root(1) -> None - when(mockHaplogroupRepo.getParent(1)).thenReturn(Future.successful(None)) - - // Mock creation of NewParent. Assume it gets ID 3. - // NOTE: The mock needs to match the call arguments exactly or be generic. - // The service calls createWithParent with (node, parentId, source). 
- when(mockHaplogroupRepo.createWithParent(any[Haplogroup], any[Option[Int]], anyString())) - .thenAnswer { invocation => - val hg = invocation.getArgument[Haplogroup](0) - if (hg.name == "NewParent") Future.successful((3, None)) - else Future.successful((0, None)) // Should not happen for others in this test - } - - when(mockHaplogroupRepo.updateParent(anyInt(), anyInt(), anyString())) - .thenReturn(Future.successful(1)) - - when(mockHaplogroupRepo.updateProvenance(anyInt(), any[HaplogroupProvenance])) - .thenReturn(Future.successful(true)) - - // Ensure findById works for checking parent names in logging/conflict logic - when(mockHaplogroupRepo.findById(3)).thenReturn(Future.successful(Some(createHaplogroup(3, "NewParent")))) - when(mockHaplogroupRepo.findById(1)).thenReturn(Future.successful(Some(root))) - - // Mock variant repository calls to avoid NPE - when(mockVariantRepo.getHaplogroupVariantIds(anyInt())) - .thenReturn(Future.successful(Seq.empty)) - when(mockVariantV2Repository.searchByName(anyString())) - .thenReturn(Future.successful(Seq.empty)) - when(mockVariantV2Repository.searchByNames(any[Seq[String]])) - .thenReturn(Future.successful(Map.empty[String, Seq[VariantV2]])) - when(mockHaplogroupRepo.getAllRelationships(HaplogroupType.Y)) - .thenReturn(Future.successful(Seq((2, 1)))) // Child(2) -> Root(1) - - when(mockHaplogroupRevisionMetadataRepo.addNextRelationshipRevisionMetadata( - anyInt(), anyString(), any(), anyString(), anyString() - )).thenReturn(Future.successful( - RelationshipRevisionMetadata( - haplogroup_relationship_id = 1, - revisionId = 1, - author = "test", - timestamp = LocalDateTime.now(), - comment = "comment", - changeType = "update", - previousRevisionId = None - ) - )) - - // Source Tree: Root -> NewParent -> Child - val sourceTree = createPhyloNode( - name = "Root", - variants = List("RootVar"), - children = List( - createPhyloNode( - name = "NewParent", - variants = List("NewParentVar"), - children = List( - 
createPhyloNode("Child", variants = List("ChildVar")) - ) - ) - ) - ) - - val request = TreeMergeRequest( - haplogroupType = HaplogroupType.Y, - sourceTree = sourceTree, - sourceName = "HighPrioritySource", - priorityConfig = Some(SourcePriorityConfig(List("HighPrioritySource", "LowPrioritySource"))), - dryRun = false - ) - - whenReady(service.mergeFullTree(request)) { result => - result.success mustBe true - // Verify Child (ID 2) was reparented to NewParent (ID 3) - // May be called multiple times due to SUBTREE_LOOK_AHEAD and NODE_CONTRACTION mechanisms - verify(mockHaplogroupRepo, org.mockito.Mockito.atLeast(1)).updateParent(2, 3, "HighPrioritySource") - } - } - } -} diff --git a/test/services/HaplogroupTreeServiceSpec.scala b/test/services/HaplogroupTreeServiceSpec.scala deleted file mode 100644 index 341ed027..00000000 --- a/test/services/HaplogroupTreeServiceSpec.scala +++ /dev/null @@ -1,266 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.api.* -import models.domain.genomics.{MutationType, NamingStatus, VariantV2} -import models.domain.haplogroups.Haplogroup -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import play.api.libs.json.Json -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository} - -import java.time.{Instant, LocalDateTime, ZoneId} -import scala.concurrent.Future - -class HaplogroupTreeServiceSpec extends ServiceSpec { - - val mockCoreRepo: HaplogroupCoreRepository = mock[HaplogroupCoreRepository] - val mockVariantRepo: HaplogroupVariantRepository = mock[HaplogroupVariantRepository] - - val service = new HaplogroupTreeService(mockCoreRepo, mockVariantRepo) - - override def beforeEach(): Unit = { - reset(mockCoreRepo, mockVariantRepo) - } - - val now: LocalDateTime = LocalDateTime.of(2025, 1, 1, 0, 0) - - def makeHaplogroup(id: Int, name: String, source: String = "backbone"): Haplogroup = Haplogroup( - 
id = Some(id), name = name, lineage = Some(s"root>$name"), - description = None, haplogroupType = HaplogroupType.Y, - revisionId = 1, source = source, confidenceLevel = "high", - validFrom = now, validUntil = None, - formedYbp = Some(5000), tmrcaYbp = Some(4500) - ) - - val rootHg: Haplogroup = makeHaplogroup(1, "R") - val childHg: Haplogroup = makeHaplogroup(2, "R-M269") - val grandchildHg: Haplogroup = makeHaplogroup(3, "R-L151", source = "community") - - def makeVariant(id: Int, name: String): VariantV2 = VariantV2( - variantId = Some(id), - canonicalName = Some(name), - mutationType = MutationType.SNP, - namingStatus = NamingStatus.Unnamed, - aliases = Json.obj("rs_ids" -> Seq("rs12345"), "common_names" -> Seq(name)), - coordinates = Json.obj( - "GRCh38" -> Json.obj("contig" -> "chrY", "position" -> 1000, "ref" -> "A", "alt" -> "G") - ) - ) - - val testVariant: VariantV2 = makeVariant(100, "M269") - - "HaplogroupTreeService" should { - - "buildTreeResponse with ApiRoute includes variants" in { - when(mockCoreRepo.getHaplogroupByName("R", HaplogroupType.Y)).thenReturn(Future.successful(Some(rootHg))) - when(mockCoreRepo.getAncestors(1)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(1)).thenReturn(Future.successful(Seq(testVariant))) - when(mockCoreRepo.getDirectChildren(1)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("R", HaplogroupType.Y, ApiRoute)) { tree => - tree.name mustBe "R" - tree.crumbs mustBe empty - tree.subclade mustBe defined - tree.subclade.get.variants must have size 1 - tree.subclade.get.variants.head.name mustBe "M269" - } - } - - "buildTreeResponse with FragmentRoute excludes variants" in { - when(mockCoreRepo.getHaplogroupByName("R", HaplogroupType.Y)).thenReturn(Future.successful(Some(rootHg))) - when(mockCoreRepo.getAncestors(1)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.countHaplogroupVariants(1)).thenReturn(Future.successful(3)) - 
when(mockCoreRepo.getDirectChildren(1)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("R", HaplogroupType.Y, FragmentRoute)) { tree => - tree.subclade.get.variants mustBe empty - tree.subclade.get.variantCount mustBe Some(3) - } - } - - "buildTreeResponse includes ancestor breadcrumbs" in { - when(mockCoreRepo.getHaplogroupByName("R-M269", HaplogroupType.Y)).thenReturn(Future.successful(Some(childHg))) - when(mockCoreRepo.getAncestors(2)).thenReturn(Future.successful(Seq(rootHg))) - when(mockVariantRepo.getHaplogroupVariants(2)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(2)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("R-M269", HaplogroupType.Y, ApiRoute)) { tree => - tree.crumbs must have size 1 - tree.crumbs.head.label mustBe "R" - } - } - - "buildTreeResponse recursively builds children" in { - when(mockCoreRepo.getHaplogroupByName("R", HaplogroupType.Y)).thenReturn(Future.successful(Some(rootHg))) - when(mockCoreRepo.getAncestors(1)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(1)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(1)).thenReturn(Future.successful(Seq(childHg))) - // child level - when(mockVariantRepo.getHaplogroupVariants(2)).thenReturn(Future.successful(Seq(testVariant))) - when(mockCoreRepo.getDirectChildren(2)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("R", HaplogroupType.Y, ApiRoute)) { tree => - tree.subclade.get.children must have size 1 - tree.subclade.get.children.head.name mustBe "R-M269" - tree.subclade.get.children.head.variants must have size 1 - } - } - - "buildTreeResponse resolves variant query to haplogroup name" in { - // First lookup by name fails, second lookup (after resolution) succeeds - when(mockCoreRepo.getHaplogroupByName("M269", HaplogroupType.Y)).thenReturn(Future.successful(None)) - 
when(mockCoreRepo.getHaplogroupByName("R-M269", HaplogroupType.Y)).thenReturn(Future.successful(Some(childHg))) - // Variant search succeeds (normalizeVariantId returns original for non-rs/non-chr:pos) - when(mockVariantRepo.findVariants("M269")).thenReturn(Future.successful(Seq(testVariant))) - when(mockVariantRepo.findHaplogroupsByDefiningVariant("100", HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(childHg))) - // Then builds tree for resolved name - when(mockCoreRepo.getAncestors(2)).thenReturn(Future.successful(Seq(rootHg))) - when(mockVariantRepo.getHaplogroupVariants(2)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(2)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("M269", HaplogroupType.Y, ApiRoute)) { tree => - tree.name mustBe "R-M269" - } - } - - "buildTreeResponse fails when haplogroup and variant not found" in { - when(mockCoreRepo.getHaplogroupByName("UNKNOWN", HaplogroupType.Y)).thenReturn(Future.successful(None)) - // normalizeVariantId lowercases the query - when(mockVariantRepo.findVariants(anyString)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("UNKNOWN", HaplogroupType.Y, ApiRoute).failed) { ex => - ex mustBe a[IllegalArgumentException] - ex.getMessage must include("not found") - } - } - - "buildTreeFromVariant returns tree for defining haplogroup" in { - when(mockVariantRepo.findHaplogroupsByDefiningVariant("M269", HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(childHg))) - when(mockCoreRepo.getHaplogroupByName("R-M269", HaplogroupType.Y)).thenReturn(Future.successful(Some(childHg))) - when(mockCoreRepo.getAncestors(2)).thenReturn(Future.successful(Seq(rootHg))) - when(mockVariantRepo.getHaplogroupVariants(2)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(2)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeFromVariant("M269", HaplogroupType.Y, ApiRoute)) { result => - 
result mustBe defined - result.get.name mustBe "R-M269" - } - } - - "buildTreeFromVariant returns None when no haplogroups found" in { - when(mockVariantRepo.findHaplogroupsByDefiningVariant("FAKE", HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeFromVariant("FAKE", HaplogroupType.Y, ApiRoute)) { result => - result mustBe None - } - } - - "buildTreesFromVariant returns trees for all matching haplogroups" in { - when(mockVariantRepo.findHaplogroupsByDefiningVariant("M269", HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(childHg, grandchildHg))) - // For childHg tree - when(mockCoreRepo.getHaplogroupByName("R-M269", HaplogroupType.Y)).thenReturn(Future.successful(Some(childHg))) - when(mockCoreRepo.getAncestors(2)).thenReturn(Future.successful(Seq(rootHg))) - when(mockVariantRepo.getHaplogroupVariants(2)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(2)).thenReturn(Future.successful(Seq.empty)) - // For grandchildHg tree - when(mockCoreRepo.getHaplogroupByName("R-L151", HaplogroupType.Y)).thenReturn(Future.successful(Some(grandchildHg))) - when(mockCoreRepo.getAncestors(3)).thenReturn(Future.successful(Seq(rootHg, childHg))) - when(mockVariantRepo.getHaplogroupVariants(3)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(3)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreesFromVariant("M269", HaplogroupType.Y, ApiRoute)) { trees => - trees must have size 2 - } - } - - "findHaplogroupWithVariants returns haplogroup and variants" in { - when(mockCoreRepo.getHaplogroupByName("R-M269", HaplogroupType.Y)).thenReturn(Future.successful(Some(childHg))) - when(mockVariantRepo.getHaplogroupVariants(2)).thenReturn(Future.successful(Seq(testVariant))) - - whenReady(service.findHaplogroupWithVariants("R-M269", HaplogroupType.Y)) { case (hgOpt, variants) => - hgOpt mustBe defined - hgOpt.get.name mustBe "R-M269" - variants must have size 1 - } 
- } - - "findHaplogroupWithVariants returns None when haplogroup not found" in { - when(mockCoreRepo.getHaplogroupByName("NOPE", HaplogroupType.Y)).thenReturn(Future.successful(None)) - when(mockVariantRepo.getHaplogroupVariants(0)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.findHaplogroupWithVariants("NOPE", HaplogroupType.Y)) { case (hgOpt, variants) => - hgOpt mustBe None - variants mustBe empty - } - } - - "mapApiResponse flattens tree to subclade sequence" in { - val zonedNow = now.atZone(ZoneId.systemDefault()) - val child = TreeNodeDTO("R-M269", Seq.empty, List.empty, zonedNow) - val root = TreeNodeDTO("R", Seq.empty, List(child), zonedNow, isBackbone = true) - - val result = service.mapApiResponse(Some(root)) - result must have size 2 - result.head.name mustBe "R" - result.head.parentName mustBe None - result(1).name mustBe "R-M269" - result(1).parentName mustBe Some("R") - } - - "mapApiResponse returns empty for None input" in { - val result = service.mapApiResponse(None) - result mustBe empty - } - - "extractCoordinates maps JSONB to GenomicCoordinate map" in { - when(mockCoreRepo.getHaplogroupByName("R", HaplogroupType.Y)).thenReturn(Future.successful(Some(rootHg))) - when(mockCoreRepo.getAncestors(1)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(1)).thenReturn(Future.successful(Seq(testVariant))) - when(mockCoreRepo.getDirectChildren(1)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("R", HaplogroupType.Y, ApiRoute)) { tree => - val variant = tree.subclade.get.variants.head - variant.coordinates must contain key "chrY [b38]" - val coord = variant.coordinates("chrY [b38]") - coord.start mustBe 1000 - coord.anc mustBe "A" - coord.der mustBe "G" - } - } - - "extractAliases maps JSONB to alias map" in { - when(mockCoreRepo.getHaplogroupByName("R", HaplogroupType.Y)).thenReturn(Future.successful(Some(rootHg))) - 
when(mockCoreRepo.getAncestors(1)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(1)).thenReturn(Future.successful(Seq(testVariant))) - when(mockCoreRepo.getDirectChildren(1)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("R", HaplogroupType.Y, ApiRoute)) { tree => - val variant = tree.subclade.get.variants.head - variant.aliases must contain key "rsId" - variant.aliases("rsId") must contain("rs12345") - variant.aliases must contain key "commonName" - variant.aliases("commonName") must contain("M269") - } - } - - "isBackbone flag is set correctly based on haplogroup source" in { - when(mockCoreRepo.getHaplogroupByName("R", HaplogroupType.Y)).thenReturn(Future.successful(Some(rootHg))) - when(mockCoreRepo.getAncestors(1)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(1)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(1)).thenReturn(Future.successful(Seq(grandchildHg))) - when(mockVariantRepo.getHaplogroupVariants(3)).thenReturn(Future.successful(Seq.empty)) - when(mockCoreRepo.getDirectChildren(3)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.buildTreeResponse("R", HaplogroupType.Y, ApiRoute)) { tree => - tree.subclade.get.isBackbone mustBe true // rootHg source = "backbone" - tree.subclade.get.children.head.isBackbone mustBe false // grandchildHg source = "community" - } - } - } -} diff --git a/test/services/InstrumentProposalServiceSpec.scala b/test/services/InstrumentProposalServiceSpec.scala deleted file mode 100644 index 722dae52..00000000 --- a/test/services/InstrumentProposalServiceSpec.scala +++ /dev/null @@ -1,314 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.api.genomics.AssociateLabWithInstrumentResponse -import models.domain.genomics.* -import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.* - -import 
java.time.LocalDateTime -import scala.concurrent.Future - -class InstrumentProposalServiceSpec extends ServiceSpec { - - val mockObservationRepo: InstrumentObservationRepository = mock[InstrumentObservationRepository] - val mockProposalRepo: InstrumentProposalRepository = mock[InstrumentProposalRepository] - val mockInstrumentRepo: SequencerInstrumentRepository = mock[SequencerInstrumentRepository] - val mockLabRepo: SequencingLabRepository = mock[SequencingLabRepository] - - val service = new InstrumentProposalService( - mockObservationRepo, mockProposalRepo, mockInstrumentRepo, mockLabRepo - ) - - override def beforeEach(): Unit = { - reset(mockObservationRepo, mockProposalRepo, mockInstrumentRepo, mockLabRepo) - } - - def makeObservation( - instrumentId: String = "A00123", - labName: String = "Dante Labs", - biosampleRef: String = s"at://did:plc:citizen1/us.decoding.biosample/1", - confidence: ObservationConfidence = ObservationConfidence.Known, - createdAt: LocalDateTime = LocalDateTime.now() - ): InstrumentObservation = - InstrumentObservation( - id = Some(1), - atUri = s"at://did:plc:citizen1/us.decoding.instrument.observation/${System.nanoTime()}", - instrumentId = instrumentId, - labName = labName, - biosampleRef = biosampleRef, - confidence = confidence, - createdAt = createdAt - ) - - "InstrumentProposalService" should { - - "return None when fewer than min observations" in { - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(Seq(makeObservation()))) - - whenReady(service.aggregateObservations("A00123")) { result => - result mustBe None - } - } - - "aggregate observations for a single lab" in { - val obs = (1 to 5).map(i => - makeObservation(biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1") - ) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(obs)) - - whenReady(service.aggregateObservations("A00123")) { result => - result mustBe defined - val agg = result.get - 
agg.dominantLabName mustBe "Dante Labs" - agg.observationCount mustBe 5 - agg.distinctCitizenCount mustBe 5 - agg.conflict mustBe None - } - } - - "detect conflicts when multiple labs claim same instrument" in { - val danteObs = (1 to 3).map(i => - makeObservation(labName = "Dante Labs", biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1") - ) - val nebulaObs = (4 to 5).map(i => - makeObservation(labName = "Nebula Genomics", biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1") - ) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(danteObs ++ nebulaObs)) - - whenReady(service.aggregateObservations("A00123")) { result => - result mustBe defined - val agg = result.get - agg.dominantLabName mustBe "Dante Labs" - agg.conflict mustBe defined - agg.conflict.get.dominantRatio mustBe 0.6 +- 0.01 - agg.conflict.get.proposals must have size 2 - } - } - - "calculate confidence score correctly" in { - val obs = (1 to 10).map(i => - makeObservation( - biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1", - confidence = ObservationConfidence.Known, - createdAt = LocalDateTime.now() - ) - ) - - val result = service.buildAggregation("A00123", obs) - - // 10 obs / 10 threshold = 1.0 * 0.4 = 0.4 - // 10 citizens / 3 min = capped 1.0 * 0.3 = 0.3 - // Recent = 1.0 * 0.2 = 0.2 - // Known = 1.0 * 0.1 = 0.1 - // Total = 1.0 - result.confidenceScore mustBe 1.0 +- 0.01 - } - - "weight GUESSED confidence lower" in { - val obs = (1 to 4).map(i => - makeObservation( - biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1", - confidence = ObservationConfidence.Guessed, - createdAt = LocalDateTime.now() - ) - ) - - val result = service.buildAggregation("A00123", obs) - - // 4/10 * 0.4 = 0.16 - // 4/3 capped 1.0 * 0.3 = 0.3 - // Recent 1.0 * 0.2 = 0.2 - // Guessed 0.3 * 0.1 = 0.03 - // Total = 0.69 - result.confidenceScore mustBe 0.69 +- 0.01 - } - - "calculate recency score as 1.0 for recent observations" in { 
- val recentObs = Seq(makeObservation(createdAt = LocalDateTime.now())) - val score = service.calculateRecencyScore(recentObs) - score mustBe 1.0 - } - - "calculate recency score as decayed for old observations" in { - val oldObs = Seq(makeObservation(createdAt = LocalDateTime.now().minusDays(60))) - val score = service.calculateRecencyScore(oldObs) - score must be < 1.0 - score must be > 0.0 - } - - "calculate average confidence level" in { - val obs = Seq( - makeObservation(confidence = ObservationConfidence.Known), - makeObservation(confidence = ObservationConfidence.Inferred), - makeObservation(confidence = ObservationConfidence.Guessed) - ) - val avg = service.calculateAvgConfidenceLevel(obs) - // (1.0 + 0.7 + 0.3) / 3 = 0.667 - avg mustBe 0.667 +- 0.01 - } - - "create new proposal when none exists" in { - val obs = (1 to 3).map(i => - makeObservation(biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1") - ) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(obs)) - when(mockProposalRepo.findActiveByInstrumentId("A00123")) - .thenReturn(Future.successful(None)) - when(mockProposalRepo.create(any[InstrumentAssociationProposal])) - .thenAnswer { invocation => - val p = invocation.getArgument[InstrumentAssociationProposal](0) - Future.successful(p.copy(id = Some(1))) - } - - whenReady(service.createOrUpdateProposal("A00123")) { result => - result mustBe defined - result.get.proposedLabName mustBe "Dante Labs" - result.get.status mustBe ProposalStatus.Pending - verify(mockProposalRepo).create(any[InstrumentAssociationProposal]) - } - } - - "create proposal with READY_FOR_REVIEW when threshold met" in { - val obs = (1 to 5).map(i => - makeObservation(biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1") - ) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(obs)) - when(mockProposalRepo.findActiveByInstrumentId("A00123")) - .thenReturn(Future.successful(None)) - 
when(mockProposalRepo.create(any[InstrumentAssociationProposal])) - .thenAnswer { invocation => - val p = invocation.getArgument[InstrumentAssociationProposal](0) - Future.successful(p.copy(id = Some(1))) - } - - whenReady(service.createOrUpdateProposal("A00123")) { result => - result mustBe defined - result.get.status mustBe ProposalStatus.ReadyForReview - } - } - - "update existing proposal when one exists" in { - val obs = (1 to 4).map(i => - makeObservation(biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1") - ) - val existingProposal = InstrumentAssociationProposal( - id = Some(1), instrumentId = "A00123", proposedLabName = "Dante Labs", - observationCount = 2, distinctCitizenCount = 2, status = ProposalStatus.Pending - ) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(obs)) - when(mockProposalRepo.findActiveByInstrumentId("A00123")) - .thenReturn(Future.successful(Some(existingProposal))) - when(mockProposalRepo.update(any[InstrumentAssociationProposal])) - .thenReturn(Future.successful(true)) - - whenReady(service.createOrUpdateProposal("A00123")) { result => - result mustBe defined - result.get.observationCount mustBe 4 - verify(mockProposalRepo).update(any[InstrumentAssociationProposal]) - verify(mockProposalRepo, never()).create(any[InstrumentAssociationProposal]) - } - } - - "not change status of UNDER_REVIEW proposals" in { - val result = service.evaluateThreshold( - AggregationResult("A00123", "Dante Labs", 10, 5, 0.9, None, None, None, None, None), - ProposalStatus.UnderReview - ) - result mustBe ProposalStatus.UnderReview - } - - "accept a proposal and create lab association" in { - val proposal = InstrumentAssociationProposal( - id = Some(1), instrumentId = "A00123", proposedLabName = "Dante Labs", - status = ProposalStatus.ReadyForReview - ) - when(mockProposalRepo.findById(1)).thenReturn(Future.successful(Some(proposal))) - when(mockInstrumentRepo.associateLabWithInstrument("A00123", "Dante 
Labs", None, None)) - .thenReturn(Future.successful(AssociateLabWithInstrumentResponse( - instrumentId = "A00123", labId = 10, labName = "Dante Labs", - manufacturer = None, model = None, isNewLab = false, message = "ok" - ))) - when(mockProposalRepo.update(any[InstrumentAssociationProposal])) - .thenReturn(Future.successful(true)) - - whenReady(service.acceptProposal(1, "curator@test.com", "Dante Labs", None, None, Some("Confirmed"))) { result => - result mustBe a[Right[?, ?]] - val accepted = result.toOption.get - accepted.status mustBe ProposalStatus.Accepted - accepted.reviewedBy mustBe Some("curator@test.com") - accepted.acceptedLabId mustBe Some(10) - } - } - - "reject accepting an already-accepted proposal" in { - val proposal = InstrumentAssociationProposal( - id = Some(1), instrumentId = "A00123", proposedLabName = "Dante Labs", - status = ProposalStatus.Accepted - ) - when(mockProposalRepo.findById(1)).thenReturn(Future.successful(Some(proposal))) - - whenReady(service.acceptProposal(1, "curator@test.com", "Dante Labs", None, None, None)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("already accepted") - } - } - - "reject a proposal" in { - val proposal = InstrumentAssociationProposal( - id = Some(1), instrumentId = "A00123", proposedLabName = "Dante Labs", - status = ProposalStatus.ReadyForReview - ) - when(mockProposalRepo.findById(1)).thenReturn(Future.successful(Some(proposal))) - when(mockProposalRepo.update(any[InstrumentAssociationProposal])) - .thenReturn(Future.successful(true)) - - whenReady(service.rejectProposal(1, "curator@test.com", "Insufficient evidence")) { result => - result mustBe a[Right[?, ?]] - val rejected = result.toOption.get - rejected.status mustBe ProposalStatus.Rejected - rejected.reviewNotes mustBe Some("Insufficient evidence") - } - } - - "return error when rejecting nonexistent proposal" in { - when(mockProposalRepo.findById(99)).thenReturn(Future.successful(None)) - - 
whenReady(service.rejectProposal(99, "curator@test.com", "No reason")) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("not found") - } - } - - "detect conflicts across pending proposals" in { - val proposal = InstrumentAssociationProposal( - id = Some(1), instrumentId = "A00123", proposedLabName = "Dante Labs", - status = ProposalStatus.Pending - ) - val danteObs = (1 to 3).map(i => - makeObservation(labName = "Dante Labs", biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1") - ) - val nebulaObs = (4 to 5).map(i => - makeObservation(labName = "Nebula Genomics", biosampleRef = s"at://did:plc:citizen$i/us.decoding.biosample/1") - ) - - when(mockProposalRepo.findPending()).thenReturn(Future.successful(Seq(proposal))) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(danteObs ++ nebulaObs)) - - whenReady(service.detectConflicts()) { conflicts => - conflicts must have size 1 - conflicts.head.instrumentId mustBe "A00123" - conflicts.head.dominantLabName mustBe "Dante Labs" - conflicts.head.dominantRatio mustBe 0.6 +- 0.01 - } - } - } -} diff --git a/test/services/MemberVisibilityServiceSpec.scala b/test/services/MemberVisibilityServiceSpec.scala deleted file mode 100644 index c135d437..00000000 --- a/test/services/MemberVisibilityServiceSpec.scala +++ /dev/null @@ -1,406 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.* -import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{reset, when} -import repositories.{GroupProjectMemberRepository, GroupProjectRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class MemberVisibilityServiceSpec extends ServiceSpec { - - val mockProjectRepo: GroupProjectRepository = mock[GroupProjectRepository] - val mockMemberRepo: GroupProjectMemberRepository = mock[GroupProjectMemberRepository] - - val service = new MemberVisibilityService(mockProjectRepo, 
mockMemberRepo) - - override def beforeEach(): Unit = { - reset(mockProjectRepo, mockMemberRepo) - } - - val project: GroupProject = GroupProject( - id = Some(1), projectName = "Test Project", projectType = "HAPLOGROUP", - joinPolicy = "APPROVAL_REQUIRED", ownerDid = "did:plc:admin1", - memberListVisibility = "MEMBERS_ONLY", strPolicy = "DISTANCE_ONLY", - snpPolicy = "TERMINAL_ONLY", publicTreeView = false - ) - - val fullVisProject: GroupProject = project.copy( - snpPolicy = "WITH_PRIVATE_VARIANTS", strPolicy = "MEMBERS_ONLY_RAW", - memberListVisibility = "PUBLIC", publicTreeView = true - ) - - val hiddenSnpProject: GroupProject = project.copy(snpPolicy = "HIDDEN", strPolicy = "HIDDEN") - - val activeMember: GroupProjectMember = GroupProjectMember( - id = Some(10), groupProjectId = 1, citizenDid = "did:plc:member1", - role = "MEMBER", status = "ACTIVE", - displayName = Some("John"), - kitId = Some("KIT-123"), - visibility = MemberVisibility( - showInMemberList = true, showInTree = true, - shareTerminalHaplogroup = true, shareFullLineagePath = true, - sharePrivateVariants = true, ancestorVisibility = "FULL", - strVisibility = "FULL_TO_MEMBERS", allowDirectContact = true, - showDisplayName = true - ), - joinedAt = Some(LocalDateTime.now()) - ) - - val restrictiveMember: GroupProjectMember = activeMember.copy( - id = Some(11), citizenDid = "did:plc:member2", - visibility = MemberVisibility( - showInMemberList = false, showInTree = false, - shareTerminalHaplogroup = false, shareFullLineagePath = false, - sharePrivateVariants = false, ancestorVisibility = "NONE", - strVisibility = "NONE", allowDirectContact = false, - showDisplayName = false - ) - ) - - val adminMember: GroupProjectMember = GroupProjectMember( - id = Some(1), groupProjectId = 1, citizenDid = "did:plc:admin1", - role = "ADMIN", status = "ACTIVE", joinedAt = Some(LocalDateTime.now()) - ) - - val fullAncestor: AncestorData = AncestorData( - name = Some("Johann Mueller"), surname = Some("Mueller"), - 
birthYear = Some(1745), birthCentury = Some("18th century"), - birthDecade = Some("1740s"), birthCountry = Some("Germany"), - birthRegion = Some("Bavaria"), birthPlace = Some("Munich"), - additionalInfo = Some("Farmer in Munich") - ) - - // --- EffectiveVisibility.compute tests --- - - "EffectiveVisibility.compute" should { - - "respect more restrictive of project and member (project restricts SNP)" in { - val effective = EffectiveVisibility.compute(project, activeMember.visibility) - effective.shareTerminalHaplogroup mustBe true // TERMINAL_ONLY allows terminal - effective.shareFullLineagePath mustBe false // TERMINAL_ONLY blocks full path - effective.sharePrivateVariants mustBe false // TERMINAL_ONLY blocks private - } - - "allow everything when project is permissive and member opts in" in { - val effective = EffectiveVisibility.compute(fullVisProject, activeMember.visibility) - effective.shareTerminalHaplogroup mustBe true - effective.shareFullLineagePath mustBe true - effective.sharePrivateVariants mustBe true - effective.showInMemberList mustBe true - } - - "restrict everything when member opts out" in { - val effective = EffectiveVisibility.compute(fullVisProject, restrictiveMember.visibility) - effective.shareTerminalHaplogroup mustBe false - effective.shareFullLineagePath mustBe false - effective.sharePrivateVariants mustBe false - effective.showInMemberList mustBe false - effective.showInTree mustBe false - effective.allowDirectContact mustBe false - effective.showDisplayName mustBe false - } - - "restrict everything when project hides SNP/STR" in { - val effective = EffectiveVisibility.compute(hiddenSnpProject, activeMember.visibility) - effective.shareTerminalHaplogroup mustBe false - effective.strVisibility mustBe "NONE" - } - - "use more restrictive STR level between project and member" in { - // project allows DISTANCE_ONLY, member wants FULL_TO_MEMBERS → DISTANCE_CALCULATION_ONLY - val effective = EffectiveVisibility.compute(project, 
activeMember.visibility) - effective.strVisibility mustBe "DISTANCE_CALCULATION_ONLY" - } - - "use more restrictive ancestor level" in { - val memberWithRegion = activeMember.visibility.copy(ancestorVisibility = "REGION_ONLY") - val effective = EffectiveVisibility.compute(project, memberWithRegion) - effective.ancestorVisibility mustBe "REGION_ONLY" - } - } - - // --- AncestorData.filter tests --- - - "AncestorData.filter" should { - - "return empty for NONE" in { - val filtered = AncestorData.filter(fullAncestor, "NONE") - filtered.name mustBe None - filtered.surname mustBe None - filtered.birthCountry mustBe None - } - - "return only century for CENTURY_ONLY" in { - val filtered = AncestorData.filter(fullAncestor, "CENTURY_ONLY") - filtered.birthCentury mustBe Some("18th century") - filtered.name mustBe None - filtered.surname mustBe None - filtered.birthCountry mustBe None - } - - "return country and region for REGION_ONLY" in { - val filtered = AncestorData.filter(fullAncestor, "REGION_ONLY") - filtered.birthCountry mustBe Some("Germany") - filtered.birthRegion mustBe Some("Bavaria") - filtered.name mustBe None - filtered.birthCentury mustBe None - } - - "return only country for COUNTRY_ONLY" in { - val filtered = AncestorData.filter(fullAncestor, "COUNTRY_ONLY") - filtered.birthCountry mustBe Some("Germany") - filtered.birthRegion mustBe None - } - - "return surname and century for SURNAME_ONLY" in { - val filtered = AncestorData.filter(fullAncestor, "SURNAME_ONLY") - filtered.surname mustBe Some("Mueller") - filtered.birthCentury mustBe Some("18th century") - filtered.name mustBe None - filtered.birthCountry mustBe None - } - - "return everything for FULL" in { - val filtered = AncestorData.filter(fullAncestor, "FULL") - filtered mustBe fullAncestor - } - } - - // --- MemberVisibilityService.updateVisibility tests --- - - "MemberVisibilityService.updateVisibility" should { - - "allow member to update own visibility" in { - 
when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(activeMember))) - when(mockMemberRepo.update(any[GroupProjectMember])).thenReturn(Future.successful(true)) - - val newVis = MemberVisibility(showInTree = false) - whenReady(service.updateVisibility(10, "did:plc:member1", newVis)) { result => - result mustBe a[Right[?, ?]] - result.toOption.get.visibility.showInTree mustBe false - } - } - - "reject update from different user" in { - when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(activeMember))) - - val newVis = MemberVisibility(showInTree = false) - whenReady(service.updateVisibility(10, "did:plc:other", newVis)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Only the member") - } - } - - "reject invalid ancestor visibility" in { - when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(activeMember))) - - val badVis = MemberVisibility(ancestorVisibility = "EVERYTHING") - whenReady(service.updateVisibility(10, "did:plc:member1", badVis)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Invalid ancestor visibility") - } - } - - "reject invalid STR visibility" in { - when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(activeMember))) - - val badVis = MemberVisibility(strVisibility = "RAW_PUBLIC") - whenReady(service.updateVisibility(10, "did:plc:member1", badVis)) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("Invalid STR visibility") - } - } - - "reject update for inactive membership" in { - val suspended = activeMember.copy(status = "SUSPENDED") - when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(suspended))) - - whenReady(service.updateVisibility(10, "did:plc:member1", MemberVisibility())) { result => - result mustBe a[Left[?, ?]] - result.left.toOption.get must include("active") - } - } - } - - // --- MemberVisibilityService.getEffectiveVisibility tests --- - - 
"MemberVisibilityService.getEffectiveVisibility" should { - - "return computed effective visibility" in { - when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(activeMember))) - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(project))) - - whenReady(service.getEffectiveVisibility(10)) { result => - result mustBe defined - val eff = result.get - eff.shareTerminalHaplogroup mustBe true - eff.shareFullLineagePath mustBe false // project is TERMINAL_ONLY - } - } - - "return None for unknown member" in { - when(mockMemberRepo.findById(99)).thenReturn(Future.successful(None)) - - whenReady(service.getEffectiveVisibility(99)) { _ mustBe None } - } - } - - // --- MemberVisibilityService.getFilteredMemberView tests --- - - "MemberVisibilityService.getFilteredMemberView" should { - - "filter haplogroup data based on effective visibility" in { - when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(activeMember))) - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(project))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer")) - .thenReturn(Future.successful(Some(adminMember.copy(citizenDid = "did:plc:viewer", role = "MEMBER")))) - - whenReady(service.getFilteredMemberView( - 10, "did:plc:viewer", - haplogroup = Some("R-CTS4466"), - lineagePath = Some(Seq("R", "R1b", "R-M269", "R-CTS4466")), - privateVariantCount = Some(5), - ancestor = fullAncestor - )) { result => - result mustBe defined - val view = result.get - view.terminalHaplogroup mustBe Some("R-CTS4466") // allowed by TERMINAL_ONLY - view.lineagePath mustBe None // blocked by TERMINAL_ONLY - view.privateVariantCount mustBe None // blocked by TERMINAL_ONLY - view.displayName mustBe Some("John") - view.ancestor.name mustBe Some("Johann Mueller") // member allows FULL - } - } - - "show all data when project and member both allow" in { - when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(activeMember))) - 
when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(fullVisProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer")) - .thenReturn(Future.successful(Some(adminMember.copy(citizenDid = "did:plc:viewer", role = "MEMBER")))) - - whenReady(service.getFilteredMemberView( - 10, "did:plc:viewer", - haplogroup = Some("R-CTS4466"), - lineagePath = Some(Seq("R", "R1b", "R-CTS4466")), - privateVariantCount = Some(5) - )) { result => - val view = result.get - view.terminalHaplogroup mustBe Some("R-CTS4466") - view.lineagePath mustBe Some(Seq("R", "R1b", "R-CTS4466")) - view.privateVariantCount mustBe Some(5) - } - } - - "hide everything for restrictive member" in { - when(mockMemberRepo.findById(11)).thenReturn(Future.successful(Some(restrictiveMember))) - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(fullVisProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer")) - .thenReturn(Future.successful(Some(adminMember.copy(citizenDid = "did:plc:viewer", role = "MEMBER")))) - - whenReady(service.getFilteredMemberView( - 11, "did:plc:viewer", - haplogroup = Some("R-CTS4466"), - ancestor = fullAncestor - )) { result => - val view = result.get - view.terminalHaplogroup mustBe None - view.displayName mustBe None - view.ancestor.name mustBe None - view.ancestor.birthCountry mustBe None - view.allowDirectContact mustBe false - } - } - - "only allow direct contact from members" in { - when(mockMemberRepo.findById(10)).thenReturn(Future.successful(Some(activeMember))) - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(project))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:outsider")) - .thenReturn(Future.successful(None)) - - whenReady(service.getFilteredMemberView(10, "did:plc:outsider")) { result => - result.get.allowDirectContact mustBe false - } - } - } - - // --- MemberVisibilityService.getFilteredMembersForProject tests --- - - 
"MemberVisibilityService.getFilteredMembersForProject" should { - - "filter out hidden members for non-admins" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(project))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer")) - .thenReturn(Future.successful(Some(adminMember.copy(citizenDid = "did:plc:viewer", role = "MEMBER")))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(activeMember, restrictiveMember))) - - val data = Map( - 10 -> MemberSupplementalData(haplogroup = Some("R-CTS4466")), - 11 -> MemberSupplementalData(haplogroup = Some("R-L21")) - ) - - whenReady(service.getFilteredMembersForProject(1, "did:plc:viewer", data)) { result => - result mustBe a[Right[?, ?]] - val views = result.toOption.get - views.size mustBe 1 // restrictiveMember hidden from member list - views.head.terminalHaplogroup mustBe Some("R-CTS4466") - } - } - - "show hidden members to admin" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(project))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:admin1")) - .thenReturn(Future.successful(Some(adminMember))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(activeMember, restrictiveMember))) - - whenReady(service.getFilteredMembersForProject(1, "did:plc:admin1", Map.empty)) { result => - result.toOption.get.size mustBe 2 - } - } - - "deny non-member access to MEMBERS_ONLY list" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(project))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:outsider")) - .thenReturn(Future.successful(None)) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(activeMember))) - - whenReady(service.getFilteredMembersForProject(1, "did:plc:outsider", Map.empty)) { result => - result mustBe a[Left[?, ?]] - } - } - } - - // --- MemberVisibility helpers --- - - 
"MemberVisibility.moreRestrictiveAncestor" should { - - "pick NONE over FULL" in { - MemberVisibility.moreRestrictiveAncestor("NONE", "FULL") mustBe "NONE" - } - - "pick CENTURY_ONLY over SURNAME_ONLY" in { - MemberVisibility.moreRestrictiveAncestor("CENTURY_ONLY", "SURNAME_ONLY") mustBe "CENTURY_ONLY" - } - - "pick same when equal" in { - MemberVisibility.moreRestrictiveAncestor("REGION_ONLY", "REGION_ONLY") mustBe "REGION_ONLY" - } - } - - "MemberVisibility.moreRestrictiveStr" should { - - "pick NONE over FULL_PUBLIC" in { - MemberVisibility.moreRestrictiveStr("NONE", "FULL_PUBLIC") mustBe "NONE" - } - - "pick DISTANCE_CALCULATION_ONLY over MODAL_COMPARISON_ONLY" in { - MemberVisibility.moreRestrictiveStr("DISTANCE_CALCULATION_ONLY", "MODAL_COMPARISON_ONLY") mustBe "DISTANCE_CALCULATION_ONLY" - } - } -} diff --git a/test/services/PDSRegistrationServiceSpec.scala b/test/services/PDSRegistrationServiceSpec.scala deleted file mode 100644 index 861a93d7..00000000 --- a/test/services/PDSRegistrationServiceSpec.scala +++ /dev/null @@ -1,259 +0,0 @@ -package services - -import models.PDSRegistration -import org.mockito.ArgumentMatchers.{any, anyString} -import org.mockito.Mockito.{never, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.scalatestplus.mockito.MockitoSugar -import org.scalatest.concurrent.ScalaFutures -import org.scalatestplus.play.PlaySpec -import repositories.PDSRegistrationRepository - -import java.time.ZonedDateTime -import scala.concurrent.{ExecutionContext, Future} - -class PDSRegistrationServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures { - - implicit val ec: ExecutionContext = ExecutionContext.global - - def createMocks(): (ATProtocolClient, PDSRegistrationRepository) = { - (mock[ATProtocolClient], mock[PDSRegistrationRepository]) - } - - "PDSRegistrationService" should { - - "register a new PDS successfully" in { - val (atClient, repo) = createMocks() - - val did = 
"did:plc:test123" - val handle = "user.bsky.social" - val pdsUrl = "https://pds.example.com" - val rToken = "auth-token" - - // PDS not already registered - when(repo.findByDid(did)) - .thenReturn(Future.successful(None)) - - // AT Protocol verification succeeds - when(atClient.getLatestCommit(pdsUrl, did, rToken)) - .thenReturn(Future.successful(Some(LatestCommitResponse( - cid = "bafyreib123", - rev = "rev-001", - seq = 42L - )))) - - // Repository create succeeds - when(repo.create(any[PDSRegistration])) - .thenAnswer(new Answer[Future[PDSRegistration]] { - override def answer(invocation: InvocationOnMock): Future[PDSRegistration] = { - Future.successful(invocation.getArgument[PDSRegistration](0)) - } - }) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.registerPDS(did, handle, pdsUrl, rToken)) { result => - result mustBe a[Right[_, _]] - val registration = result.toOption.get - registration.did mustBe did - registration.pdsUrl mustBe pdsUrl - registration.handle mustBe handle - registration.lastCommitCid mustBe Some("bafyreib123") - registration.lastCommitSeq mustBe Some(42L) - - verify(repo).create(any[PDSRegistration]) - } - } - - "fail registration when PDS already registered" in { - val (atClient, repo) = createMocks() - - val did = "did:plc:existing" - val existingRegistration = PDSRegistration( - did = did, - pdsUrl = "https://existing.pds.com", - handle = "existing.user", - lastCommitCid = Some("abc"), - lastCommitSeq = Some(10L), - cursor = 0L, - createdAt = ZonedDateTime.now(), - updatedAt = ZonedDateTime.now() - ) - - when(repo.findByDid(did)) - .thenReturn(Future.successful(Some(existingRegistration))) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.registerPDS(did, "handle", "https://pds.com", "token")) { result => - result mustBe a[Left[_, _]] - result.left.getOrElse("") must include("already registered") - - verify(atClient, never).getLatestCommit(anyString(), anyString(), 
anyString()) - verify(repo, never).create(any[PDSRegistration]) - } - } - - "fail registration when AT Protocol verification fails" in { - val (atClient, repo) = createMocks() - - val did = "did:plc:unverifiable" - val pdsUrl = "https://unreachable.pds.com" - - when(repo.findByDid(did)) - .thenReturn(Future.successful(None)) - - // AT Protocol verification fails - when(atClient.getLatestCommit(pdsUrl, did, "token")) - .thenReturn(Future.successful(None)) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.registerPDS(did, "handle", pdsUrl, "token")) { result => - result mustBe a[Left[_, _]] - result.left.getOrElse("") must include("Failed to verify") - - verify(repo, never).create(any[PDSRegistration]) - } - } - - "retrieve PDS by DID" in { - val (atClient, repo) = createMocks() - - val did = "did:plc:test123" - val registration = PDSRegistration( - did = did, - pdsUrl = "https://pds.example.com", - handle = "user.test", - lastCommitCid = Some("cid"), - lastCommitSeq = Some(100L), - cursor = 50L, - createdAt = ZonedDateTime.now(), - updatedAt = ZonedDateTime.now() - ) - - when(repo.findByDid(did)) - .thenReturn(Future.successful(Some(registration))) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.getPDSByDid(did)) { result => - result mustBe defined - result.get.did mustBe did - result.get.handle mustBe "user.test" - } - } - - "retrieve PDS by handle" in { - val (atClient, repo) = createMocks() - - val handle = "user.bsky.social" - val registration = PDSRegistration( - did = "did:plc:abc", - pdsUrl = "https://pds.example.com", - handle = handle, - lastCommitCid = None, - lastCommitSeq = None, - cursor = 0L, - createdAt = ZonedDateTime.now(), - updatedAt = ZonedDateTime.now() - ) - - when(repo.findByHandle(handle)) - .thenReturn(Future.successful(Some(registration))) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.getPDSByHandle(handle)) { result => - result mustBe 
defined - result.get.handle mustBe handle - } - } - - "list all registered PDS entries" in { - val (atClient, repo) = createMocks() - - val registrations = Seq( - PDSRegistration("did:1", "https://pds1.com", "user1", None, None, 0L, ZonedDateTime.now(), ZonedDateTime.now()), - PDSRegistration("did:2", "https://pds2.com", "user2", None, None, 0L, ZonedDateTime.now(), ZonedDateTime.now()) - ) - - when(repo.listAll) - .thenReturn(Future.successful(registrations)) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.listAllPDS()) { result => - result must have size 2 - result.map(_.did) must contain allOf("did:1", "did:2") - } - } - - "update PDS cursor successfully" in { - val (atClient, repo) = createMocks() - - val did = "did:plc:test" - val newCid = "newCid123" - val newCursor = 200L - - when(repo.updateCursor(did, newCid, newCursor)) - .thenReturn(Future.successful(1)) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.updatePDSCursor(did, newCid, newCursor)) { result => - result mustBe Right(()) - verify(repo).updateCursor(did, newCid, newCursor) - } - } - - "fail cursor update when PDS not found" in { - val (atClient, repo) = createMocks() - - val did = "did:plc:nonexistent" - - when(repo.updateCursor(did, "cid", 100L)) - .thenReturn(Future.successful(0)) // No rows affected - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.updatePDSCursor(did, "cid", 100L)) { result => - result mustBe a[Left[_, _]] - result.left.getOrElse("") must include("not found") - } - } - - "delete PDS registration successfully" in { - val (atClient, repo) = createMocks() - - val did = "did:plc:todelete" - - when(repo.delete(did)) - .thenReturn(Future.successful(1)) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.deletePDS(did)) { result => - result mustBe Right(()) - verify(repo).delete(did) - } - } - - "fail deletion when PDS not found" in { - val (atClient, 
repo) = createMocks() - - val did = "did:plc:nonexistent" - - when(repo.delete(did)) - .thenReturn(Future.successful(0)) - - val service = new PDSRegistrationService(atClient, repo) - - whenReady(service.deletePDS(did)) { result => - result mustBe a[Left[_, _]] - result.left.getOrElse("") must include("not found") - } - } - } -} diff --git a/test/services/PatronageServiceSpec.scala b/test/services/PatronageServiceSpec.scala deleted file mode 100644 index bc6d3ba9..00000000 --- a/test/services/PatronageServiceSpec.scala +++ /dev/null @@ -1,269 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.billing.PatronSubscription -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.PatronSubscriptionRepository - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class PatronageServiceSpec extends ServiceSpec { - - val mockRepo: PatronSubscriptionRepository = mock[PatronSubscriptionRepository] - val service = new PatronageService(mockRepo) - - val userId: UUID = UUID.randomUUID() - val now: LocalDateTime = LocalDateTime.now() - - val testSubscription: PatronSubscription = PatronSubscription( - id = Some(1), - userId = userId, - patronTier = "SUPPORTER", - status = "ACTIVE", - paymentProvider = "STRIPE", - providerSubscriptionId = Some("sub_123"), - providerCustomerId = Some("cus_456"), - amountCents = 200, - billingInterval = "MONTHLY", - currentPeriodStart = Some(now), - currentPeriodEnd = Some(now.plusMonths(1)) - ) - - override def beforeEach(): Unit = { - reset(mockRepo) - } - - "PatronageService.createSubscription" should { - - "create a monthly supporter subscription" in { - when(mockRepo.findActiveByUserId(userId)).thenReturn(Future.successful(None)) - when(mockRepo.create(any[PatronSubscription])) - .thenAnswer(inv => Future.successful(inv.getArgument[PatronSubscription](0).copy(id = Some(1)))) - - val result = 
service.createSubscription(userId, "SUPPORTER", "MONTHLY", "STRIPE").futureValue - result.isRight mustBe true - result.toOption.get.amountCents mustBe 200 - result.toOption.get.patronTier mustBe "SUPPORTER" - } - - "create a yearly sustainer subscription" in { - when(mockRepo.findActiveByUserId(userId)).thenReturn(Future.successful(None)) - when(mockRepo.create(any[PatronSubscription])) - .thenAnswer(inv => Future.successful(inv.getArgument[PatronSubscription](0).copy(id = Some(2)))) - - val result = service.createSubscription(userId, "SUSTAINER", "YEARLY", "STRIPE").futureValue - result.isRight mustBe true - result.toOption.get.amountCents mustBe 10000 - } - - "reject invalid tier" in { - val result = service.createSubscription(userId, "INVALID", "MONTHLY", "STRIPE").futureValue - result mustBe Left("Invalid patron tier: INVALID") - verify(mockRepo, never()).findActiveByUserId(any()) - } - - "reject invalid billing interval" in { - val result = service.createSubscription(userId, "SUPPORTER", "WEEKLY", "STRIPE").futureValue - result mustBe Left("Invalid billing interval: WEEKLY") - } - - "reject invalid payment provider" in { - val result = service.createSubscription(userId, "SUPPORTER", "MONTHLY", "BITCOIN").futureValue - result mustBe Left("Invalid payment provider: BITCOIN") - } - - "reject when user already has active subscription" in { - when(mockRepo.findActiveByUserId(userId)).thenReturn(Future.successful(Some(testSubscription))) - - val result = service.createSubscription(userId, "CONTRIBUTOR", "MONTHLY", "STRIPE").futureValue - result.isLeft mustBe true - result.swap.toOption.get must include("already has an active subscription") - } - - "set correct period end for monthly" in { - when(mockRepo.findActiveByUserId(userId)).thenReturn(Future.successful(None)) - when(mockRepo.create(any[PatronSubscription])) - .thenAnswer(inv => Future.successful(inv.getArgument[PatronSubscription](0).copy(id = Some(3)))) - - val result = service.createSubscription(userId, 
"SUPPORTER", "MONTHLY", "STRIPE").futureValue - val sub = result.toOption.get - sub.currentPeriodStart mustBe defined - sub.currentPeriodEnd mustBe defined - } - } - - "PatronageService.cancelSubscription" should { - - "cancel an active subscription" in { - when(mockRepo.findById(1)).thenReturn(Future.successful(Some(testSubscription))) - when(mockRepo.cancel(1)).thenReturn(Future.successful(true)) - - val result = service.cancelSubscription(1, userId).futureValue - result mustBe Right(true) - } - - "reject cancellation of non-existent subscription" in { - when(mockRepo.findById(999)).thenReturn(Future.successful(None)) - - val result = service.cancelSubscription(999, userId).futureValue - result mustBe Left("Subscription not found") - } - - "reject cancellation by wrong user" in { - val otherUserId = UUID.randomUUID() - when(mockRepo.findById(1)).thenReturn(Future.successful(Some(testSubscription))) - - val result = service.cancelSubscription(1, otherUserId).futureValue - result mustBe Left("Not authorized to cancel this subscription") - } - - "reject cancellation of already cancelled subscription" in { - val cancelled = testSubscription.copy(status = "CANCELLED") - when(mockRepo.findById(1)).thenReturn(Future.successful(Some(cancelled))) - - val result = service.cancelSubscription(1, userId).futureValue - result.isLeft mustBe true - result.swap.toOption.get must include("Cannot cancel") - } - } - - "PatronageService.handlePaymentWebhook" should { - - "handle subscription renewal" in { - when(mockRepo.findByProviderSubscriptionId("STRIPE", "sub_123")) - .thenReturn(Future.successful(Some(testSubscription))) - when(mockRepo.updateStatus(1, "ACTIVE")).thenReturn(Future.successful(true)) - when(mockRepo.updatePeriod(meq(1), any[LocalDateTime], any[LocalDateTime])) - .thenReturn(Future.successful(true)) - - val event = WebhookEvent( - eventType = "subscription.renewed", - providerSubscriptionId = "sub_123", - periodStart = Some(now), - periodEnd = 
Some(now.plusMonths(1)) - ) - - val result = service.handlePaymentWebhook(event, "STRIPE").futureValue - result mustBe Right(true) - } - - "handle subscription cancellation" in { - when(mockRepo.findByProviderSubscriptionId("STRIPE", "sub_123")) - .thenReturn(Future.successful(Some(testSubscription))) - when(mockRepo.cancel(1)).thenReturn(Future.successful(true)) - - val event = WebhookEvent( - eventType = "subscription.cancelled", - providerSubscriptionId = "sub_123" - ) - - val result = service.handlePaymentWebhook(event, "STRIPE").futureValue - result mustBe Right(true) - } - - "handle payment failure" in { - when(mockRepo.findByProviderSubscriptionId("STRIPE", "sub_123")) - .thenReturn(Future.successful(Some(testSubscription))) - when(mockRepo.updateStatus(1, "PAST_DUE")).thenReturn(Future.successful(true)) - - val event = WebhookEvent( - eventType = "invoice.payment_failed", - providerSubscriptionId = "sub_123" - ) - - val result = service.handlePaymentWebhook(event, "STRIPE").futureValue - result mustBe Right(true) - } - - "return error for unknown subscription" in { - when(mockRepo.findByProviderSubscriptionId("STRIPE", "sub_unknown")) - .thenReturn(Future.successful(None)) - - val event = WebhookEvent( - eventType = "subscription.renewed", - providerSubscriptionId = "sub_unknown" - ) - - val result = service.handlePaymentWebhook(event, "STRIPE").futureValue - result mustBe Left("Subscription not found") - } - } - - "PatronageService.expireOverdueSubscriptions" should { - - "expire subscriptions past their period end" in { - val expired = testSubscription.copy( - currentPeriodEnd = Some(now.minusDays(1)) - ) - when(mockRepo.findByStatus("ACTIVE")).thenReturn(Future.successful(Seq(expired))) - when(mockRepo.updateStatus(1, "EXPIRED")).thenReturn(Future.successful(true)) - - val result = service.expireOverdueSubscriptions().futureValue - result mustBe 1 - } - - "not expire subscriptions within their period" in { - val current = testSubscription.copy( - 
currentPeriodEnd = Some(now.plusDays(15)) - ) - when(mockRepo.findByStatus("ACTIVE")).thenReturn(Future.successful(Seq(current))) - - val result = service.expireOverdueSubscriptions().futureValue - result mustBe 0 - } - } - - "PatronageService.getPatronSummary" should { - - "compute summary with active patrons" in { - when(mockRepo.countActive()).thenReturn(Future.successful(10)) - when(mockRepo.countByTier()).thenReturn(Future.successful( - Map("SUPPORTER" -> 5, "CONTRIBUTOR" -> 3, "SUSTAINER" -> 2) - )) - - val result = service.getPatronSummary.futureValue - result.activePatrons mustBe 10 - result.tierCounts("SUPPORTER") mustBe 5 - // Monthly revenue: 5*200 + 3*500 + 2*1000 = 1000 + 1500 + 2000 = 4500 - result.monthlyRevenueCents mustBe 4500 - } - - "handle empty patron base" in { - when(mockRepo.countActive()).thenReturn(Future.successful(0)) - when(mockRepo.countByTier()).thenReturn(Future.successful(Map.empty)) - - val result = service.getPatronSummary.futureValue - result.activePatrons mustBe 0 - result.monthlyRevenueCents mustBe 0 - } - } - - "PatronageService.isPatron" should { - - "return true for active patron" in { - when(mockRepo.findActiveByUserId(userId)).thenReturn(Future.successful(Some(testSubscription))) - service.isPatron(userId).futureValue mustBe true - } - - "return false for non-patron" in { - when(mockRepo.findActiveByUserId(userId)).thenReturn(Future.successful(None)) - service.isPatron(userId).futureValue mustBe false - } - } - - "PatronageService.getPatronTier" should { - - "return tier for active patron" in { - when(mockRepo.findActiveByUserId(userId)).thenReturn(Future.successful(Some(testSubscription))) - service.getPatronTier(userId).futureValue mustBe Some("SUPPORTER") - } - - "return None for non-patron" in { - when(mockRepo.findActiveByUserId(userId)).thenReturn(Future.successful(None)) - service.getPatronTier(userId).futureValue mustBe None - } - } -} diff --git a/test/services/PdsFleetServiceSpec.scala 
b/test/services/PdsFleetServiceSpec.scala deleted file mode 100644 index 97a438a1..00000000 --- a/test/services/PdsFleetServiceSpec.scala +++ /dev/null @@ -1,301 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.pds.* -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{never, reset, verify, when} -import play.api.libs.json.Json -import repositories.{PdsFleetConfigRepository, PdsHeartbeatLogRepository, PdsNodeRepository} - -import java.time.LocalDateTime -import scala.concurrent.Future - -class PdsFleetServiceSpec extends ServiceSpec { - - val mockNodeRepo: PdsNodeRepository = mock[PdsNodeRepository] - val mockHeartbeatRepo: PdsHeartbeatLogRepository = mock[PdsHeartbeatLogRepository] - val mockConfigRepo: PdsFleetConfigRepository = mock[PdsFleetConfigRepository] - - val service = new PdsFleetService(mockNodeRepo, mockHeartbeatRepo, mockConfigRepo) - - override def beforeEach(): Unit = { - reset(mockNodeRepo, mockHeartbeatRepo, mockConfigRepo) - } - - val now: LocalDateTime = LocalDateTime.now() - - val testNode: PdsNode = PdsNode( - id = Some(1), - did = "did:plc:user1", - pdsUrl = "https://pds.user1.example.com", - handle = Some("user1.example.com"), - nodeName = Some("User1 PDS"), - softwareVersion = Some("0.1.0"), - status = "ONLINE", - capabilities = Json.obj("haplogroup_analysis" -> true, "str_analysis" -> false), - lastHeartbeat = Some(now), - createdAt = now, - updatedAt = now - ) - - val testNode2: PdsNode = PdsNode( - id = Some(2), - did = "did:plc:user2", - pdsUrl = "https://pds.user2.example.com", - softwareVersion = Some("0.0.9"), - status = "OFFLINE", - createdAt = now, - updatedAt = now - ) - - val targetVersionConfig: PdsFleetConfig = PdsFleetConfig( - id = Some(1), configKey = "target_software_version", configValue = "0.1.0" - ) - - val offlineThresholdConfig: PdsFleetConfig = PdsFleetConfig( - id = Some(3), configKey = "offline_threshold_seconds", configValue = "900" - ) - - 
"PdsFleetService.processHeartbeat" should { - - "register new node on first heartbeat" in { - val request = HeartbeatRequest( - did = "did:plc:newuser", - pdsUrl = "https://pds.newuser.example.com", - handle = Some("newuser.example.com"), - softwareVersion = Some("0.1.0"), - status = "ONLINE" - ) - - when(mockNodeRepo.findByDid("did:plc:newuser")).thenReturn(Future.successful(None)) - when(mockNodeRepo.create(any[PdsNode])).thenReturn( - Future.successful(PdsNode(id = Some(10), did = "did:plc:newuser", - pdsUrl = "https://pds.newuser.example.com", status = "ONLINE"))) - when(mockHeartbeatRepo.create(any[PdsHeartbeatLog])) - .thenReturn(Future.successful(PdsHeartbeatLog(id = Some(1), pdsNodeId = 10, status = "ONLINE"))) - - val result = service.processHeartbeat(request).futureValue - result.isRight mustBe true - result.toOption.get.did mustBe "did:plc:newuser" - - verify(mockNodeRepo).create(any[PdsNode]) - verify(mockHeartbeatRepo).create(any[PdsHeartbeatLog]) - } - - "update existing node on subsequent heartbeat" in { - val request = HeartbeatRequest( - did = "did:plc:user1", - pdsUrl = "https://pds.user1.example.com", - softwareVersion = Some("0.1.1"), - status = "ONLINE", - lastCommitCid = Some("bafyabc123") - ) - - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockNodeRepo.updateHeartbeat(meq(1), meq("ONLINE"), meq(Some("0.1.1")), - meq(Some("bafyabc123")), any[Option[String]])).thenReturn(Future.successful(true)) - when(mockNodeRepo.update(any[PdsNode])).thenReturn(Future.successful(true)) - when(mockNodeRepo.findById(1)).thenReturn(Future.successful(Some(testNode.copy(softwareVersion = Some("0.1.1"))))) - when(mockHeartbeatRepo.create(any[PdsHeartbeatLog])) - .thenReturn(Future.successful(PdsHeartbeatLog(id = Some(2), pdsNodeId = 1, status = "ONLINE"))) - - val result = service.processHeartbeat(request).futureValue - result.isRight mustBe true - - verify(mockNodeRepo).updateHeartbeat(meq(1), meq("ONLINE"), 
meq(Some("0.1.1")), - meq(Some("bafyabc123")), any[Option[String]]) - verify(mockNodeRepo, never()).create(any[PdsNode]) - } - - "reject invalid status" in { - val request = HeartbeatRequest( - did = "did:plc:user1", pdsUrl = "https://example.com", status = "INVALID" - ) - - val result = service.processHeartbeat(request).futureValue - result mustBe Left("Invalid status: INVALID") - } - - "record error heartbeat with message" in { - val request = HeartbeatRequest( - did = "did:plc:user1", - pdsUrl = "https://pds.user1.example.com", - status = "ERROR", - errorMessage = Some("Disk full") - ) - - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockNodeRepo.updateHeartbeat(meq(1), meq("ERROR"), any(), any(), any())) - .thenReturn(Future.successful(true)) - when(mockNodeRepo.update(any[PdsNode])).thenReturn(Future.successful(true)) - when(mockNodeRepo.findById(1)).thenReturn(Future.successful(Some(testNode.copy(status = "ERROR")))) - when(mockHeartbeatRepo.create(any[PdsHeartbeatLog])) - .thenReturn(Future.successful(PdsHeartbeatLog(id = Some(3), pdsNodeId = 1, status = "ERROR", - errorMessage = Some("Disk full")))) - - val result = service.processHeartbeat(request).futureValue - result.isRight mustBe true - result.toOption.get.status mustBe "ERROR" - } - } - - "PdsFleetService.getFleetSummary" should { - - "compute fleet summary with status counts and version compliance" in { - when(mockNodeRepo.countByStatus()).thenReturn(Future.successful( - Map("ONLINE" -> 5, "OFFLINE" -> 2, "BUSY" -> 1, "ERROR" -> 1) - )) - when(mockConfigRepo.findByKey("target_software_version")) - .thenReturn(Future.successful(Some(targetVersionConfig))) - when(mockNodeRepo.findAll()).thenReturn(Future.successful(Seq( - testNode.copy(softwareVersion = Some("0.1.0")), - testNode2.copy(softwareVersion = Some("0.0.9")), - testNode.copy(id = Some(3), did = "did:plc:user3", softwareVersion = Some("0.1.0")), - testNode.copy(id = Some(4), did = 
"did:plc:user4", softwareVersion = Some("0.1.0")), - testNode.copy(id = Some(5), did = "did:plc:user5", softwareVersion = Some("0.1.0")), - testNode.copy(id = Some(6), did = "did:plc:user6", softwareVersion = Some("0.0.9")), - testNode.copy(id = Some(7), did = "did:plc:user7", softwareVersion = Some("0.1.0")), - testNode.copy(id = Some(8), did = "did:plc:user8", softwareVersion = Some("0.1.0")), - testNode.copy(id = Some(9), did = "did:plc:user9", softwareVersion = Some("0.1.0")) - ))) - - val summary = service.getFleetSummary.futureValue - summary.totalNodes mustBe 9 - summary.onlineNodes mustBe 5 - summary.offlineNodes mustBe 2 - summary.busyNodes mustBe 1 - summary.errorNodes mustBe 1 - summary.targetVersion mustBe Some("0.1.0") - summary.nodesOnTargetVersion mustBe 7 - summary.nodesOutdated mustBe 2 - } - - "handle empty fleet" in { - when(mockNodeRepo.countByStatus()).thenReturn(Future.successful(Map.empty)) - when(mockConfigRepo.findByKey("target_software_version")).thenReturn(Future.successful(None)) - when(mockNodeRepo.findAll()).thenReturn(Future.successful(Seq.empty)) - - val summary = service.getFleetSummary.futureValue - summary.totalNodes mustBe 0 - summary.onlineNodes mustBe 0 - summary.targetVersion mustBe None - } - } - - "PdsFleetService.markStaleNodesOffline" should { - - "mark nodes without recent heartbeat as offline" in { - val staleNode = testNode.copy( - id = Some(5), status = "ONLINE", - lastHeartbeat = Some(now.minusMinutes(20)) - ) - - when(mockConfigRepo.findByKey("offline_threshold_seconds")) - .thenReturn(Future.successful(Some(offlineThresholdConfig))) - when(mockNodeRepo.findStaleNodes(any[LocalDateTime])).thenReturn(Future.successful(Seq(staleNode))) - when(mockNodeRepo.updateStatus(5, "OFFLINE")).thenReturn(Future.successful(true)) - - val count = service.markStaleNodesOffline().futureValue - count mustBe 1 - verify(mockNodeRepo).updateStatus(5, "OFFLINE") - } - - "use default threshold when config is missing" in { - 
when(mockConfigRepo.findByKey("offline_threshold_seconds")) - .thenReturn(Future.successful(None)) - when(mockNodeRepo.findStaleNodes(any[LocalDateTime])).thenReturn(Future.successful(Seq.empty)) - - val count = service.markStaleNodesOffline().futureValue - count mustBe 0 - } - } - - "PdsFleetService.listNodes" should { - - "list all nodes without filter" in { - when(mockNodeRepo.findAll()).thenReturn(Future.successful(Seq(testNode, testNode2))) - - val result = service.listNodes().futureValue - result.size mustBe 2 - } - - "filter nodes by status" in { - when(mockNodeRepo.findByStatus("ONLINE")).thenReturn(Future.successful(Seq(testNode))) - - val result = service.listNodes(Some("ONLINE")).futureValue - result.size mustBe 1 - result.head.status mustBe "ONLINE" - } - } - - "PdsFleetService.getNode" should { - - "return node by DID" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - - val result = service.getNode("did:plc:user1").futureValue - result mustBe defined - result.get.pdsUrl mustBe "https://pds.user1.example.com" - } - - "return None for unknown DID" in { - when(mockNodeRepo.findByDid("did:plc:unknown")).thenReturn(Future.successful(None)) - - val result = service.getNode("did:plc:unknown").futureValue - result mustBe None - } - } - - "PdsFleetService.updateFleetConfig" should { - - "update existing config" in { - when(mockConfigRepo.upsert("target_software_version", "0.2.0", Some("curator1"))) - .thenReturn(Future.successful(true)) - - val result = service.updateFleetConfig("target_software_version", "0.2.0", Some("curator1")).futureValue - result mustBe Right(true) - } - } - - "PdsFleetService.removeNode" should { - - "remove existing node" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockNodeRepo.delete(1)).thenReturn(Future.successful(true)) - - val result = service.removeNode("did:plc:user1").futureValue - result mustBe Right(true) - } - - 
"return error for unknown node" in { - when(mockNodeRepo.findByDid("did:plc:unknown")).thenReturn(Future.successful(None)) - - val result = service.removeNode("did:plc:unknown").futureValue - result mustBe Left("Node not found") - } - } - - "PdsFleetService.getNodeHeartbeatHistory" should { - - "return heartbeat history for a node" in { - val logs = Seq( - PdsHeartbeatLog(id = Some(1), pdsNodeId = 1, status = "ONLINE", recordedAt = now), - PdsHeartbeatLog(id = Some(2), pdsNodeId = 1, status = "BUSY", recordedAt = now.minusMinutes(5)) - ) - when(mockHeartbeatRepo.findByNode(1, 100)).thenReturn(Future.successful(logs)) - - val result = service.getNodeHeartbeatHistory(1).futureValue - result.size mustBe 2 - } - } - - "PdsFleetService.pruneHeartbeatLogs" should { - - "delete old heartbeat logs" in { - when(mockHeartbeatRepo.deleteOlderThan(any[LocalDateTime])).thenReturn(Future.successful(150)) - - val result = service.pruneHeartbeatLogs(30).futureValue - result mustBe 150 - } - } -} diff --git a/test/services/PdsSignatureVerifierSpec.scala b/test/services/PdsSignatureVerifierSpec.scala deleted file mode 100644 index 82d005c5..00000000 --- a/test/services/PdsSignatureVerifierSpec.scala +++ /dev/null @@ -1,302 +0,0 @@ -package services - -import helpers.ServiceSpec -import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{reset, when} -import play.api.{Configuration, Logging} -import play.api.libs.json.Json -import play.api.mvc.AnyContentAsJson -import play.api.test.FakeRequest -import utils.Base58 - -import java.nio.charset.StandardCharsets -import java.security.* -import java.time.Instant -import java.util.Base64 -import scala.concurrent.Future - -class PdsSignatureVerifierSpec extends ServiceSpec with Logging { - - val mockAtProtoClient: ATProtocolClient = mock[ATProtocolClient] - val config: Configuration = Configuration.from(Map( - "pds.auth.timestamp.window.seconds" -> 300 - )) - - val verifier = new PdsSignatureVerifier(mockAtProtoClient, config) - - 
override def beforeEach(): Unit = { - reset(mockAtProtoClient) - } - - // Generate a test Ed25519 key pair - private def generateEd25519KeyPair(): KeyPair = { - val kpg = KeyPairGenerator.getInstance("Ed25519") - kpg.generateKeyPair() - } - - // Generate a test P-256 key pair - private def generateP256KeyPair(): KeyPair = { - val kpg = KeyPairGenerator.getInstance("EC") - kpg.initialize(256) - kpg.generateKeyPair() - } - - // Sign data with a private key - private def sign(privateKey: PrivateKey, data: Array[Byte]): Array[Byte] = { - val algorithm = privateKey.getAlgorithm match { - case "Ed25519" | "EdDSA" => "Ed25519" - case "EC" => "SHA256withECDSA" - } - val sig = Signature.getInstance(algorithm) - sig.initSign(privateKey) - sig.update(data) - sig.sign() - } - - // Extract raw Ed25519 public key bytes and encode as multibase - private def ed25519ToMultibase(publicKey: PublicKey): String = { - val encoded = publicKey.getEncoded - // X.509 DER encoded Ed25519 public key: 12-byte prefix + 32-byte key - val rawKey = encoded.drop(12) - val multicodecKey = Array(0xed.toByte, 0x01.toByte) ++ rawKey - "z" + Base58.encode(multicodecKey) - } - - // Build a mock DidDocument with a verification method - private def mockDidDocument(did: String, multibaseKey: String): DidDocument = { - DidDocument( - id = did, - verificationMethod = Some(Seq( - DidVerificationMethod( - id = s"$did#atproto", - `type` = "Multikey", - controller = did, - publicKeyMultibase = Some(multibaseKey) - ) - )), - service = Some(Seq( - DidService( - id = "#atproto_pds", - `type` = "AtprotoPersonalDataServer", - serviceEndpoint = "https://pds.example.com" - ) - )) - ) - } - - "PdsSignatureVerifier.isTimestampValid" should { - - "accept a current timestamp" in { - verifier.isTimestampValid(Instant.now().toString) mustBe true - } - - "accept a timestamp within the window" in { - val twoMinutesAgo = Instant.now().minusSeconds(120).toString - verifier.isTimestampValid(twoMinutesAgo) mustBe true - } - - 
"reject an expired timestamp" in { - val tenMinutesAgo = Instant.now().minusSeconds(600).toString - verifier.isTimestampValid(tenMinutesAgo) mustBe false - } - - "reject a future timestamp beyond window" in { - val tenMinutesAhead = Instant.now().plusSeconds(600).toString - verifier.isTimestampValid(tenMinutesAhead) mustBe false - } - - "reject an invalid timestamp string" in { - verifier.isTimestampValid("not-a-timestamp") mustBe false - } - } - - "PdsSignatureVerifier.checkAndRecordNonce" should { - - "accept a new nonce" in { - verifier.checkAndRecordNonce("unique-nonce-1") mustBe true - } - - "reject a duplicate nonce" in { - val nonce = "duplicate-nonce-test" - verifier.checkAndRecordNonce(nonce) mustBe true - verifier.checkAndRecordNonce(nonce) mustBe false - } - } - - "PdsSignatureVerifier.buildSigningInput" should { - - "build signing input without nonce" in { - val result = verifier.buildSigningInput("POST", "/api/heartbeat", "2026-01-01T00:00:00Z", "abc123", None) - result mustBe "POST\n/api/heartbeat\n2026-01-01T00:00:00Z\nabc123" - } - - "build signing input with nonce" in { - val result = verifier.buildSigningInput("POST", "/api/heartbeat", "2026-01-01T00:00:00Z", "abc123", Some("nonce1")) - result mustBe "POST\n/api/heartbeat\n2026-01-01T00:00:00Z\nabc123\nnonce1" - } - - "uppercase the method" in { - val result = verifier.buildSigningInput("post", "/api/test", "ts", "hash", None) - result mustBe "POST\n/api/test\nts\nhash" - } - } - - "PdsSignatureVerifier.hashBody" should { - - "hash a JSON body" in { - val body = Json.obj("key" -> "value") - val request = FakeRequest("POST", "/test").withBody(body) - val hash = verifier.hashBody(request) - hash must not be empty - } - - "produce consistent hashes for same content" in { - val body = Json.obj("key" -> "value") - val req1 = FakeRequest("POST", "/test").withBody(body) - val req2 = FakeRequest("POST", "/test").withBody(body) - verifier.hashBody(req1) mustBe verifier.hashBody(req2) - } - - "produce 
different hashes for different content" in { - val req1 = FakeRequest("POST", "/test").withBody(Json.obj("a" -> 1)) - val req2 = FakeRequest("POST", "/test").withBody(Json.obj("b" -> 2)) - verifier.hashBody(req1) must not be verifier.hashBody(req2) - } - } - - "PdsSignatureVerifier.parseMultibaseKey" should { - - "parse an Ed25519 multibase key" in { - val kp = generateEd25519KeyPair() - val multibase = ed25519ToMultibase(kp.getPublic) - val parsed = verifier.parseMultibaseKey(multibase) - parsed.getAlgorithm must (equal("Ed25519") or equal("EdDSA")) - } - - "reject non-base58btc prefix" in { - an[IllegalArgumentException] must be thrownBy { - verifier.parseMultibaseKey("Minvalid") - } - } - - "reject unsupported multicodec prefix" in { - val fakeKey = Array(0xff.toByte, 0xff.toByte) ++ Array.fill(32)(0.toByte) - val multibase = "z" + Base58.encode(fakeKey) - an[IllegalArgumentException] must be thrownBy { - verifier.parseMultibaseKey(multibase) - } - } - } - - "PdsSignatureVerifier.verifySignature" should { - - "verify a valid Ed25519 signature" in { - val did = "did:plc:testuser1" - val kp = generateEd25519KeyPair() - val multibase = ed25519ToMultibase(kp.getPublic) - - when(mockAtProtoClient.resolveDid(did)) - .thenReturn(Future.successful(Some(mockDidDocument(did, multibase)))) - - val signingInput = "POST\n/api/heartbeat\n2026-01-01T00:00:00Z\nhash123" - val signature = sign(kp.getPrivate, signingInput.getBytes(StandardCharsets.UTF_8)) - val signatureBase64 = Base64.getEncoder.encodeToString(signature) - - val result = verifier.verifySignature(did, signingInput, signatureBase64).futureValue - result mustBe true - } - - "reject a signature with wrong data" in { - val did = "did:plc:testuser2" - val kp = generateEd25519KeyPair() - val multibase = ed25519ToMultibase(kp.getPublic) - - when(mockAtProtoClient.resolveDid(did)) - .thenReturn(Future.successful(Some(mockDidDocument(did, multibase)))) - - val signature = sign(kp.getPrivate, 
"correct-data".getBytes(StandardCharsets.UTF_8)) - val signatureBase64 = Base64.getEncoder.encodeToString(signature) - - val result = verifier.verifySignature(did, "wrong-data", signatureBase64).futureValue - result mustBe false - } - - "reject a signature from a different key" in { - val did = "did:plc:testuser3" - val kp1 = generateEd25519KeyPair() - val kp2 = generateEd25519KeyPair() - val multibase = ed25519ToMultibase(kp1.getPublic) - - when(mockAtProtoClient.resolveDid(did)) - .thenReturn(Future.successful(Some(mockDidDocument(did, multibase)))) - - val signingInput = "POST\n/api/test\nts\nhash" - val signature = sign(kp2.getPrivate, signingInput.getBytes(StandardCharsets.UTF_8)) - val signatureBase64 = Base64.getEncoder.encodeToString(signature) - - val result = verifier.verifySignature(did, signingInput, signatureBase64).futureValue - result mustBe false - } - - "return false when DID cannot be resolved" in { - when(mockAtProtoClient.resolveDid("did:plc:unknown")) - .thenReturn(Future.successful(None)) - - val result = verifier.verifySignature("did:plc:unknown", "data", "sig").futureValue - result mustBe false - } - - "return false when DID document has no verification method" in { - val did = "did:plc:nokeys" - val doc = DidDocument(id = did, verificationMethod = None) - - when(mockAtProtoClient.resolveDid(did)) - .thenReturn(Future.successful(Some(doc))) - - val result = verifier.verifySignature(did, "data", "sig").futureValue - result mustBe false - } - - "return false when verification method has no publicKeyMultibase" in { - val did = "did:plc:nokey" - val doc = DidDocument( - id = did, - verificationMethod = Some(Seq( - DidVerificationMethod(id = "#key", `type` = "Multikey", controller = did, publicKeyMultibase = None) - )) - ) - - when(mockAtProtoClient.resolveDid(did)) - .thenReturn(Future.successful(Some(doc))) - - val result = verifier.verifySignature(did, "data", "sig").futureValue - result mustBe false - } - } - - "PdsSignatureVerifier 
end-to-end" should { - - "verify a complete request signing flow" in { - val did = "did:plc:e2e-test" - val kp = generateEd25519KeyPair() - val multibase = ed25519ToMultibase(kp.getPublic) - - when(mockAtProtoClient.resolveDid(did)) - .thenReturn(Future.successful(Some(mockDidDocument(did, multibase)))) - - val body = Json.obj("status" -> "ONLINE", "softwareVersion" -> "0.1.0") - val request = FakeRequest("POST", "/api/pds/heartbeat").withBody(body) - val timestamp = Instant.now().toString - val nonce = java.util.UUID.randomUUID().toString - - val bodyHash = verifier.hashBody(request) - val signingInput = verifier.buildSigningInput("POST", "/api/pds/heartbeat", timestamp, bodyHash, Some(nonce)) - val signature = sign(kp.getPrivate, signingInput.getBytes(StandardCharsets.UTF_8)) - val signatureBase64 = Base64.getEncoder.encodeToString(signature) - - verifier.isTimestampValid(timestamp) mustBe true - verifier.checkAndRecordNonce(nonce) mustBe true - verifier.verifySignature(did, signingInput, signatureBase64).futureValue mustBe true - } - } -} diff --git a/test/services/PgpBiosampleServiceSpec.scala b/test/services/PgpBiosampleServiceSpec.scala deleted file mode 100644 index d7e99b57..00000000 --- a/test/services/PgpBiosampleServiceSpec.scala +++ /dev/null @@ -1,131 +0,0 @@ -package services - -import models.domain.genomics.{Biosample, BiosampleType, SpecimenDonor} -import org.mockito.ArgumentMatchers.{any, anyString} -import org.mockito.Mockito.{never, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.scalatestplus.mockito.MockitoSugar -import org.scalatest.concurrent.ScalaFutures -import org.scalatestplus.play.PlaySpec -import repositories.{BiosampleRepository, SpecimenDonorRepository} // AccessionNumberGenerator removed - -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -class PgpBiosampleServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures { - - implicit val ec: 
ExecutionContext = ExecutionContext.global - - "PgpBiosampleService" should { - - "create a new PGP biosample successfully" in { - // Mocks - val mockBiosampleRepo = mock[BiosampleRepository] - val mockAccessionGen = mock[AccessionNumberGenerator] - val mockBiosampleService = mock[BiosampleService] // Changed from mockDonorRepo - - // Test Data - val participantId = "hu123456" - val description = "Test Sample" - val centerName = "PGP-Harvard" - val generatedAccession = "SAMEA123456" - val sampleGuid = UUID.randomUUID() - - // Mock Behavior - when(mockBiosampleRepo.findByAliasOrAccession(participantId)) - .thenReturn(Future.successful(None)) - - when(mockAccessionGen.generateAccession(any[BiosampleType], any[AccessionMetadata])) - .thenReturn(Future.successful(generatedAccession)) - - // Mock BiosampleService.createOrUpdateSpecimenDonor - it should return None here - when(mockBiosampleService.createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any)) - .thenReturn(Future.successful(None)) - - // Mock BiosampleService.createBiosample - it should return a Biosample with the given sampleGuid - when(mockBiosampleService.createBiosample(any[UUID], anyString, anyString, any, anyString, any, any)) - .thenAnswer(new Answer[Future[Biosample]] { - override def answer(invocation: InvocationOnMock): Future[Biosample] = { - val sGuid = invocation.getArgument[UUID](0) - val b = Biosample(id = Some(100), sampleGuid = sGuid, sampleAccession = generatedAccession, description = description, alias = Some(participantId), centerName = centerName, specimenDonorId = None, locked = false, sourcePlatform = Some("PGP")) - Future.successful(b.copy(id = Some(100))) - } - }) - - val service = new PgpBiosampleService(mockBiosampleRepo, mockAccessionGen, mockBiosampleService) // Changed - - // Execute - val resultFuture = service.createPgpBiosample(participantId, description, centerName) - - // Verify - whenReady(resultFuture) { guid => - guid mustBe a [UUID] 
- verify(mockBiosampleService).createBiosample(any[UUID], anyString, anyString, any, anyString, any, any) - verify(mockBiosampleService, never).createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any) - } - } - - "create a donor and biosample when metadata is provided" in { - // Mocks - val mockBiosampleRepo = mock[BiosampleRepository] - val mockAccessionGen = mock[AccessionNumberGenerator] - val mockBiosampleService = mock[BiosampleService] // Changed from mockDonorRepo - - val participantId = "hu987654" - val lat = Some(42.0) - val lon = Some(-71.0) - val donorId = Some(50) - - when(mockBiosampleRepo.findByAliasOrAccession(participantId)) - .thenReturn(Future.successful(None)) - - when(mockAccessionGen.generateAccession(any[BiosampleType], any[AccessionMetadata])) - .thenReturn(Future.successful("SAMEA999")) - - // Mock BiosampleService.createOrUpdateSpecimenDonor - when(mockBiosampleService.createOrUpdateSpecimenDonor(anyString, anyString, any[BiosampleType], any, any, any, any, any)) - .thenReturn(Future.successful(donorId)) - - // Mock BiosampleService.createBiosample - when(mockBiosampleService.createBiosample(any[UUID], anyString, anyString, any, anyString, any, any)) - .thenAnswer(new Answer[Future[Biosample]] { - override def answer(invocation: InvocationOnMock): Future[Biosample] = { - val sGuid = invocation.getArgument[UUID](0) - val b = Biosample(id = Some(100), sampleGuid = sGuid, sampleAccession = "SAMEA999", description = "Desc", alias = Some(participantId), centerName = "PGP", specimenDonorId = donorId, locked = false, sourcePlatform = Some("PGP")) - Future.successful(b) - } - }) - - val service = new PgpBiosampleService(mockBiosampleRepo, mockAccessionGen, mockBiosampleService) // Changed - - val resultFuture = service.createPgpBiosample(participantId, "Desc", "PGP", latitude = lat, longitude = lon) - - whenReady(resultFuture) { guid => - verify(mockBiosampleService).createOrUpdateSpecimenDonor(anyString, 
anyString, any[BiosampleType], any, any, any, any, any) - verify(mockBiosampleService).createBiosample(any[UUID], anyString, anyString, any, anyString, any, any) - } - } - - "fail when participant already exists" in { - val mockBiosampleRepo = mock[BiosampleRepository] - val mockAccessionGen = mock[AccessionNumberGenerator] - val mockBiosampleService = mock[BiosampleService] // Changed from mockDonorRepo - val participantId = "huExisting" - - // Mock finding an existing one - val existingBiosample = Biosample(Some(1), UUID.randomUUID(), "ACC1", "Desc", Some(participantId), "Center", None, false, None) - when(mockBiosampleRepo.findByAliasOrAccession(participantId)) - .thenReturn(Future.successful(Some((existingBiosample, None)))) - - val service = new PgpBiosampleService(mockBiosampleRepo, mockAccessionGen, mockBiosampleService) // Changed - - val resultFuture = service.createPgpBiosample(participantId, "Desc", "Center") - - whenReady(resultFuture.failed) { e => - e mustBe a [DuplicateParticipantException] - e.getMessage must include ("already has a biosample") - } - } - } -} diff --git a/test/services/PrivateVariantExtractionServiceSpec.scala b/test/services/PrivateVariantExtractionServiceSpec.scala deleted file mode 100644 index c077011b..00000000 --- a/test/services/PrivateVariantExtractionServiceSpec.scala +++ /dev/null @@ -1,166 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.atmosphere.VariantCall -import models.domain.discovery.* -import models.domain.genomics.{MutationType, NamingStatus, VariantV2} -import models.domain.haplogroups.Haplogroup -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import play.api.libs.json.Json -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, PrivateVariantRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class 
PrivateVariantExtractionServiceSpec extends ServiceSpec { - - val mockPrivateVariantRepo: PrivateVariantRepository = mock[PrivateVariantRepository] - val mockVariantRepo: HaplogroupVariantRepository = mock[HaplogroupVariantRepository] - val mockCoreRepo: HaplogroupCoreRepository = mock[HaplogroupCoreRepository] - - val service = new PrivateVariantExtractionService(mockPrivateVariantRepo, mockVariantRepo, mockCoreRepo) - - override def beforeEach(): Unit = { - reset(mockPrivateVariantRepo, mockVariantRepo, mockCoreRepo) - } - - val now: LocalDateTime = LocalDateTime.of(2025, 1, 1, 0, 0) - val sampleGuid: UUID = UUID.randomUUID() - - val terminalHg: Haplogroup = Haplogroup( - id = Some(100), name = "R-M269", lineage = Some("R>R-M269"), - description = None, haplogroupType = HaplogroupType.Y, - revisionId = 1, source = "backbone", confidenceLevel = "high", - validFrom = now, validUntil = None - ) - - val ancestorHg: Haplogroup = Haplogroup( - id = Some(1), name = "R", lineage = Some("R"), - description = None, haplogroupType = HaplogroupType.Y, - revisionId = 1, source = "backbone", confidenceLevel = "high", - validFrom = now, validUntil = None - ) - - // A tree variant already on the lineage (should NOT be counted as private) - val treeVariant: VariantV2 = VariantV2( - variantId = Some(10), canonicalName = Some("M269"), mutationType = MutationType.SNP, - coordinates = Json.obj("GRCh38" -> Json.obj("contig" -> "chrY", "position" -> 22103211, "ref" -> "G", "alt" -> "A")) - ) - - // A variant not on the tree (should be counted as private) - val novelVariant: VariantV2 = VariantV2( - variantId = Some(50), canonicalName = Some("Z1234"), mutationType = MutationType.SNP, - coordinates = Json.obj("GRCh38" -> Json.obj("contig" -> "chrY", "position" -> 5000000, "ref" -> "C", "alt" -> "T")) - ) - - def makeCall(contig: String, pos: Int, ref: String, alt: String, name: Option[String] = None): VariantCall = - VariantCall(contig, pos, ref, alt, rsId = None, variantName = name, 
genotype = None, quality = None, depth = None) - - "PrivateVariantExtractionService" should { - - "return empty when no variant calls provided" in { - whenReady(service.extractFromExternalBiosample(1, sampleGuid, "R-M269", HaplogroupType.Y, Seq.empty)) { result => - result mustBe empty - } - } - - "extract private variants filtering out tree variants" in { - val treeCall = makeCall("chrY", 22103211, "G", "A", Some("M269")) - val privateCall = makeCall("chrY", 5000000, "C", "T", Some("Z1234")) - - when(mockCoreRepo.getHaplogroupByName("R-M269", HaplogroupType.Y)).thenReturn(Future.successful(Some(terminalHg))) - when(mockCoreRepo.getAncestors(100)).thenReturn(Future.successful(Seq(ancestorHg))) - - // Tree variants for ancestor R and terminal R-M269 - when(mockVariantRepo.getHaplogroupVariants(1)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(100)).thenReturn(Future.successful(Seq(treeVariant))) - - // Variant resolution - when(mockVariantRepo.findVariants("M269")).thenReturn(Future.successful(Seq(treeVariant))) - when(mockVariantRepo.findVariants("Z1234")).thenReturn(Future.successful(Seq(novelVariant))) - - // Save - when(mockPrivateVariantRepo.createAll(any[Seq[BiosamplePrivateVariant]])).thenAnswer { invocation => - val pvs = invocation.getArgument[Seq[BiosamplePrivateVariant]](0) - Future.successful(pvs.zipWithIndex.map { case (pv, i) => pv.copy(id = Some(i + 1)) }) - } - - whenReady(service.extractFromExternalBiosample(1, sampleGuid, "R-M269", HaplogroupType.Y, Seq(treeCall, privateCall))) { result => - result must have size 1 - result.head.variantId mustBe 50 - result.head.sampleType mustBe BiosampleSourceType.External - result.head.terminalHaplogroupId mustBe 100 - } - } - - "extract from citizen biosample with correct source type" in { - val privateCall = makeCall("chrY", 5000000, "C", "T", Some("Z1234")) - - when(mockCoreRepo.getHaplogroupByName("R-M269", 
HaplogroupType.Y)).thenReturn(Future.successful(Some(terminalHg))) - when(mockCoreRepo.getAncestors(100)).thenReturn(Future.successful(Seq(ancestorHg))) - when(mockVariantRepo.getHaplogroupVariants(1)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(100)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.findVariants("Z1234")).thenReturn(Future.successful(Seq(novelVariant))) - when(mockPrivateVariantRepo.createAll(any[Seq[BiosamplePrivateVariant]])).thenAnswer { invocation => - val pvs = invocation.getArgument[Seq[BiosamplePrivateVariant]](0) - Future.successful(pvs.zipWithIndex.map { case (pv, i) => pv.copy(id = Some(i + 1)) }) - } - - whenReady(service.extractFromCitizenBiosample(42, sampleGuid, "R-M269", HaplogroupType.Y, Seq(privateCall))) { result => - result must have size 1 - result.head.sampleType mustBe BiosampleSourceType.Citizen - result.head.sampleId mustBe 42 - } - } - - "fail when terminal haplogroup not found" in { - when(mockCoreRepo.getHaplogroupByName("UNKNOWN", HaplogroupType.Y)).thenReturn(Future.successful(None)) - - val call = makeCall("chrY", 1000, "A", "G") - - whenReady(service.extractFromExternalBiosample(1, sampleGuid, "UNKNOWN", HaplogroupType.Y, Seq(call)).failed) { ex => - ex mustBe a[IllegalArgumentException] - ex.getMessage must include("not found") - } - } - - "skip variants not in variant_v2 (resolved to -1)" in { - val unknownCall = makeCall("chrY", 9999999, "A", "G") - - when(mockCoreRepo.getHaplogroupByName("R-M269", HaplogroupType.Y)).thenReturn(Future.successful(Some(terminalHg))) - when(mockCoreRepo.getAncestors(100)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(100)).thenReturn(Future.successful(Seq.empty)) - // Position search returns nothing -> resolves to -1 - when(mockVariantRepo.findVariants("chrY:9999999")).thenReturn(Future.successful(Seq.empty)) - - // -1 variant ID won't match any tree position so it passes the tree filter, 
- // but we still save it (the -1 sentinel is a temporary approach) - when(mockPrivateVariantRepo.createAll(any[Seq[BiosamplePrivateVariant]])).thenAnswer { invocation => - val pvs = invocation.getArgument[Seq[BiosamplePrivateVariant]](0) - Future.successful(pvs.zipWithIndex.map { case (pv, i) => pv.copy(id = Some(i + 1)) }) - } - - whenReady(service.extractFromExternalBiosample(1, sampleGuid, "R-M269", HaplogroupType.Y, Seq(unknownCall))) { result => - result must have size 1 - result.head.variantId mustBe -1 - } - } - - "not save when all variants are already on tree" in { - val treeCall = makeCall("chrY", 22103211, "G", "A", Some("M269")) - - when(mockCoreRepo.getHaplogroupByName("R-M269", HaplogroupType.Y)).thenReturn(Future.successful(Some(terminalHg))) - when(mockCoreRepo.getAncestors(100)).thenReturn(Future.successful(Seq.empty)) - when(mockVariantRepo.getHaplogroupVariants(100)).thenReturn(Future.successful(Seq(treeVariant))) - when(mockVariantRepo.findVariants("M269")).thenReturn(Future.successful(Seq(treeVariant))) - - whenReady(service.extractFromExternalBiosample(1, sampleGuid, "R-M269", HaplogroupType.Y, Seq(treeCall))) { result => - result mustBe empty - verify(mockPrivateVariantRepo, never()).createAll(any[Seq[BiosamplePrivateVariant]]) - } - } - } -} diff --git a/test/services/ProjectServiceSpec.scala b/test/services/ProjectServiceSpec.scala deleted file mode 100644 index 152b125d..00000000 --- a/test/services/ProjectServiceSpec.scala +++ /dev/null @@ -1,213 +0,0 @@ -package services - -import models.api.{ProjectRequest, ProjectResponse} -import models.domain.Project -import org.mockito.ArgumentMatchers.{any, anyString} -import org.mockito.Mockito.{never, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.scalatestplus.mockito.MockitoSugar -import org.scalatest.concurrent.ScalaFutures -import org.scalatestplus.play.PlaySpec -import repositories.ProjectRepository - -import 
java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -class ProjectServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures { - - implicit val ec: ExecutionContext = ExecutionContext.global - - def createRequest( - name: String = "Test Project", - description: Option[String] = Some("A test project"), - atUri: Option[String] = Some("at://did:plc:test/com.decodingus.atmosphere.project/rkey1"), - atCid: Option[String] = None - ): ProjectRequest = ProjectRequest( - name = name, - description = description, - atUri = atUri, - atCid = atCid - ) - - "ProjectService" should { - - "create a new project successfully" in { - val mockRepo = mock[ProjectRepository] - - when(mockRepo.create(any[Project])) - .thenAnswer(new Answer[Future[Project]] { - override def answer(invocation: InvocationOnMock): Future[Project] = { - val p = invocation.getArgument[Project](0) - Future.successful(p.copy(id = Some(1))) - } - }) - - val service = new ProjectService(mockRepo) - val request = createRequest() - - whenReady(service.createProject(request)) { response => - response.name mustBe "Test Project" - response.description mustBe Some("A test project") - response.projectGuid mustBe a[UUID] - response.atCid mustBe defined - - verify(mockRepo).create(any[Project]) - } - } - - "update an existing project successfully" in { - val mockRepo = mock[ProjectRepository] - val existingGuid = UUID.randomUUID() - val existingAtCid = "existing-cid-123" - val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/rkey1" - - val existingProject = Project( - id = Some(1), - projectGuid = existingGuid, - name = "Old Name", - description = Some("Old description"), - ownerDid = "did:example:owner", - createdAt = LocalDateTime.now().minusDays(1), - updatedAt = LocalDateTime.now().minusDays(1), - deleted = false, - atUri = Some(atUri), - atCid = Some(existingAtCid) - ) - - when(mockRepo.findByAtUri(atUri)) - 
.thenReturn(Future.successful(Some(existingProject))) - - when(mockRepo.update(any[Project], any[Option[String]])) - .thenReturn(Future.successful(true)) - - val service = new ProjectService(mockRepo) - val request = createRequest( - name = "Updated Name", - description = Some("Updated description"), - atUri = Some(atUri), - atCid = Some(existingAtCid) - ) - - whenReady(service.updateProject(atUri, request)) { response => - response.name mustBe "Updated Name" - response.description mustBe Some("Updated description") - response.projectGuid mustBe existingGuid - response.atCid mustBe defined - response.atCid must not be Some(existingAtCid) // Should be new CID - - verify(mockRepo).update(any[Project], any[Option[String]]) - } - } - - "fail update with optimistic locking error when atCid mismatch" in { - val mockRepo = mock[ProjectRepository] - val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/rkey1" - - val existingProject = Project( - id = Some(1), - projectGuid = UUID.randomUUID(), - name = "Project", - description = None, - ownerDid = "did:example:owner", - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now(), - deleted = false, - atUri = Some(atUri), - atCid = Some("current-cid") - ) - - when(mockRepo.findByAtUri(atUri)) - .thenReturn(Future.successful(Some(existingProject))) - - val service = new ProjectService(mockRepo) - val request = createRequest(atCid = Some("stale-cid")) - - whenReady(service.updateProject(atUri, request).failed) { e => - e mustBe a[IllegalStateException] - e.getMessage must include("Optimistic locking failure") - - verify(mockRepo, never).update(any[Project], any[Option[String]]) - } - } - - "fail update when project not found" in { - val mockRepo = mock[ProjectRepository] - val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/nonexistent" - - when(mockRepo.findByAtUri(atUri)) - .thenReturn(Future.successful(None)) - - val service = new ProjectService(mockRepo) - val request = createRequest() - - 
whenReady(service.updateProject(atUri, request).failed) { e => - e mustBe a[NoSuchElementException] - e.getMessage must include("not found") - } - } - - "delete a project successfully" in { - val mockRepo = mock[ProjectRepository] - val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/rkey1" - - when(mockRepo.softDeleteByAtUri(atUri)) - .thenReturn(Future.successful(true)) - - val service = new ProjectService(mockRepo) - - whenReady(service.deleteProject(atUri)) { result => - result mustBe true - verify(mockRepo).softDeleteByAtUri(atUri) - } - } - - "return false when deleting non-existent project" in { - val mockRepo = mock[ProjectRepository] - val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/nonexistent" - - when(mockRepo.softDeleteByAtUri(atUri)) - .thenReturn(Future.successful(false)) - - val service = new ProjectService(mockRepo) - - whenReady(service.deleteProject(atUri)) { result => - result mustBe false - } - } - - "allow update without atCid (no optimistic locking check)" in { - val mockRepo = mock[ProjectRepository] - val atUri = "at://did:plc:test/com.decodingus.atmosphere.project/rkey1" - - val existingProject = Project( - id = Some(1), - projectGuid = UUID.randomUUID(), - name = "Project", - description = None, - ownerDid = "did:example:owner", - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now(), - deleted = false, - atUri = Some(atUri), - atCid = Some("any-cid") - ) - - when(mockRepo.findByAtUri(atUri)) - .thenReturn(Future.successful(Some(existingProject))) - - when(mockRepo.update(any[Project], any[Option[String]])) - .thenReturn(Future.successful(true)) - - val service = new ProjectService(mockRepo) - // Request without atCid - should skip optimistic locking check - val request = createRequest(atCid = None) - - whenReady(service.updateProject(atUri, request)) { response => - response.name mustBe "Test Project" - verify(mockRepo).update(any[Project], any[Option[String]]) - } - } - } -} diff --git 
a/test/services/ProjectStrComparisonServiceSpec.scala b/test/services/ProjectStrComparisonServiceSpec.scala deleted file mode 100644 index ecd0fee6..00000000 --- a/test/services/ProjectStrComparisonServiceSpec.scala +++ /dev/null @@ -1,373 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.dal.domain.genomics.{BiosampleVariantCall, StrMutationRate} -import models.domain.{EffectiveVisibility, GroupProject, GroupProjectMember, MemberVisibility} -import models.domain.genomics.{Biosample, CitizenBiosample, SpecimenDonor} -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.* - -import java.time.{Instant, LocalDateTime} -import java.util.UUID -import scala.concurrent.Future - -class ProjectStrComparisonServiceSpec extends ServiceSpec { - - val mockProjectRepo: GroupProjectRepository = mock[GroupProjectRepository] - val mockMemberRepo: GroupProjectMemberRepository = mock[GroupProjectMemberRepository] - val mockBiosampleRepo: CitizenBiosampleRepository = mock[CitizenBiosampleRepository] - val mockBiosampleMainRepo: BiosampleRepository = mock[BiosampleRepository] - val mockVariantCallRepo: BiosampleVariantCallRepository = mock[BiosampleVariantCallRepository] - val mockStrRateRepo: StrMutationRateRepository = mock[StrMutationRateRepository] - - val service = new ProjectStrComparisonService( - mockProjectRepo, mockMemberRepo, mockBiosampleRepo, - mockBiosampleMainRepo, mockVariantCallRepo, mockStrRateRepo - ) - - override def beforeEach(): Unit = { - reset(mockProjectRepo, mockMemberRepo, mockBiosampleRepo, - mockBiosampleMainRepo, mockVariantCallRepo, mockStrRateRepo) - } - - val now: LocalDateTime = LocalDateTime.now() - - val testProject: GroupProject = GroupProject( - id = Some(1), - projectGuid = UUID.randomUUID(), - projectName = "R-CTS4466 STR Project", - projectType = "HAPLOGROUP", - targetHaplogroup = Some("R-CTS4466"), - targetLineage = Some("Y_DNA"), - joinPolicy = 
"APPROVAL_REQUIRED", - strPolicy = "DISTANCE_ONLY", - ownerDid = "did:plc:admin1" - ) - - val modalProject: GroupProject = testProject.copy(id = Some(2), strPolicy = "MODAL_COMPARISON") - val hiddenStrProject: GroupProject = testProject.copy(id = Some(3), strPolicy = "HIDDEN") - val publicStrProject: GroupProject = testProject.copy(id = Some(4), strPolicy = "PUBLIC_RAW") - - val sampleGuid1: UUID = UUID.randomUUID() - val sampleGuid2: UUID = UUID.randomUUID() - val sampleGuid3: UUID = UUID.randomUUID() - - def makeMember(id: Int, did: String, biosampleAtUri: Option[String] = None, - visibility: MemberVisibility = MemberVisibility(strVisibility = "DISTANCE_CALCULATION_ONLY"), - projectId: Int = 1): GroupProjectMember = - GroupProjectMember( - id = Some(id), groupProjectId = projectId, citizenDid = did, - biosampleAtUri = biosampleAtUri, role = "MEMBER", status = "ACTIVE", - visibility = visibility, joinedAt = Some(now) - ) - - val activeMemberViewer: GroupProjectMember = makeMember(99, "did:plc:viewer1") - - def makeCitizenBiosample(guid: UUID, atUri: String): CitizenBiosample = - CitizenBiosample( - id = Some(1), atUri = Some(atUri), accession = None, alias = None, - sourcePlatform = None, collectionDate = None, sex = None, geocoord = None, - description = None, sampleGuid = guid - ) - - def makeBiosample(id: Int, guid: UUID): Biosample = - Biosample(id = Some(id), sampleGuid = guid, sampleAccession = s"ACC-$id", - description = "Test", alias = None, centerName = "TestLab", specimenDonorId = None) - - val strRates: Seq[StrMutationRate] = Seq( - StrMutationRate(id = Some(1), markerName = "DYS393", mutationRate = BigDecimal("0.00076")), - StrMutationRate(id = Some(2), markerName = "DYS390", mutationRate = BigDecimal("0.00310")), - StrMutationRate(id = Some(3), markerName = "DYS19", mutationRate = BigDecimal("0.00230")) - ) - - // STR calls: variantId corresponds to marker (1=DYS393, 2=DYS390, 3=DYS19) - def makeStrCalls(biosampleId: Int, values: Map[Int, Int]): 
Seq[BiosampleVariantCall] = - values.map { case (variantId, value) => - BiosampleVariantCall( - id = Some(variantId * 100 + biosampleId), - biosampleId = biosampleId, - variantId = variantId, - observedState = value.toString - ) - }.toSeq - - private def setupBiosampleChain(memberId: Int, guid: UUID, biosampleId: Int, atUri: String, - strValues: Map[Int, Int]): Unit = { - when(mockBiosampleRepo.findByAtUri(atUri)) - .thenReturn(Future.successful(Some(makeCitizenBiosample(guid, atUri)))) - when(mockBiosampleMainRepo.findByGuid(guid)) - .thenReturn(Future.successful(Some((makeBiosample(biosampleId, guid), None)))) - when(mockVariantCallRepo.findByBiosample(biosampleId)) - .thenReturn(Future.successful(makeStrCalls(biosampleId, strValues))) - when(mockStrRateRepo.findAll()).thenReturn(Future.successful(strRates)) - } - - "ProjectStrComparisonService.getProjectModalHaplotype" should { - - "compute modal haplotype from member STR data" in { - val m1 = makeMember(1, "did:plc:m1", Some("at://m1/biosample/1")) - val m2 = makeMember(2, "did:plc:m2", Some("at://m2/biosample/2")) - val m3 = makeMember(3, "did:plc:m3", Some("at://m3/biosample/3")) - - when(mockProjectRepo.findById(2)).thenReturn(Future.successful(Some(modalProject))) - when(mockMemberRepo.findByProjectAndCitizen(2, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer.copy(groupProjectId = 2)))) - when(mockMemberRepo.findByProjectAndStatus(2, "ACTIVE")) - .thenReturn(Future.successful(Seq(m1, m2, m3))) - - // m1: DYS393=13, DYS390=24, DYS19=14 - setupBiosampleChain(1, sampleGuid1, 101, "at://m1/biosample/1", Map(1 -> 13, 2 -> 24, 3 -> 14)) - // m2: DYS393=13, DYS390=25, DYS19=14 - setupBiosampleChain(2, sampleGuid2, 102, "at://m2/biosample/2", Map(1 -> 13, 2 -> 25, 3 -> 14)) - // m3: DYS393=13, DYS390=24, DYS19=15 - setupBiosampleChain(3, sampleGuid3, 103, "at://m3/biosample/3", Map(1 -> 13, 2 -> 24, 3 -> 15)) - - val result = service.getProjectModalHaplotype(2, 
"did:plc:viewer1").futureValue - result.isRight mustBe true - - val modal = result.toOption.get - modal.sampleCount mustBe 3 - - // DYS393 (variant 1): all have 13 → modal = 13 - // DYS390 (variant 2): 24, 25, 24 → modal = 24 - // DYS19 (variant 3): 14, 14, 15 → modal = 14 - val modalMap = modal.markerModals.map(m => m.markerName -> m.modalValue).toMap - // marker names here are variantId.toString since we use variantId as key - modalMap.values.toSet must contain allOf(13, 24, 14) - } - - "return error for project not found" in { - when(mockProjectRepo.findById(999)).thenReturn(Future.successful(None)) - val result = service.getProjectModalHaplotype(999, "did:plc:viewer1").futureValue - result mustBe Left("Project not found") - } - - "return error for hidden STR project" in { - when(mockProjectRepo.findById(3)).thenReturn(Future.successful(Some(hiddenStrProject))) - val result = service.getProjectModalHaplotype(3, "did:plc:viewer1").futureValue - result mustBe Left("Project STR policy does not allow STR operations") - } - - "return error for non-member viewer on non-public project" in { - when(mockProjectRepo.findById(2)).thenReturn(Future.successful(Some(modalProject))) - when(mockMemberRepo.findByProjectAndCitizen(2, "did:plc:outsider")) - .thenReturn(Future.successful(None)) - - val result = service.getProjectModalHaplotype(2, "did:plc:outsider").futureValue - result mustBe Left("Only active project members can access STR data") - } - - "allow public access to PUBLIC_RAW project" in { - when(mockProjectRepo.findById(4)).thenReturn(Future.successful(Some(publicStrProject))) - when(mockMemberRepo.findByProjectAndStatus(4, "ACTIVE")) - .thenReturn(Future.successful(Seq.empty)) - - val result = service.getProjectModalHaplotype(4, "did:plc:anyone").futureValue - result.isRight mustBe true - result.toOption.get.sampleCount mustBe 0 - } - } - - "ProjectStrComparisonService.getMemberDistanceFromModal" should { - - "compute distance between member and project modal" in { 
- val m1 = makeMember(1, "did:plc:m1", Some("at://m1/biosample/1"), projectId = 2) - val m2 = makeMember(2, "did:plc:m2", Some("at://m2/biosample/2"), projectId = 2) - - when(mockProjectRepo.findById(2)).thenReturn(Future.successful(Some(modalProject))) - when(mockMemberRepo.findByProjectAndCitizen(2, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer.copy(groupProjectId = 2)))) - when(mockMemberRepo.findById(1)).thenReturn(Future.successful(Some(m1))) - when(mockMemberRepo.findByProjectAndStatus(2, "ACTIVE")) - .thenReturn(Future.successful(Seq(m1, m2))) - - // Modal will be: 1->13, 2->24 (mode of {24,25}=24 since m1 counted twice in modal) - setupBiosampleChain(1, sampleGuid1, 101, "at://m1/biosample/1", Map(1 -> 13, 2 -> 24)) - setupBiosampleChain(2, sampleGuid2, 102, "at://m2/biosample/2", Map(1 -> 13, 2 -> 25)) - - val result = service.getMemberDistanceFromModal(2, 1, "did:plc:viewer1").futureValue - result.isRight mustBe true - - val comparison = result.toOption.get - comparison.memberId mustBe 1 - comparison.markerCount must be > 0 - } - - "return error for member not found" in { - when(mockProjectRepo.findById(2)).thenReturn(Future.successful(Some(modalProject))) - when(mockMemberRepo.findByProjectAndCitizen(2, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer.copy(groupProjectId = 2)))) - when(mockMemberRepo.findById(999)).thenReturn(Future.successful(None)) - - val result = service.getMemberDistanceFromModal(2, 999, "did:plc:viewer1").futureValue - result mustBe Left("Member not found") - } - - "return error for member with STR visibility NONE" in { - val hiddenMember = makeMember(5, "did:plc:m5", Some("at://m5/biosample/5"), - visibility = MemberVisibility(strVisibility = "NONE"), projectId = 2) - - when(mockProjectRepo.findById(2)).thenReturn(Future.successful(Some(modalProject))) - when(mockMemberRepo.findByProjectAndCitizen(2, "did:plc:viewer1")) - 
.thenReturn(Future.successful(Some(activeMemberViewer.copy(groupProjectId = 2)))) - when(mockMemberRepo.findById(5)).thenReturn(Future.successful(Some(hiddenMember))) - - val result = service.getMemberDistanceFromModal(2, 5, "did:plc:viewer1").futureValue - result mustBe Left("Member STR data is not shared") - } - } - - "ProjectStrComparisonService.getMemberPairDistance" should { - - "compute genetic distance between two members" in { - val m1 = makeMember(1, "did:plc:m1", Some("at://m1/biosample/1")) - val m2 = makeMember(2, "did:plc:m2", Some("at://m2/biosample/2")) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer))) - when(mockMemberRepo.findById(1)).thenReturn(Future.successful(Some(m1))) - when(mockMemberRepo.findById(2)).thenReturn(Future.successful(Some(m2))) - - // m1: variant1=13, variant2=24, variant3=14 - setupBiosampleChain(1, sampleGuid1, 101, "at://m1/biosample/1", Map(1 -> 13, 2 -> 24, 3 -> 14)) - // m2: variant1=13, variant2=25, variant3=16 - setupBiosampleChain(2, sampleGuid2, 102, "at://m2/biosample/2", Map(1 -> 13, 2 -> 25, 3 -> 16)) - - val result = service.getMemberPairDistance(1, 1, 2, "did:plc:viewer1").futureValue - result.isRight mustBe true - - val distance = result.toOption.get - distance.memberId1 mustBe 1 - distance.memberId2 mustBe 2 - // |13-13| + |24-25| + |14-16| = 0 + 1 + 2 = 3 - distance.geneticDistance mustBe 3 - distance.markerCount mustBe 3 - distance.normalizedDistance mustBe 1.0 // 3/3 - } - - "return error when member 1 not found" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer))) - when(mockMemberRepo.findById(1)).thenReturn(Future.successful(None)) - 
when(mockMemberRepo.findById(2)).thenReturn(Future.successful(Some(makeMember(2, "did:plc:m2")))) - - val result = service.getMemberPairDistance(1, 1, 2, "did:plc:viewer1").futureValue - result mustBe Left("Member 1 not found") - } - - "return error when member STR is not shared" in { - val m1 = makeMember(1, "did:plc:m1", Some("at://m1/biosample/1"), - visibility = MemberVisibility(strVisibility = "NONE")) - val m2 = makeMember(2, "did:plc:m2", Some("at://m2/biosample/2")) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer))) - when(mockMemberRepo.findById(1)).thenReturn(Future.successful(Some(m1))) - when(mockMemberRepo.findById(2)).thenReturn(Future.successful(Some(m2))) - - val result = service.getMemberPairDistance(1, 1, 2, "did:plc:viewer1").futureValue - result mustBe Left("Member 1 STR data is not shared") - } - - "return error when member not in project" in { - val m1 = makeMember(1, "did:plc:m1", Some("at://m1/biosample/1"), projectId = 99) - val m2 = makeMember(2, "did:plc:m2", Some("at://m2/biosample/2")) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer))) - when(mockMemberRepo.findById(1)).thenReturn(Future.successful(Some(m1))) - when(mockMemberRepo.findById(2)).thenReturn(Future.successful(Some(m2))) - - val result = service.getMemberPairDistance(1, 1, 2, "did:plc:viewer1").futureValue - result mustBe Left("Member 1 not in this project") - } - } - - "ProjectStrComparisonService.getDistanceMatrix" should { - - "compute pairwise distance matrix for project members" in { - val m1 = makeMember(1, "did:plc:m1", Some("at://m1/biosample/1")) - val m2 = makeMember(2, "did:plc:m2", Some("at://m2/biosample/2")) - val m3 = makeMember(3, 
"did:plc:m3", Some("at://m3/biosample/3")) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(m1, m2, m3))) - - setupBiosampleChain(1, sampleGuid1, 101, "at://m1/biosample/1", Map(1 -> 13, 2 -> 24)) - setupBiosampleChain(2, sampleGuid2, 102, "at://m2/biosample/2", Map(1 -> 14, 2 -> 24)) - setupBiosampleChain(3, sampleGuid3, 103, "at://m3/biosample/3", Map(1 -> 13, 2 -> 26)) - - val result = service.getDistanceMatrix(1, "did:plc:viewer1").futureValue - result.isRight mustBe true - - val matrix = result.toOption.get - // 3 members → 3 pairs: (1,2), (1,3), (2,3) - matrix.size mustBe 3 - - val pair12 = matrix.find(d => d.memberId1 == 1 && d.memberId2 == 2).get - pair12.geneticDistance mustBe 1 // |13-14| + |24-24| = 1 - - val pair13 = matrix.find(d => d.memberId1 == 1 && d.memberId2 == 3).get - pair13.geneticDistance mustBe 2 // |13-13| + |24-26| = 2 - - val pair23 = matrix.find(d => d.memberId1 == 2 && d.memberId2 == 3).get - pair23.geneticDistance mustBe 3 // |14-13| + |24-26| = 3 - } - - "exclude members with STR visibility NONE" in { - val m1 = makeMember(1, "did:plc:m1", Some("at://m1/biosample/1")) - val hiddenMember = makeMember(2, "did:plc:m2", Some("at://m2/biosample/2"), - visibility = MemberVisibility(strVisibility = "NONE")) - val m3 = makeMember(3, "did:plc:m3", Some("at://m3/biosample/3")) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndCitizen(1, "did:plc:viewer1")) - .thenReturn(Future.successful(Some(activeMemberViewer))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(m1, hiddenMember, m3))) - - setupBiosampleChain(1, sampleGuid1, 101, "at://m1/biosample/1", Map(1 -> 13)) - 
setupBiosampleChain(3, sampleGuid3, 103, "at://m3/biosample/3", Map(1 -> 14)) - - val result = service.getDistanceMatrix(1, "did:plc:viewer1").futureValue - result.isRight mustBe true - - val matrix = result.toOption.get - matrix.size mustBe 1 // Only pair (1,3), member 2 excluded - matrix.head.memberId1 mustBe 1 - matrix.head.memberId2 mustBe 3 - } - - "return error for hidden STR project" in { - when(mockProjectRepo.findById(3)).thenReturn(Future.successful(Some(hiddenStrProject))) - val result = service.getDistanceMatrix(3, "did:plc:viewer1").futureValue - result mustBe Left("Project STR policy does not allow STR operations") - } - } - - "ProjectStrComparisonService.resolveViewerPermission" should { - - "block all operations on HIDDEN STR policy" in { - when(mockProjectRepo.findById(3)).thenReturn(Future.successful(Some(hiddenStrProject))) - val result = service.getDistanceMatrix(3, "did:plc:viewer1").futureValue - result mustBe Left("Project STR policy does not allow STR operations") - } - - "require MODAL_COMPARISON level for modal operations on DISTANCE_ONLY project" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - // testProject has strPolicy = "DISTANCE_ONLY" which maps to "DISTANCE_CALCULATION_ONLY" - // getProjectModalHaplotype requires "MODAL_COMPARISON_ONLY" (rank 2) - // "DISTANCE_CALCULATION_ONLY" has rank 1 < 2 - val result = service.getProjectModalHaplotype(1, "did:plc:viewer1").futureValue - result mustBe Left("Project STR policy does not allow this operation") - } - } -} diff --git a/test/services/ProjectTreeAggregationServiceSpec.scala b/test/services/ProjectTreeAggregationServiceSpec.scala deleted file mode 100644 index dcef2f91..00000000 --- a/test/services/ProjectTreeAggregationServiceSpec.scala +++ /dev/null @@ -1,527 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.domain.{EffectiveVisibility, GroupProject, GroupProjectMember, MemberVisibility} 
-import models.domain.genomics.{BiosampleHaplogroup, CitizenBiosample} -import models.domain.haplogroups.Haplogroup -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.* - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class ProjectTreeAggregationServiceSpec extends ServiceSpec { - - val mockProjectRepo: GroupProjectRepository = mock[GroupProjectRepository] - val mockMemberRepo: GroupProjectMemberRepository = mock[GroupProjectMemberRepository] - val mockBiosampleRepo: CitizenBiosampleRepository = mock[CitizenBiosampleRepository] - val mockBiosampleHgRepo: BiosampleHaplogroupRepository = mock[BiosampleHaplogroupRepository] - val mockHaplogroupRepo: HaplogroupCoreRepository = mock[HaplogroupCoreRepository] - - val service = new ProjectTreeAggregationService( - mockProjectRepo, mockMemberRepo, mockBiosampleRepo, mockBiosampleHgRepo, mockHaplogroupRepo - ) - - override def beforeEach(): Unit = { - reset(mockProjectRepo, mockMemberRepo, mockBiosampleRepo, mockBiosampleHgRepo, mockHaplogroupRepo) - } - - val now: LocalDateTime = LocalDateTime.now() - - val testProject: GroupProject = GroupProject( - id = Some(1), - projectGuid = UUID.randomUUID(), - projectName = "R-CTS4466 Project", - projectType = "HAPLOGROUP", - targetHaplogroup = Some("R-CTS4466"), - targetLineage = Some("Y_DNA"), - joinPolicy = "APPROVAL_REQUIRED", - publicTreeView = true, - ownerDid = "did:plc:admin1" - ) - - val privateProject: GroupProject = testProject.copy( - id = Some(2), publicTreeView = false, memberListVisibility = "MEMBERS_ONLY" - ) - - def makeMember(id: Int, did: String, biosampleAtUri: Option[String] = None, - visibility: MemberVisibility = MemberVisibility()): GroupProjectMember = - GroupProjectMember( - id = Some(id), groupProjectId = 1, citizenDid = did, - biosampleAtUri = biosampleAtUri, role = "MEMBER", status = "ACTIVE", - visibility = visibility, joinedAt = 
Some(now) - ) - - def makeHaplogroup(id: Int, name: String, formedYbp: Option[Int] = None, - tmrcaYbp: Option[Int] = None): Haplogroup = - Haplogroup( - id = Some(id), name = name, lineage = Some("Y-DNA"), description = None, - haplogroupType = HaplogroupType.Y, revisionId = 1, source = "ISOGG", - confidenceLevel = "HIGH", validFrom = now, validUntil = None, - formedYbp = formedYbp, tmrcaYbp = tmrcaYbp - ) - - val sampleGuid1: UUID = UUID.randomUUID() - val sampleGuid2: UUID = UUID.randomUUID() - val sampleGuid3: UUID = UUID.randomUUID() - - def makeBiosample(guid: UUID, atUri: String): CitizenBiosample = - CitizenBiosample( - id = Some(1), atUri = Some(atUri), accession = None, alias = None, - sourcePlatform = None, collectionDate = None, sex = None, geocoord = None, - description = None, sampleGuid = guid - ) - - // Haplogroup tree structure: - // R (id=100) - // ├── R-M269 (id=101) - // │ ├── R-L151 (id=102) - // │ │ └── R-CTS4466 (id=103) - // │ └── R-Z2103 (id=104) - // └── R-M198 (id=105) - - val hapR: Haplogroup = makeHaplogroup(100, "R", formedYbp = Some(28000)) - val hapRM269: Haplogroup = makeHaplogroup(101, "R-M269", formedYbp = Some(13000)) - val hapRL151: Haplogroup = makeHaplogroup(102, "R-L151", formedYbp = Some(4800)) - val hapRCTS4466: Haplogroup = makeHaplogroup(103, "R-CTS4466", formedYbp = Some(3200)) - val hapRZ2103: Haplogroup = makeHaplogroup(104, "R-Z2103", formedYbp = Some(4600)) - val hapRM198: Haplogroup = makeHaplogroup(105, "R-M198", formedYbp = Some(10000)) - - val allRelationships: Seq[(Int, Int)] = Seq( - (101, 100), // R-M269 -> R - (102, 101), // R-L151 -> R-M269 - (103, 102), // R-CTS4466 -> R-L151 - (104, 101), // R-Z2103 -> R-M269 - (105, 100) // R-M198 -> R - ) - - val haplogroupLookup: Map[Int, Haplogroup] = Map( - 100 -> hapR, 101 -> hapRM269, 102 -> hapRL151, - 103 -> hapRCTS4466, 104 -> hapRZ2103, 105 -> hapRM198 - ) - - private def setupHaplogroupLookups(): Unit = { - haplogroupLookup.foreach { case (id, hg) => - 
when(mockHaplogroupRepo.findById(id)).thenReturn(Future.successful(Some(hg))) - } - when(mockHaplogroupRepo.getAllRelationships(HaplogroupType.Y)).thenReturn(Future.successful(allRelationships)) - - // Ancestor paths - when(mockHaplogroupRepo.getAncestors(100)).thenReturn(Future.successful(Seq.empty)) - when(mockHaplogroupRepo.getAncestors(101)).thenReturn(Future.successful(Seq(hapR))) - when(mockHaplogroupRepo.getAncestors(102)).thenReturn(Future.successful(Seq(hapR, hapRM269))) - when(mockHaplogroupRepo.getAncestors(103)).thenReturn(Future.successful(Seq(hapR, hapRM269, hapRL151))) - when(mockHaplogroupRepo.getAncestors(104)).thenReturn(Future.successful(Seq(hapR, hapRM269))) - when(mockHaplogroupRepo.getAncestors(105)).thenReturn(Future.successful(Seq(hapR))) - } - - "ProjectTreeAggregationService.getAggregatedTree" should { - - "return aggregated tree with member counts" in { - val member1 = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - val member2 = makeMember(2, "did:plc:m2", Some("at://did:plc:m2/biosample/2")) - val member3 = makeMember(3, "did:plc:m3", Some("at://did:plc:m3/biosample/3")) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")).thenReturn( - Future.successful(Seq(member1, member2, member3))) - - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleRepo.findByAtUri("at://did:plc:m2/biosample/2")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid2, "at://did:plc:m2/biosample/2")))) - when(mockBiosampleRepo.findByAtUri("at://did:plc:m3/biosample/3")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid3, "at://did:plc:m3/biosample/3")))) - - // m1 -> R-CTS4466, m2 -> R-CTS4466, m3 -> R-Z2103 - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - 
.thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), None)))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid2)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid2, Some(103), None)))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid3)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid3, Some(104), None)))) - - setupHaplogroupLookups() - - val result = service.getAggregatedTree(1, "Y_DNA", "did:plc:viewer1").futureValue - result.isRight mustBe true - - val summary = result.toOption.get - summary.projectId mustBe 1 - summary.lineageType mustBe "Y_DNA" - summary.totalMembers mustBe 3 - summary.membersWithHaplogroup mustBe 3 - - // Root should be R - summary.rootNodes.size mustBe 1 - val root = summary.rootNodes.head - root.haplogroupName mustBe "R" - root.memberCount mustBe 0 - root.cumulativeCount mustBe 3 - - // R should have R-M269 child (no R-M198 since no members there) - val rm269 = root.children.find(_.haplogroupName == "R-M269") - rm269 mustBe defined - rm269.get.memberCount mustBe 0 - rm269.get.cumulativeCount mustBe 3 - - // R-L151 should have 2 cumulative (2 at CTS4466) - val rl151 = rm269.get.children.find(_.haplogroupName == "R-L151") - rl151 mustBe defined - rl151.get.cumulativeCount mustBe 2 - - // R-CTS4466 should have 2 direct members - val rcts = rl151.get.children.find(_.haplogroupName == "R-CTS4466") - rcts mustBe defined - rcts.get.memberCount mustBe 2 - rcts.get.cumulativeCount mustBe 2 - - // R-Z2103 should have 1 direct member - val rz2103 = rm269.get.children.find(_.haplogroupName == "R-Z2103") - rz2103 mustBe defined - rz2103.get.memberCount mustBe 1 - rz2103.get.cumulativeCount mustBe 1 - } - - "return error for invalid lineage type" in { - val result = service.getAggregatedTree(1, "INVALID", "did:plc:viewer1").futureValue - result mustBe Left("Invalid lineage type: INVALID") - } - - "return error for non-existent project" in { - 
when(mockProjectRepo.findById(999)).thenReturn(Future.successful(None)) - - val result = service.getAggregatedTree(999, "Y_DNA", "did:plc:viewer1").futureValue - result mustBe Left("Project not found") - } - - "deny access to private project tree for non-members" in { - when(mockProjectRepo.findById(2)).thenReturn(Future.successful(Some(privateProject))) - when(mockMemberRepo.findByProjectAndCitizen(2, "did:plc:outsider")) - .thenReturn(Future.successful(None)) - - val result = service.getAggregatedTree(2, "Y_DNA", "did:plc:outsider").futureValue - result mustBe Left("Insufficient permissions to view project tree") - } - - "allow active members to view private project tree" in { - val member = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - - when(mockProjectRepo.findById(2)).thenReturn(Future.successful(Some(privateProject))) - when(mockMemberRepo.findByProjectAndCitizen(2, "did:plc:m1")) - .thenReturn(Future.successful(Some(member))) - when(mockMemberRepo.findByProjectAndStatus(2, "ACTIVE")) - .thenReturn(Future.successful(Seq(member))) - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), None)))) - setupHaplogroupLookups() - - val result = service.getAggregatedTree(2, "Y_DNA", "did:plc:m1").futureValue - result.isRight mustBe true - } - - "exclude members with showInTree=false" in { - val visibleMember = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - val hiddenMember = makeMember(2, "did:plc:m2", Some("at://did:plc:m2/biosample/2"), - visibility = MemberVisibility(showInTree = false)) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - 
.thenReturn(Future.successful(Seq(visibleMember, hiddenMember))) - - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), None)))) - setupHaplogroupLookups() - - val result = service.getAggregatedTree(1, "Y_DNA", "did:plc:viewer1").futureValue - result.isRight mustBe true - val summary = result.toOption.get - summary.totalMembers mustBe 1 - summary.membersWithHaplogroup mustBe 1 - } - - "handle members without biosample links" in { - val memberWithBiosample = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - val memberWithout = makeMember(2, "did:plc:m2", None) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(memberWithBiosample, memberWithout))) - - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), None)))) - setupHaplogroupLookups() - - val result = service.getAggregatedTree(1, "Y_DNA", "did:plc:viewer1").futureValue - result.isRight mustBe true - val summary = result.toOption.get - summary.totalMembers mustBe 2 - summary.membersWithHaplogroup mustBe 1 - } - - "handle members with biosample but no haplogroup assignment" in { - val member = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(member))) - - 
when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(None)) - - val result = service.getAggregatedTree(1, "Y_DNA", "did:plc:viewer1").futureValue - result.isRight mustBe true - val summary = result.toOption.get - summary.totalMembers mustBe 1 - summary.membersWithHaplogroup mustBe 0 - summary.rootNodes mustBe empty - } - - "return empty tree for project with no members" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq.empty)) - - val result = service.getAggregatedTree(1, "Y_DNA", "did:plc:viewer1").futureValue - result.isRight mustBe true - val summary = result.toOption.get - summary.totalMembers mustBe 0 - summary.membersWithHaplogroup mustBe 0 - summary.rootNodes mustBe empty - } - - "include age estimates in tree nodes" in { - val member = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(member))) - - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), None)))) - setupHaplogroupLookups() - - val result = service.getAggregatedTree(1, "Y_DNA", "did:plc:viewer1").futureValue - val summary = result.toOption.get - val root = summary.rootNodes.head - root.formedYbp mustBe Some(28000) - - // Navigate to R-CTS4466 - val cts4466 = root.children.head.children.head.children.head - cts4466.formedYbp 
mustBe Some(3200) - } - - "support MT_DNA lineage type" in { - val member = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - val mtHaplogroup = Haplogroup( - id = Some(200), name = "H", lineage = Some("MT-DNA"), description = None, - haplogroupType = HaplogroupType.MT, revisionId = 1, source = "PhyloTree", - confidenceLevel = "HIGH", validFrom = now, validUntil = None - ) - - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")) - .thenReturn(Future.successful(Seq(member))) - - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, None, Some(200))))) - - when(mockHaplogroupRepo.findById(200)).thenReturn(Future.successful(Some(mtHaplogroup))) - when(mockHaplogroupRepo.getAncestors(200)).thenReturn(Future.successful(Seq.empty)) - when(mockHaplogroupRepo.getAllRelationships(HaplogroupType.MT)).thenReturn(Future.successful(Seq.empty)) - - val result = service.getAggregatedTree(1, "MT_DNA", "did:plc:viewer1").futureValue - result.isRight mustBe true - val summary = result.toOption.get - summary.lineageType mustBe "MT_DNA" - summary.rootNodes.size mustBe 1 - summary.rootNodes.head.haplogroupName mustBe "H" - } - } - - "ProjectTreeAggregationService.getBranchMemberCount" should { - - "count members at a specific branch including descendants" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")).thenReturn( - Future.successful(Seq( - makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")), - makeMember(2, "did:plc:m2", Some("at://did:plc:m2/biosample/2")), - makeMember(3, "did:plc:m3", Some("at://did:plc:m3/biosample/3")) - ))) - - 
when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleRepo.findByAtUri("at://did:plc:m2/biosample/2")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid2, "at://did:plc:m2/biosample/2")))) - when(mockBiosampleRepo.findByAtUri("at://did:plc:m3/biosample/3")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid3, "at://did:plc:m3/biosample/3")))) - - // m1 -> R-CTS4466 (103), m2 -> R-L151 (102), m3 -> R-Z2103 (104) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), None)))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid2)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid2, Some(102), None)))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid3)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid3, Some(104), None)))) - - // R-M269 (101) descendants: R-L151, R-CTS4466, R-Z2103 - when(mockHaplogroupRepo.getDescendants(101)).thenReturn( - Future.successful(Seq(hapRL151, hapRCTS4466, hapRZ2103))) - - val result = service.getBranchMemberCount(1, 101, "Y_DNA").futureValue - result mustBe Right(3) // All 3 members are under R-M269 - } - - "count only direct members at a leaf branch" in { - when(mockProjectRepo.findById(1)).thenReturn(Future.successful(Some(testProject))) - when(mockMemberRepo.findByProjectAndStatus(1, "ACTIVE")).thenReturn( - Future.successful(Seq( - makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")), - makeMember(2, "did:plc:m2", Some("at://did:plc:m2/biosample/2")) - ))) - - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleRepo.findByAtUri("at://did:plc:m2/biosample/2")) - 
.thenReturn(Future.successful(Some(makeBiosample(sampleGuid2, "at://did:plc:m2/biosample/2")))) - - // m1 -> R-CTS4466, m2 -> R-Z2103 - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), None)))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid2)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid2, Some(104), None)))) - - // R-CTS4466 (103) has no descendants - when(mockHaplogroupRepo.getDescendants(103)).thenReturn(Future.successful(Seq.empty)) - - val result = service.getBranchMemberCount(1, 103, "Y_DNA").futureValue - result mustBe Right(1) // Only m1 at CTS4466 - } - - "return error for non-existent project" in { - when(mockProjectRepo.findById(999)).thenReturn(Future.successful(None)) - - val result = service.getBranchMemberCount(999, 101, "Y_DNA").futureValue - result mustBe Left("Project not found") - } - - "return error for invalid lineage type" in { - val result = service.getBranchMemberCount(1, 101, "AUTOSOMAL").futureValue - result mustBe Left("Invalid lineage type: AUTOSOMAL") - } - } - - "ProjectTreeAggregationService.buildTreeFromCounts" should { - - "build tree from a single haplogroup" in { - when(mockHaplogroupRepo.findById(103)).thenReturn(Future.successful(Some(hapRCTS4466))) - when(mockHaplogroupRepo.getAncestors(103)).thenReturn( - Future.successful(Seq(hapR, hapRM269, hapRL151))) - when(mockHaplogroupRepo.getAllRelationships(HaplogroupType.Y)).thenReturn( - Future.successful(allRelationships)) - - // Also set up findById for ancestor nodes - when(mockHaplogroupRepo.findById(100)).thenReturn(Future.successful(Some(hapR))) - when(mockHaplogroupRepo.findById(101)).thenReturn(Future.successful(Some(hapRM269))) - when(mockHaplogroupRepo.findById(102)).thenReturn(Future.successful(Some(hapRL151))) - - val counts = Map(103 -> 5) - val result = service.buildTreeFromCounts(counts, HaplogroupType.Y).futureValue - - result.size mustBe 1 - val 
root = result.head - root.haplogroupName mustBe "R" - root.memberCount mustBe 0 - root.cumulativeCount mustBe 5 - - val rm269 = root.children.head - rm269.haplogroupName mustBe "R-M269" - rm269.cumulativeCount mustBe 5 - - val rl151 = rm269.children.head - rl151.haplogroupName mustBe "R-L151" - rl151.cumulativeCount mustBe 5 - - val cts4466 = rl151.children.head - cts4466.haplogroupName mustBe "R-CTS4466" - cts4466.memberCount mustBe 5 - cts4466.cumulativeCount mustBe 5 - } - - "build tree with multiple branches" in { - setupHaplogroupLookups() - - val counts = Map(103 -> 3, 104 -> 2, 105 -> 1) - val result = service.buildTreeFromCounts(counts, HaplogroupType.Y).futureValue - - result.size mustBe 1 - val root = result.head - root.haplogroupName mustBe "R" - root.cumulativeCount mustBe 6 - - root.children.map(_.haplogroupName).toSet mustBe Set("R-M269", "R-M198") - - val rm198 = root.children.find(_.haplogroupName == "R-M198").get - rm198.memberCount mustBe 1 - rm198.cumulativeCount mustBe 1 - } - - "return empty for empty counts" in { - val result = service.buildTreeFromCounts(Map.empty, HaplogroupType.Y).futureValue - result mustBe empty - } - } - - "ProjectTreeAggregationService.resolveHaplogroupAssignments" should { - - "resolve Y haplogroup from biosample chain" in { - val member = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), Some(200))))) - - val result = service.resolveHaplogroupAssignments(testProject, HaplogroupType.Y, Seq(member)).futureValue - result mustBe Seq(103) - } - - "resolve MT haplogroup from biosample chain" in { - val member = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/1")) - - 
when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/1")) - .thenReturn(Future.successful(Some(makeBiosample(sampleGuid1, "at://did:plc:m1/biosample/1")))) - when(mockBiosampleHgRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(103), Some(200))))) - - val result = service.resolveHaplogroupAssignments(testProject, HaplogroupType.MT, Seq(member)).futureValue - result mustBe Seq(200) - } - - "skip members with unresolvable biosamples" in { - val member = makeMember(1, "did:plc:m1", Some("at://did:plc:m1/biosample/missing")) - - when(mockBiosampleRepo.findByAtUri("at://did:plc:m1/biosample/missing")) - .thenReturn(Future.successful(None)) - - val result = service.resolveHaplogroupAssignments(testProject, HaplogroupType.Y, Seq(member)).futureValue - result mustBe empty - } - } -} diff --git a/test/services/ProposalEngineSpec.scala b/test/services/ProposalEngineSpec.scala deleted file mode 100644 index 05f75113..00000000 --- a/test/services/ProposalEngineSpec.scala +++ /dev/null @@ -1,387 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.domain.discovery.* -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.{PrivateVariantRepository, ProposedBranchRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class ProposalEngineSpec extends ServiceSpec { - - val mockProposedBranchRepo: ProposedBranchRepository = mock[ProposedBranchRepository] - val mockPrivateVariantRepo: PrivateVariantRepository = mock[PrivateVariantRepository] - - val engine = new ProposalEngine(mockProposedBranchRepo, mockPrivateVariantRepo) - - override def beforeEach(): Unit = { - reset(mockProposedBranchRepo, mockPrivateVariantRepo) - // Default: no config overrides (use defaults) - when(mockProposedBranchRepo.getConfig(any[HaplogroupType], 
anyString)).thenReturn(Future.successful(None)) - } - - val now: LocalDateTime = LocalDateTime.of(2025, 1, 1, 0, 0) - val sampleGuid: UUID = UUID.randomUUID() - val sampleRef: SampleReference = SampleReference(BiosampleSourceType.External, 1, sampleGuid) - - def makePrivateVariant(variantId: Int, terminalHgId: Int = 100): BiosamplePrivateVariant = - BiosamplePrivateVariant( - id = Some(variantId * 10), - sampleType = BiosampleSourceType.External, - sampleId = 1, - sampleGuid = sampleGuid, - variantId = variantId, - haplogroupType = HaplogroupType.Y, - terminalHaplogroupId = terminalHgId, - discoveredAt = now - ) - - def makeProposal(id: Int, parentHgId: Int = 100, consensusCount: Int = 1, - status: ProposedBranchStatus = ProposedBranchStatus.Pending): ProposedBranch = - ProposedBranch( - id = Some(id), - parentHaplogroupId = parentHgId, - haplogroupType = HaplogroupType.Y, - status = status, - consensusCount = consensusCount, - createdAt = now, - updatedAt = now - ) - - "ProposalEngine" should { - - "return empty when no private variants provided" in { - whenReady(engine.processDiscovery(sampleRef, Seq.empty)) { result => - result mustBe empty - } - } - - "create a new proposal when no existing proposals match" in { - val pvs = Seq(makePrivateVariant(1), makePrivateVariant(2), makePrivateVariant(3)) - - // No existing proposals - when(mockProposedBranchRepo.findByParentAndType(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - // Create proposal - val newProposal = makeProposal(1) - when(mockProposedBranchRepo.create(any[ProposedBranch])) - .thenReturn(Future.successful(newProposal)) - - // Add variants - when(mockProposedBranchRepo.addVariant(any[ProposedBranchVariant])).thenAnswer { invocation => - val pbv = invocation.getArgument[ProposedBranchVariant](0) - Future.successful(pbv.copy(id = Some(1))) - } - - // Add evidence - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = 
invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - - whenReady(engine.processDiscovery(sampleRef, pvs)) { result => - result must have size 1 - result.head.id mustBe Some(1) - verify(mockProposedBranchRepo).create(any[ProposedBranch]) - } - } - - "match an existing proposal with high Jaccard similarity" in { - val pvs = Seq(makePrivateVariant(1), makePrivateVariant(2), makePrivateVariant(3)) - val existingProposal = makeProposal(10, consensusCount = 1) - - when(mockProposedBranchRepo.findByParentAndType(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(existingProposal))) - - // Existing proposal has variants {1, 2, 3} — perfect match (Jaccard = 1.0) - when(mockProposedBranchRepo.getVariantIds(10)) - .thenReturn(Future.successful(Set(1, 2, 3))) - - // Add evidence - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - - // Get variants for evidence count update - when(mockProposedBranchRepo.getVariants(10)).thenReturn(Future.successful(Seq( - ProposedBranchVariant(Some(1), 10, 1, true, 1, now, now), - ProposedBranchVariant(Some(2), 10, 2, true, 1, now, now), - ProposedBranchVariant(Some(3), 10, 3, true, 1, now, now) - ))) - - when(mockProposedBranchRepo.updateVariantEvidence(anyInt, anyInt, anyInt)) - .thenReturn(Future.successful(true)) - - when(mockProposedBranchRepo.countEvidence(10)) - .thenReturn(Future.successful(2)) - - when(mockProposedBranchRepo.updateConsensus(anyInt, anyInt, any[Double])) - .thenReturn(Future.successful(true)) - - whenReady(engine.processDiscovery(sampleRef, pvs)) { result => - result must have size 1 - result.head.consensusCount mustBe 2 - // Should NOT create a new proposal - verify(mockProposedBranchRepo, never()).create(any[ProposedBranch]) - } - } - - "create new proposal when Jaccard similarity is too low" in { - val pvs = 
Seq(makePrivateVariant(1), makePrivateVariant(2), makePrivateVariant(3)) - val existingProposal = makeProposal(10) - - when(mockProposedBranchRepo.findByParentAndType(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(existingProposal))) - - // Existing has {4, 5, 6} — no overlap (Jaccard = 0.0) - when(mockProposedBranchRepo.getVariantIds(10)) - .thenReturn(Future.successful(Set(4, 5, 6))) - - val newProposal = makeProposal(11) - when(mockProposedBranchRepo.create(any[ProposedBranch])) - .thenReturn(Future.successful(newProposal)) - when(mockProposedBranchRepo.addVariant(any[ProposedBranchVariant])).thenAnswer { invocation => - val pbv = invocation.getArgument[ProposedBranchVariant](0) - Future.successful(pbv.copy(id = Some(1))) - } - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - - whenReady(engine.processDiscovery(sampleRef, pvs)) { result => - result must have size 1 - verify(mockProposedBranchRepo).create(any[ProposedBranch]) - } - } - - "flag partial matches for split review" in { - // Jaccard of 0.5 <= J < 0.8 should flag for split - val pvs = Seq(makePrivateVariant(1), makePrivateVariant(2), makePrivateVariant(3), makePrivateVariant(4)) - val existingProposal = makeProposal(10) - - when(mockProposedBranchRepo.findByParentAndType(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(existingProposal))) - - // Existing has {1, 2, 5, 6} — intersection {1,2}, union {1,2,3,4,5,6} => J = 2/6 = 0.333 - // Actually we need J >= 0.5 for split. 
Let's use {1, 2, 3, 5}: intersection {1,2,3}, union {1,2,3,4,5} => J = 3/5 = 0.6 - when(mockProposedBranchRepo.getVariantIds(10)) - .thenReturn(Future.successful(Set(1, 2, 3, 5))) - - // No match >= 0.8, so creates a new one - val newProposal = makeProposal(11) - when(mockProposedBranchRepo.create(any[ProposedBranch])) - .thenReturn(Future.successful(newProposal)) - when(mockProposedBranchRepo.addVariant(any[ProposedBranchVariant])).thenAnswer { invocation => - val pbv = invocation.getArgument[ProposedBranchVariant](0) - Future.successful(pbv.copy(id = Some(1))) - } - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - - // Expect the existing proposal to be updated with split note - when(mockProposedBranchRepo.update(any[ProposedBranch])) - .thenReturn(Future.successful(true)) - - whenReady(engine.processDiscovery(sampleRef, pvs)) { result => - result must have size 1 - // Verify the split candidate was flagged - verify(mockProposedBranchRepo).update(any[ProposedBranch]) - } - } - - "transition proposal to ReadyForReview when consensus threshold met" in { - val pvs = Seq(makePrivateVariant(1), makePrivateVariant(2)) - val existingProposal = makeProposal(10, consensusCount = 2, status = ProposedBranchStatus.Pending) - - when(mockProposedBranchRepo.findByParentAndType(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(existingProposal))) - - // Perfect match - when(mockProposedBranchRepo.getVariantIds(10)) - .thenReturn(Future.successful(Set(1, 2))) - - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - - when(mockProposedBranchRepo.getVariants(10)).thenReturn(Future.successful(Seq( - ProposedBranchVariant(Some(1), 10, 1, true, 2, now, now), - 
ProposedBranchVariant(Some(2), 10, 2, true, 2, now, now) - ))) - when(mockProposedBranchRepo.updateVariantEvidence(anyInt, anyInt, anyInt)) - .thenReturn(Future.successful(true)) - - // 3 evidence entries — meets default threshold of 3 - when(mockProposedBranchRepo.countEvidence(10)) - .thenReturn(Future.successful(3)) - - when(mockProposedBranchRepo.updateConsensus(anyInt, anyInt, any[Double])) - .thenReturn(Future.successful(true)) - when(mockProposedBranchRepo.updateStatus(10, ProposedBranchStatus.ReadyForReview)) - .thenReturn(Future.successful(true)) - - whenReady(engine.processDiscovery(sampleRef, pvs)) { result => - result must have size 1 - result.head.status mustBe ProposedBranchStatus.ReadyForReview - verify(mockProposedBranchRepo).updateStatus(10, ProposedBranchStatus.ReadyForReview) - } - } - - "not transition already ReadyForReview proposals" in { - val pvs = Seq(makePrivateVariant(1)) - val existingProposal = makeProposal(10, consensusCount = 5, status = ProposedBranchStatus.ReadyForReview) - - when(mockProposedBranchRepo.findByParentAndType(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(existingProposal))) - - when(mockProposedBranchRepo.getVariantIds(10)) - .thenReturn(Future.successful(Set(1))) - - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - - when(mockProposedBranchRepo.getVariants(10)).thenReturn(Future.successful(Seq( - ProposedBranchVariant(Some(1), 10, 1, true, 5, now, now) - ))) - when(mockProposedBranchRepo.updateVariantEvidence(anyInt, anyInt, anyInt)) - .thenReturn(Future.successful(true)) - when(mockProposedBranchRepo.countEvidence(10)) - .thenReturn(Future.successful(6)) - when(mockProposedBranchRepo.updateConsensus(anyInt, anyInt, any[Double])) - .thenReturn(Future.successful(true)) - - whenReady(engine.processDiscovery(sampleRef, pvs)) { result => - result must 
have size 1 - result.head.status mustBe ProposedBranchStatus.ReadyForReview - // Should NOT call updateStatus since already ReadyForReview - verify(mockProposedBranchRepo, never()).updateStatus(anyInt, any[ProposedBranchStatus]) - } - } - - "add new variants from sample that existing proposal doesn't have" in { - // Sample has {1, 2, 3, 4}, proposal has {1, 2, 3} => Jaccard = 3/4 = 0.75... just under 0.8 - // Actually let's make it match: {1, 2, 3, 4} vs {1, 2, 3, 4, 5} => J = 4/5 = 0.8 — exact threshold - val pvs = Seq(makePrivateVariant(1), makePrivateVariant(2), makePrivateVariant(3), makePrivateVariant(4)) - val existingProposal = makeProposal(10) - - when(mockProposedBranchRepo.findByParentAndType(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq(existingProposal))) - - when(mockProposedBranchRepo.getVariantIds(10)) - .thenReturn(Future.successful(Set(1, 2, 3, 4, 5))) - - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - - // No new variants to add (sample {1,2,3,4} is subset of proposal {1,2,3,4,5}) - // But existing shared variants get updated - when(mockProposedBranchRepo.getVariants(10)).thenReturn(Future.successful(Seq( - ProposedBranchVariant(Some(1), 10, 1, true, 1, now, now), - ProposedBranchVariant(Some(2), 10, 2, true, 1, now, now), - ProposedBranchVariant(Some(3), 10, 3, true, 1, now, now), - ProposedBranchVariant(Some(4), 10, 4, true, 1, now, now), - ProposedBranchVariant(Some(5), 10, 5, true, 1, now, now) - ))) - when(mockProposedBranchRepo.updateVariantEvidence(anyInt, anyInt, anyInt)) - .thenReturn(Future.successful(true)) - when(mockProposedBranchRepo.countEvidence(10)) - .thenReturn(Future.successful(2)) - when(mockProposedBranchRepo.updateConsensus(anyInt, anyInt, any[Double])) - .thenReturn(Future.successful(true)) - - whenReady(engine.processDiscovery(sampleRef, pvs)) { result 
=> - result must have size 1 - // Should not create a new proposal - verify(mockProposedBranchRepo, never()).create(any[ProposedBranch]) - } - } - - "handle citizen biosample source type" in { - val citizenGuid = UUID.randomUUID() - val citizenRef = SampleReference(BiosampleSourceType.Citizen, 42, citizenGuid) - val pvs = Seq( - BiosamplePrivateVariant(Some(1), BiosampleSourceType.Citizen, 42, citizenGuid, - variantId = 10, haplogroupType = HaplogroupType.Y, terminalHaplogroupId = 200, discoveredAt = now) - ) - - when(mockProposedBranchRepo.findByParentAndType(200, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - val newProposal = makeProposal(1, parentHgId = 200) - when(mockProposedBranchRepo.create(any[ProposedBranch])) - .thenReturn(Future.successful(newProposal)) - when(mockProposedBranchRepo.addVariant(any[ProposedBranchVariant])).thenAnswer { invocation => - val pbv = invocation.getArgument[ProposedBranchVariant](0) - Future.successful(pbv.copy(id = Some(1))) - } - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - - whenReady(engine.processDiscovery(citizenRef, pvs)) { result => - result must have size 1 - verify(mockProposedBranchRepo).create(any[ProposedBranch]) - } - } - } - - "jaccardSimilarity" should { - "return 1.0 for identical sets" in { - engine.jaccardSimilarity(Set(1, 2, 3), Set(1, 2, 3)) mustBe 1.0 - } - - "return 0.0 for disjoint sets" in { - engine.jaccardSimilarity(Set(1, 2), Set(3, 4)) mustBe 0.0 - } - - "return correct value for partial overlap" in { - // {1,2,3} vs {2,3,4}: intersection={2,3}=2, union={1,2,3,4}=4, J=0.5 - engine.jaccardSimilarity(Set(1, 2, 3), Set(2, 3, 4)) mustBe 0.5 - } - - "return 1.0 for two empty sets" in { - engine.jaccardSimilarity(Set.empty, Set.empty) mustBe 1.0 - } - - "return 0.0 when one set is empty" in { - engine.jaccardSimilarity(Set(1, 
2), Set.empty) mustBe 0.0 - } - } - - "calculateConfidenceScore" should { - "return 0.0 for zero evidence and zero variants" in { - engine.calculateConfidenceScore(0, 0, 0) mustBe 0.0 - } - - "increase with evidence count" in { - val low = engine.calculateConfidenceScore(1, 3, 0) - val high = engine.calculateConfidenceScore(5, 3, 0) - high must be > low - } - - "decrease with variant mismatches" in { - val noMismatch = engine.calculateConfidenceScore(3, 5, 0) - val withMismatch = engine.calculateConfidenceScore(3, 5, 5) - noMismatch must be > withMismatch - } - - "cap at 1.0" in { - engine.calculateConfidenceScore(100, 100, 0) mustBe 1.0 - } - } -} diff --git a/test/services/PublicationDiscoveryServiceSpec.scala b/test/services/PublicationDiscoveryServiceSpec.scala deleted file mode 100644 index 193d0269..00000000 --- a/test/services/PublicationDiscoveryServiceSpec.scala +++ /dev/null @@ -1,156 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.publications.{Publication, PublicationCandidate} -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.{PublicationCandidateRepository, PublicationRepository, PublicationSearchConfigRepository, PublicationSearchRunRepository} - -import java.time.{LocalDate, LocalDateTime} -import java.util.UUID -import scala.concurrent.Future - -class PublicationDiscoveryServiceSpec extends ServiceSpec { - - val mockSearchConfigRepo: PublicationSearchConfigRepository = mock[PublicationSearchConfigRepository] - val mockCandidateRepo: PublicationCandidateRepository = mock[PublicationCandidateRepository] - val mockRunRepo: PublicationSearchRunRepository = mock[PublicationSearchRunRepository] - val mockPubRepo: PublicationRepository = mock[PublicationRepository] - val mockPubService: PublicationService = mock[PublicationService] - val mockOpenAlexService: OpenAlexService = mock[OpenAlexService] - val mockRelevanceScoringService: 
RelevanceScoringService = mock[RelevanceScoringService] - val mockScoringFeedbackService: ScoringFeedbackService = mock[ScoringFeedbackService] - - val service = new PublicationDiscoveryService( - mockSearchConfigRepo, mockCandidateRepo, mockRunRepo, - mockPubRepo, mockPubService, mockOpenAlexService, mockRelevanceScoringService, - mockScoringFeedbackService - ) - - override def beforeEach(): Unit = { - reset(mockSearchConfigRepo, mockCandidateRepo, mockRunRepo, - mockPubRepo, mockPubService, mockOpenAlexService, mockRelevanceScoringService, - mockScoringFeedbackService) - } - - val reviewerId: UUID = UUID.randomUUID() - - def makeCandidate(id: Int, status: String = "pending", doi: Option[String] = Some("10.1234/test")): PublicationCandidate = - PublicationCandidate( - id = Some(id), openAlexId = s"W$id", doi = doi, - title = s"Test Publication $id", `abstract` = Some("Abstract"), - publicationDate = Some(LocalDate.of(2025, 1, 1)), - journalName = Some("Nature"), relevanceScore = Some(0.8), - discoveryDate = LocalDateTime.now(), status = status, - reviewedBy = None, reviewedAt = None, rejectionReason = None, - rawMetadata = None - ) - - "PublicationDiscoveryService" should { - - "acceptCandidate" should { - - "accept and import via DOI" in { - val candidate = makeCandidate(1) - when(mockCandidateRepo.findById(1)).thenReturn(Future.successful(Some(candidate))) - when(mockCandidateRepo.updateStatus(anyInt, anyString, any[Option[UUID]], any[Option[String]])) - .thenReturn(Future.successful(true)) - when(mockPubService.processPublication(anyString, any[Boolean])) - .thenReturn(Future.successful(Some(mock[Publication]))) - - whenReady(service.acceptCandidate(1, reviewerId)) { result => - result mustBe defined - verify(mockCandidateRepo).updateStatus(1, "accepted", Some(reviewerId), None) - verify(mockPubService).processPublication("10.1234/test", true) - } - } - - "return None for nonexistent candidate" in { - 
when(mockCandidateRepo.findById(999)).thenReturn(Future.successful(None)) - - whenReady(service.acceptCandidate(999, reviewerId)) { result => - result mustBe empty - } - } - - "return None for candidate without DOI" in { - val candidate = makeCandidate(1, doi = None) - when(mockCandidateRepo.findById(1)).thenReturn(Future.successful(Some(candidate))) - when(mockCandidateRepo.updateStatus(anyInt, anyString, any[Option[UUID]], any[Option[String]])) - .thenReturn(Future.successful(true)) - - whenReady(service.acceptCandidate(1, reviewerId)) { result => - result mustBe empty - verify(mockPubService, never()).processPublication(anyString, any[Boolean]) - } - } - } - - "rejectCandidate" should { - - "reject with reason" in { - when(mockCandidateRepo.updateStatus(1, "rejected", Some(reviewerId), Some("Off-topic"))) - .thenReturn(Future.successful(true)) - - whenReady(service.rejectCandidate(1, reviewerId, Some("Off-topic"))) { result => - result mustBe true - } - } - } - - "deferCandidate" should { - - "defer a candidate" in { - when(mockCandidateRepo.updateStatus(1, "deferred", Some(reviewerId), None)) - .thenReturn(Future.successful(true)) - - whenReady(service.deferCandidate(1, reviewerId)) { result => - result mustBe true - } - } - } - - "bulkAcceptCandidates" should { - - "accept multiple candidates" in { - for (id <- 1 to 3) { - val candidate = makeCandidate(id) - when(mockCandidateRepo.findById(id)).thenReturn(Future.successful(Some(candidate))) - when(mockCandidateRepo.updateStatus(id, "accepted", Some(reviewerId), None)) - .thenReturn(Future.successful(true)) - } - when(mockPubService.processPublication(anyString, any[Boolean])) - .thenReturn(Future.successful(Some(mock[Publication]))) - - whenReady(service.bulkAcceptCandidates(Seq(1, 2, 3), reviewerId)) { results => - results must have size 3 - results.count(_.isDefined) mustBe 3 - } - } - } - - "bulkRejectCandidates" should { - - "reject multiple candidates with reason" in { - 
when(mockCandidateRepo.bulkUpdateStatus(Seq(1, 2), "rejected", reviewerId, Some("Not relevant"))) - .thenReturn(Future.successful(2)) - - whenReady(service.bulkRejectCandidates(Seq(1, 2), reviewerId, Some("Not relevant"))) { count => - count mustBe 2 - } - } - } - - "bulkDeferCandidates" should { - - "defer multiple candidates" in { - when(mockCandidateRepo.bulkUpdateStatus(Seq(1, 2, 3), "deferred", reviewerId, None)) - .thenReturn(Future.successful(3)) - - whenReady(service.bulkDeferCandidates(Seq(1, 2, 3), reviewerId)) { count => - count mustBe 3 - } - } - } - } -} diff --git a/test/services/RelevanceScoringServiceSpec.scala b/test/services/RelevanceScoringServiceSpec.scala deleted file mode 100644 index 30b37158..00000000 --- a/test/services/RelevanceScoringServiceSpec.scala +++ /dev/null @@ -1,252 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.publications.PublicationCandidate -import play.api.Configuration -import play.api.libs.json.Json - -import java.time.{LocalDate, LocalDateTime} - -class RelevanceScoringServiceSpec extends ServiceSpec { - - val config: Configuration = Configuration.from(Map.empty) - val service = new RelevanceScoringService(config) - - def makeCandidate( - title: String = "Test Publication", - abstractText: Option[String] = None, - journalName: Option[String] = None, - rawMetadata: Option[String] = None - ): PublicationCandidate = - PublicationCandidate( - id = Some(1), openAlexId = "W1234", doi = Some("10.1234/test"), - title = title, `abstract` = abstractText, - publicationDate = Some(LocalDate.of(2025, 1, 1)), - journalName = journalName, relevanceScore = None, - discoveryDate = LocalDateTime.now(), status = "pending", - reviewedBy = None, reviewedAt = None, rejectionReason = None, - rawMetadata = rawMetadata.map(Json.parse) - ) - - "RelevanceScoringService" should { - - "calculateKeywordScore" should { - - "score high for primary keywords in title" in { - val candidate = makeCandidate(title = 
"Y-chromosome haplogroup phylogeny in ancient DNA samples") - val score = service.calculateKeywordScore(candidate) - score must be >= 0.3 // Multiple primary keyword hits - } - - "score for keywords in abstract" in { - val candidate = makeCandidate( - title = "A genetic study", - abstractText = Some("We analyzed Y-DNA haplogroup distributions using whole genome sequencing") - ) - val score = service.calculateKeywordScore(candidate) - score must be > 0.0 - } - - "score zero for unrelated content" in { - val candidate = makeCandidate(title = "Machine learning in financial markets") - val score = service.calculateKeywordScore(candidate) - score mustBe 0.0 - } - - "include secondary keywords at lower weight" in { - val primary = makeCandidate(title = "Y-chromosome haplogroup analysis") - val secondary = makeCandidate(title = "Genetic genealogy and paternal lineage studies") - - val primaryScore = service.calculateKeywordScore(primary) - val secondaryScore = service.calculateKeywordScore(secondary) - - primaryScore must be > secondaryScore - } - - "cap at 1.0 for many keyword matches" in { - val candidate = makeCandidate( - title = "Y-DNA haplogroup phylogenetic analysis of ancient DNA using SNP and Y-STR", - abstractText = Some("Population genetics study using whole genome sequencing with TMRCA molecular clock") - ) - val score = service.calculateKeywordScore(candidate) - score mustBe 1.0 - } - } - - "calculateConceptScore" should { - - "score high for high-value concepts" in { - val metadata = """{"concepts": [ - {"display_name": "Haplogroup", "score": 0.9}, - {"display_name": "Y chromosome", "score": 0.85} - ]}""" - val score = service.calculateConceptScore(Some(Json.parse(metadata))) - score must be >= 0.5 - } - - "score lower for medium-value concepts" in { - val metadata = """{"concepts": [ - {"display_name": "Genetics", "score": 0.9}, - {"display_name": "Genomics", "score": 0.8} - ]}""" - val score = service.calculateConceptScore(Some(Json.parse(metadata))) - 
score must be > 0.0 - score must be < 1.0 - } - - "return zero for irrelevant concepts" in { - val metadata = """{"concepts": [ - {"display_name": "Computer Science", "score": 0.9}, - {"display_name": "Machine Learning", "score": 0.8} - ]}""" - val score = service.calculateConceptScore(Some(Json.parse(metadata))) - score mustBe 0.0 - } - - "return zero for missing metadata" in { - service.calculateConceptScore(None) mustBe 0.0 - } - - "handle topics field as fallback" in { - val metadata = """{"topics": [ - {"display_name": "Population Genetics", "score": 0.7} - ]}""" - val score = service.calculateConceptScore(Some(Json.parse(metadata))) - score must be > 0.0 - } - } - - "calculateCitationScore" should { - - "use normalized percentile when available" in { - val metadata = """{"citation_normalized_percentile": {"value": 0.85}}""" - val score = service.calculateCitationScore(Some(Json.parse(metadata))) - score mustBe 0.85 - } - - "fall back to cited_by_count with log scaling" in { - val metadata = """{"cited_by_count": 100}""" - val score = service.calculateCitationScore(Some(Json.parse(metadata))) - score must be > 0.0 - score must be < 1.0 - } - - "return zero for uncited papers" in { - val metadata = """{"cited_by_count": 0}""" - val score = service.calculateCitationScore(Some(Json.parse(metadata))) - score mustBe 0.0 - } - - "return zero for missing metadata" in { - service.calculateCitationScore(None) mustBe 0.0 - } - - "cap at 1.0 for highly cited papers" in { - val metadata = """{"cited_by_count": 5000}""" - val score = service.calculateCitationScore(Some(Json.parse(metadata))) - score mustBe 1.0 - } - } - - "calculateJournalScore" should { - - "score 1.0 for high-value journals" in { - service.calculateJournalScore(Some("Nature Genetics")) mustBe 1.0 - service.calculateJournalScore(Some("Molecular Biology and Evolution")) mustBe 1.0 - service.calculateJournalScore(Some("American Journal of Human Genetics")) mustBe 1.0 - } - - "score 0.3 for other known 
journals" in { - service.calculateJournalScore(Some("BMC Genomics")) mustBe 0.3 - } - - "score 0.0 for missing journal" in { - service.calculateJournalScore(None) mustBe 0.0 - } - - "be case insensitive" in { - service.calculateJournalScore(Some("NATURE GENETICS")) mustBe 1.0 - service.calculateJournalScore(Some("nature genetics")) mustBe 1.0 - } - } - - "score (composite)" should { - - "produce high score for highly relevant paper" in { - val metadata = """{ - "concepts": [{"display_name": "Haplogroup", "score": 0.9}, {"display_name": "Y chromosome", "score": 0.8}], - "citation_normalized_percentile": {"value": 0.9} - }""" - val candidate = makeCandidate( - title = "Y-chromosome haplogroup phylogeny reveals ancient DNA migration patterns", - abstractText = Some("We analyzed Y-DNA SNP and Y-STR data using whole genome sequencing"), - journalName = Some("Nature Genetics"), - rawMetadata = Some(metadata) - ) - val score = service.score(candidate) - score must be >= 0.7 - } - - "produce low score for irrelevant paper" in { - val metadata = """{ - "concepts": [{"display_name": "Computer Science", "score": 0.9}], - "cited_by_count": 2 - }""" - val candidate = makeCandidate( - title = "Deep learning for image classification", - journalName = Some("Journal of AI Research"), - rawMetadata = Some(metadata) - ) - val score = service.score(candidate) - score must be < 0.3 - } - - "be bounded between 0 and 1" in { - val lowCandidate = makeCandidate(title = "Completely unrelated topic") - val highCandidate = makeCandidate( - title = "Y-DNA haplogroup phylogenetic ancient DNA SNP Y-STR whole genome sequencing", - journalName = Some("Nature Genetics"), - rawMetadata = Some("""{"concepts": [{"display_name": "Haplogroup", "score": 1.0}], "citation_normalized_percentile": {"value": 1.0}}""") - ) - - service.score(lowCandidate) must be >= 0.0 - service.score(lowCandidate) must be <= 1.0 - service.score(highCandidate) must be >= 0.0 - service.score(highCandidate) must be <= 1.0 - } - 
} - - "scoreCandidates" should { - - "update relevance scores for a batch" in { - val candidates = Seq( - makeCandidate(title = "Y-chromosome haplogroup study", journalName = Some("Nature")), - makeCandidate(title = "Unrelated paper on economics") - ) - - val scored = service.scoreCandidates(candidates) - scored must have size 2 - scored.head.relevanceScore mustBe defined - scored(1).relevanceScore mustBe defined - scored.head.relevanceScore.get must be > scored(1).relevanceScore.get - } - } - - "scoreBreakdown" should { - - "return all component scores" in { - val candidate = makeCandidate( - title = "Y-DNA haplogroup analysis", - journalName = Some("Nature Genetics"), - rawMetadata = Some("""{"cited_by_count": 50}""") - ) - - val breakdown = service.scoreBreakdown(candidate) - breakdown.keywordScore must be > 0.0 - breakdown.journalScore mustBe 1.0 - breakdown.citationScore must be > 0.0 - breakdown.compositeScore mustBe service.score(candidate) - } - } - } -} diff --git a/test/services/ScoringFeedbackServiceSpec.scala b/test/services/ScoringFeedbackServiceSpec.scala deleted file mode 100644 index 3276e9c3..00000000 --- a/test/services/ScoringFeedbackServiceSpec.scala +++ /dev/null @@ -1,257 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.publications.PublicationCandidate -import org.mockito.Mockito.when -import play.api.Configuration -import play.api.libs.json.Json -import repositories.PublicationCandidateRepository - -import java.time.{LocalDate, LocalDateTime} -import scala.concurrent.Future - -class ScoringFeedbackServiceSpec extends ServiceSpec { - - val mockCandidateRepo: PublicationCandidateRepository = mock[PublicationCandidateRepository] - val config: Configuration = Configuration.from(Map.empty) - val relevanceScoringService = new RelevanceScoringService(config) - - val service = new ScoringFeedbackService(mockCandidateRepo, relevanceScoringService) - - def makeCandidate( - id: Int, - status: String, - title: String = "Test 
Publication", - abstractText: Option[String] = None, - journalName: Option[String] = None, - rawMetadata: Option[String] = None - ): PublicationCandidate = - PublicationCandidate( - id = Some(id), openAlexId = s"W$id", doi = Some(s"10.1234/test$id"), - title = title, `abstract` = abstractText, - publicationDate = Some(LocalDate.of(2025, 1, 1)), - journalName = journalName, relevanceScore = Some(0.5), - discoveryDate = LocalDateTime.now(), status = status, - reviewedBy = None, reviewedAt = None, rejectionReason = None, - rawMetadata = rawMetadata.map(Json.parse) - ) - - def acceptedGenomics(id: Int): PublicationCandidate = makeCandidate( - id, "accepted", - title = "Y-chromosome haplogroup phylogeny in ancient DNA", - abstractText = Some("We analyzed Y-DNA SNP data using whole genome sequencing"), - journalName = Some("Nature Genetics"), - rawMetadata = Some("""{"concepts": [{"display_name": "Haplogroup", "score": 0.9}], "citation_normalized_percentile": {"value": 0.8}}""") - ) - - def rejectedIrrelevant(id: Int): PublicationCandidate = makeCandidate( - id, "rejected", - title = "Machine learning for financial market prediction", - journalName = Some("Journal of Finance"), - rawMetadata = Some("""{"concepts": [{"display_name": "Machine Learning", "score": 0.9}], "cited_by_count": 5}""") - ) - - "ScoringFeedbackService" should { - - "computeLearnedWeights" should { - - "return None with insufficient data" in { - val fewCandidates = (1 to 5).map(i => acceptedGenomics(i)) - when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(fewCandidates)) - - whenReady(service.computeLearnedWeights()) { result => - result mustBe empty - } - } - - "return None when only accepted candidates exist" in { - val candidates = (1 to 12).map(i => acceptedGenomics(i)) - when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(candidates)) - - whenReady(service.computeLearnedWeights()) { result => - result mustBe empty - } - } - - "return None when only rejected 
candidates exist" in { - val candidates = (1 to 12).map(i => rejectedIrrelevant(i)) - when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(candidates)) - - whenReady(service.computeLearnedWeights()) { result => - result mustBe empty - } - } - - "return learned weights with sufficient mixed data" in { - val accepted = (1 to 6).map(i => acceptedGenomics(i)) - val rejected = (7 to 12).map(i => rejectedIrrelevant(i)) - when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(accepted ++ rejected)) - - whenReady(service.computeLearnedWeights()) { result => - result mustBe defined - val weights = result.get - weights.sampleSize mustBe 12 - - // Weights must sum to ~1.0 - val sum = weights.keywordWeight + weights.conceptWeight + weights.citationWeight + weights.journalWeight - sum mustBe 1.0 +- 0.001 - - // All weights must be positive - weights.keywordWeight must be > 0.0 - weights.conceptWeight must be > 0.0 - weights.citationWeight must be > 0.0 - weights.journalWeight must be > 0.0 - } - } - - "increase weight for discriminative components" in { - val accepted = (1 to 6).map(i => acceptedGenomics(i)) - val rejected = (7 to 12).map(i => rejectedIrrelevant(i)) - when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(accepted ++ rejected)) - - whenReady(service.computeLearnedWeights()) { result => - val weights = result.get - // Keyword should be highly discriminative (genomics terms vs finance terms) - // so its weight should remain high or increase - weights.discriminativePower("keyword") must be > 0.0 - } - } - } - - "analyzeFeedback" should { - - "return None when no reviewed candidates exist" in { - when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.analyzeFeedback()) { result => - result mustBe empty - } - } - - "return analysis with reviewed candidates" in { - val accepted = (1 to 3).map(i => acceptedGenomics(i)) - val rejected = (4 to 6).map(i => rejectedIrrelevant(i)) - 
when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(accepted ++ rejected)) - - whenReady(service.analyzeFeedback()) { result => - result mustBe defined - val analysis = result.get - analysis.totalReviewed mustBe 6 - analysis.acceptedCount mustBe 3 - analysis.rejectedCount mustBe 3 - analysis.acceptedMeans must contain key "keyword" - analysis.rejectedMeans must contain key "keyword" - analysis.componentDiscriminativePower must contain key "keyword" - } - } - - "show higher keyword mean for accepted genomics papers" in { - val accepted = (1 to 3).map(i => acceptedGenomics(i)) - val rejected = (4 to 6).map(i => rejectedIrrelevant(i)) - when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(accepted ++ rejected)) - - whenReady(service.analyzeFeedback()) { result => - val analysis = result.get - analysis.acceptedMeans("keyword") must be > analysis.rejectedMeans("keyword") - } - } - } - - "computeMeans" should { - - "return zeros for empty breakdowns" in { - val means = service.computeMeans(Seq.empty) - means("keyword") mustBe 0.0 - means("concept") mustBe 0.0 - means("citation") mustBe 0.0 - means("journal") mustBe 0.0 - } - - "compute correct averages" in { - val breakdowns = Seq( - ScoringBreakdown(0.8, 0.6, 0.4, 1.0, 0.7, 0.35, 0.25, 0.20, 0.20), - ScoringBreakdown(0.6, 0.4, 0.2, 0.3, 0.4, 0.35, 0.25, 0.20, 0.20) - ) - val means = service.computeMeans(breakdowns) - means("keyword") mustBe 0.7 +- 0.001 - means("concept") mustBe 0.5 +- 0.001 - means("citation") mustBe 0.3 +- 0.001 - means("journal") mustBe 0.65 +- 0.001 - } - } - - "computeDiscriminativePower" should { - - "measure separation between accepted and rejected" in { - val acceptedBreakdowns = Seq( - ScoringBreakdown(0.9, 0.8, 0.5, 1.0, 0.8, 0.35, 0.25, 0.20, 0.20), - ScoringBreakdown(0.8, 0.7, 0.6, 1.0, 0.75, 0.35, 0.25, 0.20, 0.20) - ) - val rejectedBreakdowns = Seq( - ScoringBreakdown(0.0, 0.0, 0.3, 0.3, 0.1, 0.35, 0.25, 0.20, 0.20), - ScoringBreakdown(0.1, 0.1, 0.2, 0.3, 
0.15, 0.35, 0.25, 0.20, 0.20) - ) - - val power = service.computeDiscriminativePower(acceptedBreakdowns, rejectedBreakdowns) - // Keyword: |0.85 - 0.05| = 0.8 - power("keyword") mustBe 0.8 +- 0.001 - // Journal: |1.0 - 0.3| = 0.7 - power("journal") mustBe 0.7 +- 0.001 - // All should be positive - power.values.foreach(_ must be >= 0.0) - } - } - - "deriveWeights" should { - - "produce weights that sum to 1.0" in { - val accepted = (1 to 6).map(i => acceptedGenomics(i)) - val rejected = (7 to 12).map(i => rejectedIrrelevant(i)) - - val weights = service.deriveWeights(accepted, rejected) - val sum = weights.keywordWeight + weights.conceptWeight + weights.citationWeight + weights.journalWeight - sum mustBe 1.0 +- 0.001 - } - - "preserve stability through blending with original weights" in { - val accepted = (1 to 6).map(i => acceptedGenomics(i)) - val rejected = (7 to 12).map(i => rejectedIrrelevant(i)) - - val weights = service.deriveWeights(accepted, rejected) - // No single weight should dominate completely due to blending - weights.keywordWeight must be < 0.9 - weights.conceptWeight must be < 0.9 - weights.citationWeight must be < 0.9 - weights.journalWeight must be < 0.9 - } - } - - "integration with RelevanceScoringService" should { - - "apply learned weights to scoring service" in { - val accepted = (1 to 6).map(i => acceptedGenomics(i)) - val rejected = (7 to 12).map(i => rejectedIrrelevant(i)) - when(mockCandidateRepo.listReviewed()).thenReturn(Future.successful(accepted ++ rejected)) - - val (origKw, origCon, origCit, origJrn) = relevanceScoringService.getActiveWeights - - whenReady(service.computeLearnedWeights()) { result => - val weights = result.get - relevanceScoringService.applyLearnedWeights(weights) - - val (newKw, newCon, newCit, newJrn) = relevanceScoringService.getActiveWeights - // Weights should have changed from defaults - (newKw, newCon, newCit, newJrn) must not be ((origKw, origCon, origCit, origJrn)) - } - } - - "revert to defaults when 
cleared" in { - relevanceScoringService.applyLearnedWeights(LearnedWeights(0.5, 0.2, 0.2, 0.1, 10, Map.empty)) - relevanceScoringService.getActiveWeights._1 mustBe 0.5 - - relevanceScoringService.clearLearnedWeights() - relevanceScoringService.getActiveWeights._1 mustBe 0.35 - } - } - } -} diff --git a/test/services/StrAgeServiceSpec.scala b/test/services/StrAgeServiceSpec.scala deleted file mode 100644 index 07d2c74d..00000000 --- a/test/services/StrAgeServiceSpec.scala +++ /dev/null @@ -1,299 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.dal.domain.genomics.{BiosampleVariantCall, HaplogroupCharacterState, StrMutationRate} -import models.domain.haplogroups.AgeEstimate -import org.mockito.ArgumentMatchers.{any, anyInt} -import org.mockito.Mockito.{reset, when} -import repositories.{BiosampleVariantCallRepository, HaplogroupCharacterStateRepository, StrMutationRateRepository} - -import java.time.Instant -import scala.concurrent.Future - -class StrAgeServiceSpec extends ServiceSpec { - - val mockStrRateRepo: StrMutationRateRepository = mock[StrMutationRateRepository] - val mockCharStateRepo: HaplogroupCharacterStateRepository = mock[HaplogroupCharacterStateRepository] - val mockVariantCallRepo: BiosampleVariantCallRepository = mock[BiosampleVariantCallRepository] - - val service = new StrAgeService(mockStrRateRepo, mockCharStateRepo, mockVariantCallRepo) - - override def beforeEach(): Unit = { - reset(mockStrRateRepo, mockCharStateRepo, mockVariantCallRepo) - } - - def makeRate(id: Int, markerName: String, rate: Double): StrMutationRate = - StrMutationRate( - id = Some(id), markerName = markerName, - mutationRate = BigDecimal(rate), - mutationRateLower = Some(BigDecimal(rate * 0.8)), - mutationRateUpper = Some(BigDecimal(rate * 1.2)), - source = Some("Ballantyne 2010") - ) - - val rate1: StrMutationRate = makeRate(1, "DYS456", 0.0048) - val rate2: StrMutationRate = makeRate(2, "DYS389I", 0.0022) - val rate3: StrMutationRate = makeRate(3, 
"DYS19", 0.0028) - val rate4: StrMutationRate = makeRate(4, "DYS391", 0.0058) - val rate5: StrMutationRate = makeRate(5, "DYS390", 0.0031) - - val allRates: Map[Int, StrMutationRate] = Map(1 -> rate1, 2 -> rate2, 3 -> rate3, 4 -> rate4, 5 -> rate5) - - "StrAgeService" should { - - "calculateFromGeneticDistance" should { - - "calculate age from matching markers" in { - val ancestral = Map(1 -> 15, 2 -> 13, 3 -> 14, 4 -> 10, 5 -> 24) - val observed = Map(1 -> 16, 2 -> 13, 3 -> 15, 4 -> 11, 5 -> 23) - // Distances: 1, 0, 1, 1, 1 = total 4 - - val result = service.calculateFromGeneticDistance(ancestral, observed, allRates) - - result.markerCount mustBe 5 - result.totalGeneticDistance mustBe 4 - result.estimate.ybp must be > 0 - result.estimate.ybpLower mustBe defined - result.estimate.ybpUpper mustBe defined - result.method mustBe "STR_GENETIC_DISTANCE" - } - - "return zero for identical haplotypes" in { - val ancestral = Map(1 -> 15, 2 -> 13, 3 -> 14) - val observed = Map(1 -> 15, 2 -> 13, 3 -> 14) - - val result = service.calculateFromGeneticDistance(ancestral, observed, allRates) - - result.estimate.ybp mustBe 0 - result.totalGeneticDistance mustBe 0 - result.markerCount mustBe 3 - } - - "return zero when no common markers" in { - val ancestral = Map(1 -> 15, 2 -> 13) - val observed = Map(3 -> 14, 4 -> 10) - - val result = service.calculateFromGeneticDistance(ancestral, observed, allRates) - - result.estimate.ybp mustBe 0 - result.markerCount mustBe 0 - } - - "handle partial marker overlap" in { - val ancestral = Map(1 -> 15, 2 -> 13, 3 -> 14) - val observed = Map(1 -> 16, 3 -> 15, 4 -> 10) // only 1 and 3 overlap - - val result = service.calculateFromGeneticDistance(ancestral, observed, allRates) - - result.markerCount mustBe 2 // only variantIds 1 and 3 are common - result.totalGeneticDistance mustBe 2 // |16-15| + |15-14| - } - - "produce older estimate with larger genetic distance" in { - val ancestral = Map(1 -> 15, 2 -> 13, 3 -> 14, 4 -> 10, 5 -> 24) - val 
closeObserved = Map(1 -> 16, 2 -> 13, 3 -> 14, 4 -> 10, 5 -> 24) // distance 1 - val farObserved = Map(1 -> 18, 2 -> 15, 3 -> 17, 4 -> 13, 5 -> 20) // distance 13 - - val closeResult = service.calculateFromGeneticDistance(ancestral, closeObserved, allRates) - val farResult = service.calculateFromGeneticDistance(ancestral, farObserved, allRates) - - farResult.estimate.ybp must be > closeResult.estimate.ybp - } - - "use generation length parameter" in { - val ancestral = Map(1 -> 15, 2 -> 13) - val observed = Map(1 -> 18, 2 -> 15) // distance 5 - - val result33 = service.calculateFromGeneticDistance(ancestral, observed, allRates, 33.0) - val result25 = service.calculateFromGeneticDistance(ancestral, observed, allRates, 25.0) - - // Shorter generation = younger estimate - result25.estimate.ybp must be < result33.estimate.ybp - } - } - - "calculateTmrcaFromStrs" should { - - "calculate TMRCA between two samples" in { - val sample1 = Map(1 -> 15, 2 -> 13, 3 -> 14) - val sample2 = Map(1 -> 17, 2 -> 12, 3 -> 16) - // Distances: 2, 1, 2 = total 5 - - val result = service.calculateTmrcaFromStrs(sample1, sample2, allRates) - - result.markerCount mustBe 3 - result.totalGeneticDistance mustBe 5 - result.estimate.ybp must be > 0 - result.method mustBe "STR_TMRCA" - } - - "return zero for identical samples" in { - val sample = Map(1 -> 15, 2 -> 13) - val result = service.calculateTmrcaFromStrs(sample, sample, allRates) - result.estimate.ybp mustBe 0 - } - - "produce younger TMRCA than single-lineage distance" in { - // TMRCA divides by 2*rate (both lineages), so should be younger - val ancestral = Map(1 -> 15, 2 -> 13, 3 -> 14) - val observed = Map(1 -> 18, 2 -> 15, 3 -> 16) // distance 6 - - val singleLineage = service.calculateFromGeneticDistance(ancestral, observed, allRates) - // For TMRCA, same total distance but divided by 2x rate - val tmrca = service.calculateTmrcaFromStrs(ancestral, observed, allRates) - - tmrca.estimate.ybp must be < singleLineage.estimate.ybp - } - } 
- - "geneticDistanceConfidenceInterval" should { - - "return wider interval for zero distance" in { - val (lower, upper) = service.geneticDistanceConfidenceInterval(0, 0.01, 5) - lower mustBe 0.0 - upper must be > 0.0 - } - - "produce narrower relative CI with more distance" in { - val (l5, u5) = service.geneticDistanceConfidenceInterval(5, 0.01, 10) - val (l20, u20) = service.geneticDistanceConfidenceInterval(20, 0.01, 10) - - val relWidth5 = (u5 - l5) / 5.0 - val relWidth20 = (u20 - l20) / 20.0 - relWidth20 must be < relWidth5 - } - } - - "multiStepProbability" should { - - "return correct probabilities" in { - service.multiStepProbability(0) mustBe 1.0 - service.multiStepProbability(1) mustBe 0.962 - service.multiStepProbability(-1) mustBe 0.962 - service.multiStepProbability(2) mustBe 0.032 - service.multiStepProbability(-2) mustBe 0.032 - service.multiStepProbability(3) mustBe 0.004 - service.multiStepProbability(5) mustBe 0.001 - } - } - - "calculateForBiosample" should { - - "return None when no ancestral states exist" in { - when(mockCharStateRepo.findStrStatesForHaplogroup(anyInt, any[Seq[Int]])) - .thenReturn(Future.successful(Seq.empty)) - when(mockVariantCallRepo.findByBiosampleAndVariants(anyInt, any[Seq[Int]])) - .thenReturn(Future.successful(Seq.empty)) - when(mockStrRateRepo.findAll()).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.calculateForBiosample(1, 100, Seq(1, 2, 3))) { result => - result mustBe empty - } - } - - "return None when no observed values exist" in { - val states = Seq( - HaplogroupCharacterState(Some(1), 100, 1, "15"), - HaplogroupCharacterState(Some(2), 100, 2, "13") - ) - when(mockCharStateRepo.findStrStatesForHaplogroup(100, Seq(1, 2))) - .thenReturn(Future.successful(states)) - when(mockVariantCallRepo.findByBiosampleAndVariants(1, Seq(1, 2))) - .thenReturn(Future.successful(Seq.empty)) - when(mockStrRateRepo.findAll()) - .thenReturn(Future.successful(Seq(rate1, rate2))) - - 
whenReady(service.calculateForBiosample(1, 100, Seq(1, 2))) { result => - result mustBe empty - } - } - } - } - - "combineSnpAndStrEstimates" should { - - val snpService = new BranchAgeEstimationService( - mock[repositories.HaplogroupCoreRepository], - mock[repositories.HaplogroupVariantRepository], - mock[repositories.BiosampleCallableLociRepository] - ) - - "produce narrower CI than either estimate alone" in { - val snpResult = AgeEstimateResult( - estimate = AgeEstimate(1000, Some(700), Some(1300)), - snpCount = 12, callableLoci = 15_000_000L, - mutationRate = 8.33e-10, method = "SNP_POISSON" - ) - val strResult = StrAgeEstimateResult( - estimate = AgeEstimate(900, Some(600), Some(1200)), - markerCount = 20, totalGeneticDistance = 8, - method = "STR_GENETIC_DISTANCE" - ) - - val combined = snpService.combineSnpAndStrEstimates(snpResult, strResult) - - combined.method mustBe "COMBINED_SNP_STR" - val combinedWidth = combined.estimate.ybpUpper.get - combined.estimate.ybpLower.get - val snpWidth = 1300 - 700 - val strWidth = 1200 - 600 - combinedWidth must be < snpWidth - combinedWidth must be < strWidth - } - - "weight toward more precise estimate" in { - val preciseSnp = AgeEstimateResult( - estimate = AgeEstimate(1000, Some(900), Some(1100)), // narrow CI - snpCount = 50, callableLoci = 15_000_000L, - mutationRate = 8.33e-10, method = "SNP_POISSON" - ) - val impreciseStr = StrAgeEstimateResult( - estimate = AgeEstimate(800, Some(400), Some(1200)), // wide CI - markerCount = 5, totalGeneticDistance = 3, - method = "STR_GENETIC_DISTANCE" - ) - - val combined = snpService.combineSnpAndStrEstimates(preciseSnp, impreciseStr) - - // Combined should be closer to the precise SNP estimate - val distToSnp = math.abs(combined.estimate.ybp - 1000) - val distToStr = math.abs(combined.estimate.ybp - 800) - distToSnp must be < distToStr - } - - "fall back to SNP-only when STR has zero age" in { - val snpResult = AgeEstimateResult( - estimate = AgeEstimate(1000, Some(700), 
Some(1300)), - snpCount = 12, callableLoci = 15_000_000L, - mutationRate = 8.33e-10, method = "SNP_POISSON" - ) - val zeroStr = StrAgeEstimateResult( - estimate = AgeEstimate(0, Some(0), Some(0)), - markerCount = 0, totalGeneticDistance = 0, - method = "STR_GENETIC_DISTANCE" - ) - - val combined = snpService.combineSnpAndStrEstimates(snpResult, zeroStr) - - combined.method mustBe "SNP_ONLY" - combined.estimate.ybp mustBe 1000 - } - - "fall back to STR-only when SNP has zero age" in { - val zeroSnp = AgeEstimateResult( - estimate = AgeEstimate(0, Some(0), Some(0)), - snpCount = 0, callableLoci = 15_000_000L, - mutationRate = 8.33e-10, method = "SNP_POISSON" - ) - val strResult = StrAgeEstimateResult( - estimate = AgeEstimate(900, Some(600), Some(1200)), - markerCount = 20, totalGeneticDistance = 8, - method = "STR_GENETIC_DISTANCE" - ) - - val combined = snpService.combineSnpAndStrEstimates(zeroSnp, strResult) - - combined.method mustBe "STR_ONLY" - combined.estimate.ybp mustBe 900 - } - } -} diff --git a/test/services/SubmissionProvenanceServiceSpec.scala b/test/services/SubmissionProvenanceServiceSpec.scala deleted file mode 100644 index 0e0ebbd5..00000000 --- a/test/services/SubmissionProvenanceServiceSpec.scala +++ /dev/null @@ -1,290 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.pds.{PdsNode, PdsSubmission} -import org.mockito.ArgumentMatchers.{any, eq as meq} -import org.mockito.Mockito.{never, reset, verify, when} -import play.api.libs.json.Json -import repositories.{PdsNodeRepository, PdsSubmissionRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class SubmissionProvenanceServiceSpec extends ServiceSpec { - - val mockSubmissionRepo: PdsSubmissionRepository = mock[PdsSubmissionRepository] - val mockNodeRepo: PdsNodeRepository = mock[PdsNodeRepository] - - val service = new SubmissionProvenanceService(mockSubmissionRepo, mockNodeRepo) - - override def beforeEach(): Unit = { - 
reset(mockSubmissionRepo, mockNodeRepo) - } - - val now: LocalDateTime = LocalDateTime.now() - - val testNode: PdsNode = PdsNode( - id = Some(1), did = "did:plc:user1", pdsUrl = "https://pds.user1.example.com", - softwareVersion = Some("0.1.0"), status = "ONLINE" - ) - - val testSubmission: PdsSubmission = PdsSubmission( - id = Some(1), pdsNodeId = 1, submissionType = "HAPLOGROUP_CALL", - biosampleId = Some(100), proposedValue = "R-CTS4466", - confidenceScore = Some(0.95), algorithmVersion = Some("v2.1"), - softwareVersion = Some("0.1.0"), status = "PENDING" - ) - - val sampleGuid: UUID = UUID.randomUUID() - - "SubmissionProvenanceService.recordSubmission" should { - - "record a haplogroup call submission" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockSubmissionRepo.create(any[PdsSubmission])) - .thenReturn(Future.successful(testSubmission)) - - val result = service.recordSubmission( - did = "did:plc:user1", - submissionType = "HAPLOGROUP_CALL", - proposedValue = "R-CTS4466", - biosampleId = Some(100), - confidenceScore = Some(0.95), - algorithmVersion = Some("v2.1") - ).futureValue - - result.isRight mustBe true - result.toOption.get.submissionType mustBe "HAPLOGROUP_CALL" - result.toOption.get.proposedValue mustBe "R-CTS4466" - verify(mockSubmissionRepo).create(any[PdsSubmission]) - } - - "record a variant call submission with payload" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - val payload = Json.obj("position" -> 12345, "ref" -> "A", "alt" -> "G") - when(mockSubmissionRepo.create(any[PdsSubmission])) - .thenReturn(Future.successful(testSubmission.copy(submissionType = "VARIANT_CALL", - proposedValue = "chrY:12345:A>G", payload = Some(payload)))) - - val result = service.recordSubmission( - did = "did:plc:user1", - submissionType = "VARIANT_CALL", - proposedValue = "chrY:12345:A>G", - payload = Some(payload) - ).futureValue - - result.isRight 
mustBe true - } - - "record a submission with biosample GUID" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockSubmissionRepo.create(any[PdsSubmission])) - .thenReturn(Future.successful(testSubmission.copy(biosampleGuid = Some(sampleGuid)))) - - val result = service.recordSubmission( - did = "did:plc:user1", - submissionType = "HAPLOGROUP_CALL", - proposedValue = "R-L151", - biosampleGuid = Some(sampleGuid) - ).futureValue - - result.isRight mustBe true - } - - "inherit software version from node when not specified" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockSubmissionRepo.create(any[PdsSubmission])) - .thenAnswer(inv => Future.successful(inv.getArgument[PdsSubmission](0).copy(id = Some(5)))) - - val result = service.recordSubmission( - did = "did:plc:user1", - submissionType = "HAPLOGROUP_CALL", - proposedValue = "R-M269" - ).futureValue - - result.isRight mustBe true - result.toOption.get.softwareVersion mustBe Some("0.1.0") - } - - "reject invalid submission type" in { - val result = service.recordSubmission( - did = "did:plc:user1", - submissionType = "INVALID", - proposedValue = "test" - ).futureValue - - result mustBe Left("Invalid submission type: INVALID") - verify(mockNodeRepo, never()).findByDid(any()) - } - - "reject submission from unregistered PDS" in { - when(mockNodeRepo.findByDid("did:plc:unknown")).thenReturn(Future.successful(None)) - - val result = service.recordSubmission( - did = "did:plc:unknown", - submissionType = "HAPLOGROUP_CALL", - proposedValue = "R-M269" - ).futureValue - - result mustBe Left("PDS node not registered: did:plc:unknown") - } - } - - "SubmissionProvenanceService.acceptSubmission" should { - - "accept a pending submission" in { - when(mockSubmissionRepo.findById(1)).thenReturn(Future.successful(Some(testSubmission))) - when(mockSubmissionRepo.updateStatus(meq(1), meq("ACCEPTED"), 
meq(Some("curator1")), any())) - .thenReturn(Future.successful(true)) - - val result = service.acceptSubmission(1, "curator1", Some("Verified")).futureValue - result mustBe Right(true) - } - - "reject accepting a non-pending submission" in { - val accepted = testSubmission.copy(status = "ACCEPTED") - when(mockSubmissionRepo.findById(1)).thenReturn(Future.successful(Some(accepted))) - - val result = service.acceptSubmission(1, "curator1").futureValue - result mustBe Left("Cannot update submission with status: ACCEPTED") - } - - "return error for non-existent submission" in { - when(mockSubmissionRepo.findById(999)).thenReturn(Future.successful(None)) - - val result = service.acceptSubmission(999, "curator1").futureValue - result mustBe Left("Submission not found") - } - } - - "SubmissionProvenanceService.rejectSubmission" should { - - "reject a pending submission with notes" in { - when(mockSubmissionRepo.findById(1)).thenReturn(Future.successful(Some(testSubmission))) - when(mockSubmissionRepo.updateStatus(meq(1), meq("REJECTED"), meq(Some("curator1")), - meq(Some("Low confidence")))).thenReturn(Future.successful(true)) - - val result = service.rejectSubmission(1, "curator1", Some("Low confidence")).futureValue - result mustBe Right(true) - } - } - - "SubmissionProvenanceService.supersedeSubmission" should { - - "supersede a pending submission" in { - when(mockSubmissionRepo.findById(1)).thenReturn(Future.successful(Some(testSubmission))) - when(mockSubmissionRepo.updateStatus(meq(1), meq("SUPERSEDED"), meq(Some("system")), any())) - .thenReturn(Future.successful(true)) - - val result = service.supersedeSubmission(1, "system", Some("Newer call available")).futureValue - result mustBe Right(true) - } - } - - "SubmissionProvenanceService.getSubmissionsForNode" should { - - "return submissions for a node" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockSubmissionRepo.findByNode(any[Int], any[Int])) - 
.thenReturn(Future.successful(Seq(testSubmission))) - - val result = service.getSubmissionsForNode("did:plc:user1").futureValue - result.isRight mustBe true - result.toOption.get.size mustBe 1 - } - - "filter by submission type" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockSubmissionRepo.findByNodeAndType(1, "VARIANT_CALL")) - .thenReturn(Future.successful(Seq.empty)) - - val result = service.getSubmissionsForNode("did:plc:user1", Some("VARIANT_CALL")).futureValue - result.isRight mustBe true - result.toOption.get mustBe empty - } - - "return error for unknown node" in { - when(mockNodeRepo.findByDid("did:plc:unknown")).thenReturn(Future.successful(None)) - - val result = service.getSubmissionsForNode("did:plc:unknown").futureValue - result mustBe Left("PDS node not found") - } - } - - "SubmissionProvenanceService.getPendingSubmissions" should { - - "return all pending submissions" in { - when(mockSubmissionRepo.findByStatus("PENDING", 100)) - .thenReturn(Future.successful(Seq(testSubmission))) - - val result = service.getPendingSubmissions().futureValue - result.size mustBe 1 - } - - "filter pending by type" in { - when(mockSubmissionRepo.findByTypeAndStatus("BRANCH_PROPOSAL", "PENDING", 50)) - .thenReturn(Future.successful(Seq.empty)) - - val result = service.getPendingSubmissions(Some("BRANCH_PROPOSAL"), 50).futureValue - result mustBe empty - } - } - - "SubmissionProvenanceService.getNodeSubmissionSummary" should { - - "compute submission summary for a node" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockSubmissionRepo.countByNodeAndStatus(1)).thenReturn(Future.successful( - Map("PENDING" -> 3, "ACCEPTED" -> 15, "REJECTED" -> 2, "SUPERSEDED" -> 1) - )) - - val result = service.getNodeSubmissionSummary("did:plc:user1").futureValue - result.isRight mustBe true - - val summary = result.toOption.get - summary.totalSubmissions mustBe 21 - 
summary.pendingCount mustBe 3 - summary.acceptedCount mustBe 15 - summary.rejectedCount mustBe 2 - summary.acceptanceRate mustBe (15.0 / 17.0) +- 0.001 - } - - "handle node with no submissions" in { - when(mockNodeRepo.findByDid("did:plc:user1")).thenReturn(Future.successful(Some(testNode))) - when(mockSubmissionRepo.countByNodeAndStatus(1)).thenReturn(Future.successful(Map.empty)) - - val result = service.getNodeSubmissionSummary("did:plc:user1").futureValue - result.isRight mustBe true - val summary = result.toOption.get - summary.totalSubmissions mustBe 0 - summary.acceptanceRate mustBe 0.0 - } - - "return error for unknown node" in { - when(mockNodeRepo.findByDid("did:plc:unknown")).thenReturn(Future.successful(None)) - - val result = service.getNodeSubmissionSummary("did:plc:unknown").futureValue - result mustBe Left("PDS node not found") - } - } - - "SubmissionProvenanceService.getSubmissionsForBiosample" should { - - "return submissions for a biosample ID" in { - when(mockSubmissionRepo.findByBiosampleId(100)) - .thenReturn(Future.successful(Seq(testSubmission))) - - val result = service.getSubmissionsForBiosample(100).futureValue - result.size mustBe 1 - } - - "return submissions for a biosample GUID" in { - when(mockSubmissionRepo.findByBiosampleGuid(sampleGuid)) - .thenReturn(Future.successful(Seq(testSubmission.copy(biosampleGuid = Some(sampleGuid))))) - - val result = service.getSubmissionsForBiosampleGuid(sampleGuid).futureValue - result.size mustBe 1 - } - } -} diff --git a/test/services/TargetedSequencingServiceSpec.scala b/test/services/TargetedSequencingServiceSpec.scala deleted file mode 100644 index 1ec7d5cb..00000000 --- a/test/services/TargetedSequencingServiceSpec.scala +++ /dev/null @@ -1,241 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.domain.genomics.* -import org.mockito.Mockito.{reset, when} -import repositories.{TestTypeRepository, TestTypeTargetRegionRepository} - -import scala.concurrent.Future - -class 
TargetedSequencingServiceSpec extends ServiceSpec { - - val mockTestTypeRepo: TestTypeRepository = mock[TestTypeRepository] - val mockTargetRegionRepo: TestTypeTargetRegionRepository = mock[TestTypeTargetRegionRepository] - - val service = new TargetedSequencingService(mockTestTypeRepo, mockTargetRegionRepo) - - override def beforeEach(): Unit = { - reset(mockTestTypeRepo, mockTargetRegionRepo) - } - - val bigY700: TestTypeRow = TestTypeRow( - id = Some(10), code = "BIG_Y_700", displayName = "FTDNA Big Y-700", - category = DataGenerationMethod.Sequencing, vendor = Some("FamilyTreeDNA"), - targetType = TargetType.YChromosome, - expectedMinDepth = Some(30.0), expectedTargetDepth = Some(50.0), - supportsHaplogroupY = true, supportsHaplogroupMt = false, - supportsAutosomalIbd = false, supportsAncestry = false, - typicalFileFormats = List("BAM", "VCF", "BED") - ) - - val mtFullSeq: TestTypeRow = TestTypeRow( - id = Some(11), code = "MT_FULL_SEQUENCE", displayName = "mtDNA Full Sequence", - category = DataGenerationMethod.Sequencing, vendor = Some("FamilyTreeDNA"), - targetType = TargetType.MtDna, - expectedMinDepth = Some(500.0), expectedTargetDepth = Some(1000.0), - supportsHaplogroupY = false, supportsHaplogroupMt = true, - supportsAutosomalIbd = false, supportsAncestry = false, - typicalFileFormats = List("BAM", "FASTA", "VCF") - ) - - val bigY500: TestTypeRow = TestTypeRow( - id = Some(12), code = "BIG_Y_500", displayName = "FTDNA Big Y-500 (Legacy)", - category = DataGenerationMethod.Sequencing, vendor = Some("FamilyTreeDNA"), - targetType = TargetType.YChromosome, - successorTestTypeId = Some(10), - supportsHaplogroupY = true, supportsHaplogroupMt = false, - supportsAutosomalIbd = false, supportsAncestry = false, - typicalFileFormats = List("BAM", "VCF", "BED") - ) - - val wgs: TestTypeRow = TestTypeRow( - id = Some(1), code = "WGS", displayName = "Whole Genome Sequencing", - category = DataGenerationMethod.Sequencing, - targetType = TargetType.WholeGenome, - 
supportsHaplogroupY = true, supportsHaplogroupMt = true, - supportsAutosomalIbd = true, supportsAncestry = true, - typicalFileFormats = List("BAM", "CRAM", "VCF") - ) - - val bigYRegion: TestTypeTargetRegion = TestTypeTargetRegion( - id = Some(1), testTypeId = 10, contigName = "chrY", - startPosition = Some(2781480), endPosition = Some(56887903), - regionName = "Y Combbed Region", regionType = "TARGETED_SNPS", - expectedCoveragePct = Some(0.95), expectedMinDepth = Some(30.0) - ) - - val mtRegion: TestTypeTargetRegion = TestTypeTargetRegion( - id = Some(2), testTypeId = 11, contigName = "chrM", - startPosition = Some(1), endPosition = Some(16569), - regionName = "Full Mitochondrial Genome", regionType = "FULL", - expectedCoveragePct = Some(0.999), expectedMinDepth = Some(500.0) - ) - - "TargetedSequencingService" should { - - "return capabilities for Big Y-700" in { - when(mockTestTypeRepo.findByCode("BIG_Y_700")).thenReturn(Future.successful(Some(bigY700))) - when(mockTargetRegionRepo.findByTestTypeId(10)).thenReturn(Future.successful(Seq(bigYRegion))) - - whenReady(service.getTargetedTestCapabilities("BIG_Y_700")) { result => - result mustBe defined - val caps = result.get - caps.supportsYDna mustBe true - caps.supportsMtDna mustBe false - caps.primaryContig mustBe Some("chrY") - caps.totalTargetedBases mustBe Some(54106424L) - caps.targetRegions must have size 1 - } - } - - "return capabilities for mtDNA Full Sequence" in { - when(mockTestTypeRepo.findByCode("MT_FULL_SEQUENCE")).thenReturn(Future.successful(Some(mtFullSeq))) - when(mockTargetRegionRepo.findByTestTypeId(11)).thenReturn(Future.successful(Seq(mtRegion))) - - whenReady(service.getTargetedTestCapabilities("MT_FULL_SEQUENCE")) { result => - result mustBe defined - val caps = result.get - caps.supportsYDna mustBe false - caps.supportsMtDna mustBe true - caps.primaryContig mustBe Some("chrM") - caps.totalTargetedBases mustBe Some(16569L) - } - } - - "return None for unknown test type" in { - 
when(mockTestTypeRepo.findByCode("UNKNOWN")).thenReturn(Future.successful(None)) - - whenReady(service.getTargetedTestCapabilities("UNKNOWN")) { result => - result mustBe None - } - } - - "assess coverage as HIGH when meeting expectations" in { - when(mockTestTypeRepo.findByCode("BIG_Y_700")).thenReturn(Future.successful(Some(bigY700))) - when(mockTargetRegionRepo.findByTestTypeId(10)).thenReturn(Future.successful(Seq(bigYRegion))) - - whenReady(service.assessCoverage("BIG_Y_700", Some(55.0), Some(0.96))) { result => - result mustBe defined - val assessment = result.get - assessment.qualityTier mustBe "HIGH" - assessment.overallMeetsExpectation mustBe true - assessment.targetRegions.head.meetsExpectation mustBe true - } - } - - "assess coverage as LOW when below expectations" in { - when(mockTestTypeRepo.findByCode("BIG_Y_700")).thenReturn(Future.successful(Some(bigY700))) - when(mockTargetRegionRepo.findByTestTypeId(10)).thenReturn(Future.successful(Seq(bigYRegion))) - - whenReady(service.assessCoverage("BIG_Y_700", Some(15.0), Some(0.60))) { result => - result mustBe defined - val assessment = result.get - assessment.qualityTier mustBe "LOW" - assessment.overallMeetsExpectation mustBe false - assessment.targetRegions.head.meetsExpectation mustBe false - } - } - - "assess mtDNA coverage correctly" in { - when(mockTestTypeRepo.findByCode("MT_FULL_SEQUENCE")).thenReturn(Future.successful(Some(mtFullSeq))) - when(mockTargetRegionRepo.findByTestTypeId(11)).thenReturn(Future.successful(Seq(mtRegion))) - - whenReady(service.assessCoverage("MT_FULL_SEQUENCE", Some(1200.0), Some(0.999))) { result => - result mustBe defined - val assessment = result.get - assessment.qualityTier mustBe "HIGH" - assessment.overallMeetsExpectation mustBe true - } - } - - "return INSUFFICIENT for very low coverage" in { - when(mockTestTypeRepo.findByCode("BIG_Y_700")).thenReturn(Future.successful(Some(bigY700))) - 
when(mockTargetRegionRepo.findByTestTypeId(10)).thenReturn(Future.successful(Seq(bigYRegion))) - - whenReady(service.assessCoverage("BIG_Y_700", Some(5.0), Some(0.30))) { result => - result mustBe defined - result.get.qualityTier mustBe "INSUFFICIENT" - } - } - - "return None when assessing coverage for type with no regions" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgs))) - when(mockTargetRegionRepo.findByTestTypeId(1)).thenReturn(Future.successful(Seq.empty)) - - whenReady(service.assessCoverage("WGS", Some(30.0), Some(0.95))) { result => - result mustBe None - } - } - - "list targeted Y-DNA tests" in { - when(mockTestTypeRepo.findByCapability( - org.mockito.ArgumentMatchers.eq(Some(true)), - org.mockito.ArgumentMatchers.any[Option[Boolean]], - org.mockito.ArgumentMatchers.any[Option[Boolean]], - org.mockito.ArgumentMatchers.any[Option[Boolean]] - )).thenReturn(Future.successful(Seq(bigY700, wgs))) - - whenReady(service.getTargetedYTests) { tests => - tests must have size 1 - tests.head.code mustBe "BIG_Y_700" - } - } - - "list targeted mtDNA tests" in { - when(mockTestTypeRepo.findByCapability( - org.mockito.ArgumentMatchers.any[Option[Boolean]], - org.mockito.ArgumentMatchers.eq(Some(true)), - org.mockito.ArgumentMatchers.any[Option[Boolean]], - org.mockito.ArgumentMatchers.any[Option[Boolean]] - )).thenReturn(Future.successful(Seq(mtFullSeq, wgs))) - - whenReady(service.getTargetedMtTests) { tests => - tests must have size 1 - tests.head.code mustBe "MT_FULL_SEQUENCE" - } - } - - "find upgrade path from Big Y-500 to Big Y-700" in { - when(mockTestTypeRepo.findByCode("BIG_Y_500")).thenReturn(Future.successful(Some(bigY500))) - when(mockTestTypeRepo.getTestTypeRowsByIds(Seq(10))).thenReturn(Future.successful(Seq(bigY700))) - - whenReady(service.findUpgradePath("BIG_Y_500")) { result => - result mustBe defined - result.get.code mustBe "BIG_Y_700" - } - } - - "return None for upgrade path when no successor" in { - 
when(mockTestTypeRepo.findByCode("BIG_Y_700")).thenReturn(Future.successful(Some(bigY700))) - - whenReady(service.findUpgradePath("BIG_Y_700")) { result => - result mustBe None - } - } - - "identify targeted test types" in { - when(mockTestTypeRepo.findByCode("BIG_Y_700")).thenReturn(Future.successful(Some(bigY700))) - - whenReady(service.isTargetedTest("BIG_Y_700")) { result => - result mustBe true - } - } - - "identify non-targeted test types" in { - when(mockTestTypeRepo.findByCode("WGS")).thenReturn(Future.successful(Some(wgs))) - - whenReady(service.isTargetedTest("WGS")) { result => - result mustBe false - } - } - - "calculate quality tier boundaries correctly" in { - TargetedCoverageAssessment.qualityTierFromCoverage(0.99) mustBe "HIGH" - TargetedCoverageAssessment.qualityTierFromCoverage(0.95) mustBe "HIGH" - TargetedCoverageAssessment.qualityTierFromCoverage(0.90) mustBe "MEDIUM" - TargetedCoverageAssessment.qualityTierFromCoverage(0.80) mustBe "MEDIUM" - TargetedCoverageAssessment.qualityTierFromCoverage(0.60) mustBe "LOW" - TargetedCoverageAssessment.qualityTierFromCoverage(0.30) mustBe "INSUFFICIENT" - } - } -} diff --git a/test/services/TerminalVariantClusteringServiceSpec.scala b/test/services/TerminalVariantClusteringServiceSpec.scala deleted file mode 100644 index 87763e95..00000000 --- a/test/services/TerminalVariantClusteringServiceSpec.scala +++ /dev/null @@ -1,270 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.domain.discovery.* -import models.domain.haplogroups.Haplogroup -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.{HaplogroupCoreRepository, PrivateVariantRepository, ProposedBranchRepository} - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class TerminalVariantClusteringServiceSpec extends ServiceSpec { - - val mockPrivateVariantRepo: 
PrivateVariantRepository = mock[PrivateVariantRepository] - val mockProposedBranchRepo: ProposedBranchRepository = mock[ProposedBranchRepository] - val mockCoreRepo: HaplogroupCoreRepository = mock[HaplogroupCoreRepository] - - // ProposalEngine needs its own repos - val proposalEngine = new ProposalEngine(mockProposedBranchRepo, mockPrivateVariantRepo) - - val service = new TerminalVariantClusteringService( - mockPrivateVariantRepo, proposalEngine, mockCoreRepo, mockProposedBranchRepo - ) - - override def beforeEach(): Unit = { - reset(mockPrivateVariantRepo, mockProposedBranchRepo, mockCoreRepo) - when(mockProposedBranchRepo.getConfig(any[HaplogroupType], anyString)) - .thenReturn(Future.successful(None)) - } - - val now: LocalDateTime = LocalDateTime.of(2025, 6, 1, 12, 0) - - def makeGuid(): UUID = UUID.randomUUID() - - def makePV(sampleId: Int, sampleGuid: UUID, variantId: Int, terminalHgId: Int = 100): BiosamplePrivateVariant = - BiosamplePrivateVariant( - id = Some(sampleId * 100 + variantId), - sampleType = BiosampleSourceType.External, - sampleId = sampleId, - sampleGuid = sampleGuid, - variantId = variantId, - haplogroupType = HaplogroupType.Y, - terminalHaplogroupId = terminalHgId, - discoveredAt = now, - status = PrivateVariantStatus.Active - ) - - val parentHg: Haplogroup = Haplogroup( - id = Some(100), name = "R-M269", lineage = Some("R>R-M269"), - description = None, haplogroupType = HaplogroupType.Y, - revisionId = 1, source = "backbone", confidenceLevel = "high", - validFrom = now, validUntil = None - ) - - "groupBySample" should { - "group variants by sample key" in { - val guid1 = makeGuid() - val guid2 = makeGuid() - val pvs = Seq( - makePV(1, guid1, 10), makePV(1, guid1, 20), - makePV(2, guid2, 10), makePV(2, guid2, 30) - ) - - val result = service.groupBySample(pvs) - result must have size 2 - result(SampleKey(BiosampleSourceType.External, 1, guid1)) mustBe Set(10, 20) - result(SampleKey(BiosampleSourceType.External, 2, guid2)) mustBe Set(10, 
30) - } - - "handle empty input" in { - service.groupBySample(Seq.empty) mustBe empty - } - } - - "findClusters" should { - "find exact clusters when samples share identical variant sets" in { - val guid1 = makeGuid() - val guid2 = makeGuid() - val guid3 = makeGuid() - val sampleSets = Map( - SampleKey(BiosampleSourceType.External, 1, guid1) -> Set(10, 20, 30), - SampleKey(BiosampleSourceType.External, 2, guid2) -> Set(10, 20, 30), - SampleKey(BiosampleSourceType.External, 3, guid3) -> Set(40, 50) - ) - - val clusters = service.findClusters(sampleSets, minClusterSize = 2, minVariantsPerCluster = 1) - val exactClusters = clusters.filter(_.clusterType == ClusterType.Exact) - exactClusters must have size 1 - exactClusters.head.variantIds mustBe Set(10, 20, 30) - exactClusters.head.supportingSamples must have size 2 - } - - "return empty when fewer samples than minClusterSize" in { - val guid1 = makeGuid() - val sampleSets = Map( - SampleKey(BiosampleSourceType.External, 1, guid1) -> Set(10, 20) - ) - - val clusters = service.findClusters(sampleSets, minClusterSize = 2, minVariantsPerCluster = 1) - clusters mustBe empty - } - - "respect minVariantsPerCluster threshold" in { - val guid1 = makeGuid() - val guid2 = makeGuid() - val sampleSets = Map( - SampleKey(BiosampleSourceType.External, 1, guid1) -> Set(10), - SampleKey(BiosampleSourceType.External, 2, guid2) -> Set(10) - ) - - // Require at least 2 variants per cluster - val clusters = service.findClusters(sampleSets, minClusterSize = 2, minVariantsPerCluster = 2) - val exactClusters = clusters.filter(_.clusterType == ClusterType.Exact) - exactClusters mustBe empty - } - - "find core clusters from overlapping variant sets" in { - val guid1 = makeGuid() - val guid2 = makeGuid() - val guid3 = makeGuid() - // Samples share core {10, 20} but differ on additional variants - val sampleSets = Map( - SampleKey(BiosampleSourceType.External, 1, guid1) -> Set(10, 20, 30), - SampleKey(BiosampleSourceType.External, 2, guid2) -> 
Set(10, 20, 40), - SampleKey(BiosampleSourceType.External, 3, guid3) -> Set(10, 20, 50) - ) - - val clusters = service.findClusters(sampleSets, minClusterSize = 3, minVariantsPerCluster = 2) - val coreClusters = clusters.filter(_.clusterType == ClusterType.Core) - coreClusters.size must be >= 1 - // The core {10, 20} should appear as it's shared by all 3 samples - coreClusters.exists(_.variantIds == Set(10, 20)) mustBe true - } - - "not produce redundant subset clusters" in { - val guid1 = makeGuid() - val guid2 = makeGuid() - val sampleSets = Map( - SampleKey(BiosampleSourceType.External, 1, guid1) -> Set(10, 20, 30), - SampleKey(BiosampleSourceType.External, 2, guid2) -> Set(10, 20, 30) - ) - - // Should produce one exact cluster, not additional core subsets - val clusters = service.findClusters(sampleSets, minClusterSize = 2, minVariantsPerCluster = 1) - val exactClusters = clusters.filter(_.clusterType == ClusterType.Exact) - exactClusters must have size 1 - // Core clusters for subsets {10,20}, {10,30}, {20,30} should be filtered out - // since {10,20,30} is a superset with same supporter count - val coreClusters = clusters.filter(_.clusterType == ClusterType.Core) - coreClusters mustBe empty - } - } - - "findCoreClusters" should { - "find intersections across sample pairs" in { - val guid1 = makeGuid() - val guid2 = makeGuid() - val sampleSets = Map( - SampleKey(BiosampleSourceType.External, 1, guid1) -> Set(10, 20, 30), - SampleKey(BiosampleSourceType.External, 2, guid2) -> Set(10, 20, 40) - ) - - val cores = service.findCoreClusters(sampleSets, minClusterSize = 2, minVariantsPerCluster = 2) - cores must have size 1 - cores.head.variantIds mustBe Set(10, 20) - } - - "return empty for insufficient samples" in { - val guid1 = makeGuid() - val sampleSets = Map( - SampleKey(BiosampleSourceType.External, 1, guid1) -> Set(10, 20) - ) - service.findCoreClusters(sampleSets, minClusterSize = 2, minVariantsPerCluster = 1) mustBe empty - } - } - - "clusterForTerminal" 
should { - "cluster private variants and feed into proposal engine" in { - val guid1 = makeGuid() - val guid2 = makeGuid() - val pvs = Seq( - makePV(1, guid1, 10), makePV(1, guid1, 20), - makePV(2, guid2, 10), makePV(2, guid2, 20) - ) - - when(mockPrivateVariantRepo.findByTerminalHaplogroup(100)) - .thenReturn(Future.successful(pvs)) - - // No existing proposals - when(mockProposedBranchRepo.findByParentAndType(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - // Create proposal - val newProposal = ProposedBranch( - id = Some(1), parentHaplogroupId = 100, haplogroupType = HaplogroupType.Y, - consensusCount = 1, createdAt = now, updatedAt = now - ) - when(mockProposedBranchRepo.create(any[ProposedBranch])) - .thenReturn(Future.successful(newProposal)) - when(mockProposedBranchRepo.addVariant(any[ProposedBranchVariant])).thenAnswer { invocation => - val pbv = invocation.getArgument[ProposedBranchVariant](0) - Future.successful(pbv.copy(id = Some(1))) - } - when(mockProposedBranchRepo.addEvidence(any[ProposedBranchEvidence])).thenAnswer { invocation => - val e = invocation.getArgument[ProposedBranchEvidence](0) - Future.successful(e.copy(id = Some(1))) - } - when(mockProposedBranchRepo.getEvidence(1)).thenReturn(Future.successful(Seq.empty)) - - // Name suggestion - when(mockCoreRepo.findById(100)).thenReturn(Future.successful(Some(parentHg))) - when(mockProposedBranchRepo.findById(1)).thenReturn(Future.successful(Some(newProposal))) - when(mockProposedBranchRepo.update(any[ProposedBranch])).thenReturn(Future.successful(true)) - - whenReady(service.clusterForTerminal(100, HaplogroupType.Y)) { result => - result.samplesAnalyzed mustBe 2 - result.clusters must not be empty - result.proposalsCreated must be >= 1 - } - } - - "return empty result when no private variants exist" in { - when(mockPrivateVariantRepo.findByTerminalHaplogroup(100)) - .thenReturn(Future.successful(Seq.empty)) - - whenReady(service.clusterForTerminal(100, HaplogroupType.Y)) { 
result => - result.samplesAnalyzed mustBe 0 - result.clusters mustBe empty - result.proposalsCreated mustBe 0 - } - } - - "skip invalidated private variants" in { - val guid1 = makeGuid() - val pvs = Seq( - makePV(1, guid1, 10).copy(status = PrivateVariantStatus.Invalidated), - makePV(1, guid1, 20).copy(status = PrivateVariantStatus.Invalidated) - ) - - when(mockPrivateVariantRepo.findByTerminalHaplogroup(100)) - .thenReturn(Future.successful(pvs)) - - whenReady(service.clusterForTerminal(100, HaplogroupType.Y)) { result => - result.samplesAnalyzed mustBe 0 - result.clusters mustBe empty - } - } - } - - "suggestBranchName" should { - "generate name from parent haplogroup" in { - when(mockCoreRepo.findById(100)).thenReturn(Future.successful(Some(parentHg))) - - whenReady(service.suggestBranchName(100, 0)) { result => - result mustBe Some("R-M269-proposed-1") - } - } - - "return None when parent not found" in { - when(mockCoreRepo.findById(999)).thenReturn(Future.successful(None)) - - whenReady(service.suggestBranchName(999, 0)) { result => - result mustBe None - } - } - } -} diff --git a/test/services/TreeEvolutionServiceSpec.scala b/test/services/TreeEvolutionServiceSpec.scala deleted file mode 100644 index 58d86294..00000000 --- a/test/services/TreeEvolutionServiceSpec.scala +++ /dev/null @@ -1,254 +0,0 @@ -package services - -import helpers.ServiceSpec -import models.HaplogroupType -import models.domain.discovery.* -import models.domain.genomics.BiosampleHaplogroup -import models.domain.haplogroups.Haplogroup -import org.mockito.ArgumentMatchers.{any, anyInt, anyString} -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.* - -import java.time.LocalDateTime -import java.util.UUID -import scala.concurrent.Future - -class TreeEvolutionServiceSpec extends ServiceSpec { - - val mockCoreRepo: HaplogroupCoreRepository = mock[HaplogroupCoreRepository] - val mockVariantRepo: HaplogroupVariantRepository = mock[HaplogroupVariantRepository] - val 
mockProposedBranchRepo: ProposedBranchRepository = mock[ProposedBranchRepository] - val mockPrivateVariantRepo: PrivateVariantRepository = mock[PrivateVariantRepository] - val mockBhRepo: BiosampleHaplogroupRepository = mock[BiosampleHaplogroupRepository] - val mockCuratorActionRepo: CuratorActionRepository = mock[CuratorActionRepository] - - val service = new TreeEvolutionService( - mockCoreRepo, mockVariantRepo, mockProposedBranchRepo, - mockPrivateVariantRepo, mockBhRepo, mockCuratorActionRepo - ) - - override def beforeEach(): Unit = { - reset(mockCoreRepo, mockVariantRepo, mockProposedBranchRepo, - mockPrivateVariantRepo, mockBhRepo, mockCuratorActionRepo) - } - - val now: LocalDateTime = LocalDateTime.of(2025, 6, 1, 12, 0) - val curatorId = "curator@decodingus.org" - - val sampleGuid1: UUID = UUID.randomUUID() - val sampleGuid2: UUID = UUID.randomUUID() - - def makeAcceptedProposal(id: Int, parentHgId: Int = 100): ProposedBranch = - ProposedBranch( - id = Some(id), - parentHaplogroupId = parentHgId, - proposedName = Some("R-Z9999"), - haplogroupType = HaplogroupType.Y, - status = ProposedBranchStatus.Accepted, - consensusCount = 5, - confidenceScore = 0.85, - createdAt = now, - updatedAt = now, - reviewedAt = Some(now), - reviewedBy = Some(curatorId) - ) - - "TreeEvolutionService" should { - - "promote an accepted proposal to the tree" in { - val proposal = makeAcceptedProposal(10) - - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - - // Create haplogroup - when(mockCoreRepo.createWithParent(any[Haplogroup], any[Option[Int]], anyString)) - .thenReturn(Future.successful((500, Some(1)))) - - // Defining variants - val variants = Seq( - ProposedBranchVariant(Some(1), 10, 42, isDefining = true, 5, now, now), - ProposedBranchVariant(Some(2), 10, 43, isDefining = true, 5, now, now), - ProposedBranchVariant(Some(3), 10, 44, isDefining = false, 2, now, now) - ) - 
when(mockProposedBranchRepo.getVariants(10)).thenReturn(Future.successful(variants)) - when(mockVariantRepo.addVariantToHaplogroup(anyInt, anyInt)).thenReturn(Future.successful(1)) - - // Evidence / biosample reassignment - val evidence = Seq( - ProposedBranchEvidence(Some(1), 10, BiosampleSourceType.External, 1, sampleGuid1, variantMatchCount = 2), - ProposedBranchEvidence(Some(2), 10, BiosampleSourceType.Citizen, 42, sampleGuid2, variantMatchCount = 2) - ) - when(mockProposedBranchRepo.getEvidence(10)).thenReturn(Future.successful(evidence)) - - // Biosample haplogroup lookups - when(mockBhRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(100), None)))) - when(mockBhRepo.findBySampleGuid(sampleGuid2)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid2, Some(100), None)))) - when(mockBhRepo.updateYHaplogroup(any[UUID], anyInt)).thenReturn(Future.successful(true)) - - // Private variant promotion - when(mockPrivateVariantRepo.findActiveByVariantIds(any[Set[Int]], any[HaplogroupType])) - .thenReturn(Future.successful(Seq( - BiosamplePrivateVariant(Some(101), BiosampleSourceType.External, 1, sampleGuid1, 42, HaplogroupType.Y, 100, now), - BiosamplePrivateVariant(Some(102), BiosampleSourceType.Citizen, 42, sampleGuid2, 42, HaplogroupType.Y, 100, now) - ))) - when(mockPrivateVariantRepo.updateStatus(anyInt, any[PrivateVariantStatus])) - .thenReturn(Future.successful(true)) - - // Update proposal - when(mockProposedBranchRepo.update(any[ProposedBranch])).thenReturn(Future.successful(true)) - - // Audit - when(mockCuratorActionRepo.create(any[CuratorAction])).thenAnswer { invocation => - val a = invocation.getArgument[CuratorAction](0) - Future.successful(a.copy(id = Some(1))) - } - - whenReady(service.promoteProposal(10, curatorId)) { result => - result.proposalId mustBe 10 - result.newHaplogroupId mustBe 500 - result.haplogroupName mustBe "R-Z9999" - result.definingVariantCount mustBe 2 // 
Only isDefining=true - result.reassignedBiosampleCount mustBe 2 - result.promotedVariantCount must be >= 1 - - // Verify haplogroup created - verify(mockCoreRepo).createWithParent(any[Haplogroup], any[Option[Int]], anyString) - // Verify only defining variants linked (2 not 3) - verify(mockVariantRepo).addVariantToHaplogroup(500, 42) - verify(mockVariantRepo).addVariantToHaplogroup(500, 43) - // Verify audit - verify(mockCuratorActionRepo).create(any[CuratorAction]) - } - } - - "reject promotion of non-Accepted proposal" in { - val pending = makeAcceptedProposal(10).copy(status = ProposedBranchStatus.Pending) - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(pending))) - - whenReady(service.promoteProposal(10, curatorId).failed) { ex => - ex mustBe a[IllegalStateException] - ex.getMessage must include("must be Accepted") - } - } - - "reject promotion when proposal has no name" in { - val noName = makeAcceptedProposal(10).copy(proposedName = None) - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(noName))) - - whenReady(service.promoteProposal(10, curatorId).failed) { ex => - ex mustBe a[IllegalStateException] - ex.getMessage must include("no proposed name") - } - } - - "fail when proposal not found" in { - when(mockProposedBranchRepo.findById(999)).thenReturn(Future.successful(None)) - - whenReady(service.promoteProposal(999, curatorId).failed) { ex => - ex mustBe a[NoSuchElementException] - } - } - - "not reassign biosample if haplogroup doesn't match old terminal" in { - val proposal = makeAcceptedProposal(10) - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - when(mockCoreRepo.createWithParent(any[Haplogroup], any[Option[Int]], anyString)) - .thenReturn(Future.successful((500, Some(1)))) - - val variants = Seq(ProposedBranchVariant(Some(1), 10, 42, isDefining = true, 3, now, now)) - when(mockProposedBranchRepo.getVariants(10)).thenReturn(Future.successful(variants)) 
- when(mockVariantRepo.addVariantToHaplogroup(anyInt, anyInt)).thenReturn(Future.successful(1)) - - // Sample is assigned to haplogroup 200, not 100 (the parent) - val evidence = Seq(ProposedBranchEvidence(Some(1), 10, BiosampleSourceType.External, 1, sampleGuid1)) - when(mockProposedBranchRepo.getEvidence(10)).thenReturn(Future.successful(evidence)) - when(mockBhRepo.findBySampleGuid(sampleGuid1)) - .thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, Some(200), None)))) - - when(mockPrivateVariantRepo.findActiveByVariantIds(any[Set[Int]], any[HaplogroupType])) - .thenReturn(Future.successful(Seq.empty)) - when(mockProposedBranchRepo.update(any[ProposedBranch])).thenReturn(Future.successful(true)) - when(mockCuratorActionRepo.create(any[CuratorAction])).thenAnswer { invocation => - Future.successful(invocation.getArgument[CuratorAction](0).copy(id = Some(1))) - } - - whenReady(service.promoteProposal(10, curatorId)) { result => - result.reassignedBiosampleCount mustBe 0 - verify(mockBhRepo, never()).updateYHaplogroup(any[UUID], anyInt) - } - } - - "handle MT haplogroup reassignment" in { - val proposal = makeAcceptedProposal(10).copy(haplogroupType = HaplogroupType.MT) - when(mockProposedBranchRepo.findById(10)).thenReturn(Future.successful(Some(proposal))) - when(mockCoreRepo.createWithParent(any[Haplogroup], any[Option[Int]], anyString)) - .thenReturn(Future.successful((500, Some(1)))) - - val variants = Seq(ProposedBranchVariant(Some(1), 10, 42, isDefining = true, 3, now, now)) - when(mockProposedBranchRepo.getVariants(10)).thenReturn(Future.successful(variants)) - when(mockVariantRepo.addVariantToHaplogroup(anyInt, anyInt)).thenReturn(Future.successful(1)) - - val evidence = Seq(ProposedBranchEvidence(Some(1), 10, BiosampleSourceType.External, 1, sampleGuid1)) - when(mockProposedBranchRepo.getEvidence(10)).thenReturn(Future.successful(evidence)) - when(mockBhRepo.findBySampleGuid(sampleGuid1)) - 
.thenReturn(Future.successful(Some(BiosampleHaplogroup(sampleGuid1, None, Some(100))))) - when(mockBhRepo.updateMtHaplogroup(any[UUID], anyInt)).thenReturn(Future.successful(true)) - - when(mockPrivateVariantRepo.findActiveByVariantIds(any[Set[Int]], any[HaplogroupType])) - .thenReturn(Future.successful(Seq.empty)) - when(mockProposedBranchRepo.update(any[ProposedBranch])).thenReturn(Future.successful(true)) - when(mockCuratorActionRepo.create(any[CuratorAction])).thenAnswer { invocation => - Future.successful(invocation.getArgument[CuratorAction](0).copy(id = Some(1))) - } - - whenReady(service.promoteProposal(10, curatorId)) { result => - result.reassignedBiosampleCount mustBe 1 - verify(mockBhRepo).updateMtHaplogroup(sampleGuid1, 500) - } - } - } - - "reassignBiosamplesToNewTerminal" should { - "bulk reassign Y haplogroups" in { - val guid1 = UUID.randomUUID() - val guid2 = UUID.randomUUID() - when(mockBhRepo.findByHaplogroupId(100, HaplogroupType.Y)) - .thenReturn(Future.successful(Seq( - BiosampleHaplogroup(guid1, Some(100), None), - BiosampleHaplogroup(guid2, Some(100), None) - ))) - when(mockBhRepo.updateYHaplogroup(any[UUID], anyInt)).thenReturn(Future.successful(true)) - - whenReady(service.reassignBiosamplesToNewTerminal(100, 500, HaplogroupType.Y)) { count => - count mustBe 2 - verify(mockBhRepo).updateYHaplogroup(guid1, 500) - verify(mockBhRepo).updateYHaplogroup(guid2, 500) - } - } - - "bulk reassign MT haplogroups" in { - val guid1 = UUID.randomUUID() - when(mockBhRepo.findByHaplogroupId(50, HaplogroupType.MT)) - .thenReturn(Future.successful(Seq( - BiosampleHaplogroup(guid1, None, Some(50)) - ))) - when(mockBhRepo.updateMtHaplogroup(any[UUID], anyInt)).thenReturn(Future.successful(true)) - - whenReady(service.reassignBiosamplesToNewTerminal(50, 500, HaplogroupType.MT)) { count => - count mustBe 1 - verify(mockBhRepo).updateMtHaplogroup(guid1, 500) - } - } - - "return 0 when no biosamples match" in { - when(mockBhRepo.findByHaplogroupId(999, 
HaplogroupType.Y)) - .thenReturn(Future.successful(Seq.empty)) - - whenReady(service.reassignBiosamplesToNewTerminal(999, 500, HaplogroupType.Y)) { count => - count mustBe 0 - } - } - } -} diff --git a/test/services/TreeVersioningServiceSpec.scala b/test/services/TreeVersioningServiceSpec.scala deleted file mode 100644 index a4089f17..00000000 --- a/test/services/TreeVersioningServiceSpec.scala +++ /dev/null @@ -1,766 +0,0 @@ -package services - -import models.HaplogroupType -import models.api.haplogroups.MergeStatistics -import models.domain.haplogroups.* -import org.mockito.ArgumentMatchers.{any, anyInt, anyString, eq => eqTo} -import org.mockito.Mockito.{never, reset, verify, when} -import org.scalatest.BeforeAndAfterEach -import org.scalatest.concurrent.ScalaFutures -import org.scalatest.time.{Millis, Seconds, Span} -import org.scalatestplus.mockito.MockitoSugar -import org.scalatestplus.play.PlaySpec -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, TreeVersioningRepository, WipTreeRepository, WipStatistics} - -import java.time.LocalDateTime -import scala.concurrent.{ExecutionContext, Future} - -class TreeVersioningServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures with BeforeAndAfterEach { - - implicit val ec: ExecutionContext = ExecutionContext.global - implicit val patience: PatienceConfig = PatienceConfig(timeout = Span(5, Seconds), interval = Span(100, Millis)) - - // Mocks - var mockRepository: TreeVersioningRepository = _ - var mockWipRepository: WipTreeRepository = _ - var mockHaplogroupRepository: HaplogroupCoreRepository = _ - var mockHaplogroupVariantRepository: HaplogroupVariantRepository = _ - var mockAuditService: CuratorAuditService = _ - var service: TreeVersioningServiceImpl = _ - - // Test fixtures - val now: LocalDateTime = LocalDateTime.now() - - def createChangeSet( - id: Int, - haplogroupType: HaplogroupType = HaplogroupType.Y, - name: String = "TestChangeSet", - sourceName: String = "TestSource", 
- status: ChangeSetStatus = ChangeSetStatus.Draft, - createdBy: String = "test-user" - ): ChangeSet = ChangeSet( - id = Some(id), - haplogroupType = haplogroupType, - name = name, - description = Some("Test description"), - sourceName = sourceName, - createdAt = now, - createdBy = createdBy, - status = status, - statistics = ChangeSetStatistics() - ) - - def createTreeChange( - id: Int, - changeSetId: Int, - changeType: TreeChangeType = TreeChangeType.Create, - haplogroupId: Option[Int] = None, - status: ChangeStatus = ChangeStatus.Pending - ): TreeChange = TreeChange( - id = Some(id), - changeSetId = changeSetId, - changeType = changeType, - haplogroupId = haplogroupId, - status = status, - createdAt = now, - sequenceNum = id - ) - - override def beforeEach(): Unit = { - mockRepository = mock[TreeVersioningRepository] - mockWipRepository = mock[WipTreeRepository] - mockHaplogroupRepository = mock[HaplogroupCoreRepository] - mockHaplogroupVariantRepository = mock[HaplogroupVariantRepository] - mockAuditService = mock[CuratorAuditService] - service = new TreeVersioningServiceImpl( - mockRepository, - mockWipRepository, - mockHaplogroupRepository, - mockHaplogroupVariantRepository, - mockAuditService - ) - - // Default mock behavior for WIP repository (empty stats - no WIP data) - when(mockWipRepository.getWipStatistics(anyInt())) - .thenReturn(Future.successful(WipStatistics(0, 0, 0, 0))) - - // Default mock behavior for audit service - when(mockAuditService.logChangeSetCreate(anyString(), any[ChangeSet], any[Option[String]])) - .thenReturn(Future.successful(mock[models.domain.curator.AuditLogEntry])) - when(mockAuditService.logChangeSetStatusChange(anyString(), anyInt(), any[ChangeSetStatus], any[ChangeSetStatus], any[Option[String]])) - .thenReturn(Future.successful(mock[models.domain.curator.AuditLogEntry])) - when(mockAuditService.logChangeSetApply(anyString(), any[ChangeSet], anyInt(), any[Option[String]])) - 
.thenReturn(Future.successful(mock[models.domain.curator.AuditLogEntry])) - when(mockAuditService.logChangeSetDiscard(anyString(), any[ChangeSet], anyString())) - .thenReturn(Future.successful(mock[models.domain.curator.AuditLogEntry])) - when(mockAuditService.logChangeReview(anyString(), any[TreeChange], anyString(), any[Option[String]])) - .thenReturn(Future.successful(mock[models.domain.curator.AuditLogEntry])) - } - - // ============================================================================ - // Change Set Lifecycle Tests - // ============================================================================ - - "TreeVersioningService.createChangeSet" should { - - "create a new change set when none exists" in { - when(mockRepository.getActiveChangeSet(HaplogroupType.Y)) - .thenReturn(Future.successful(None)) - when(mockRepository.createChangeSet(any[ChangeSet])) - .thenReturn(Future.successful(1)) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(createChangeSet(1)))) - - whenReady(service.createChangeSet(HaplogroupType.Y, "ISOGG", Some("Test merge"))) { result => - result.id mustBe Some(1) - result.haplogroupType mustBe HaplogroupType.Y - verify(mockRepository).createChangeSet(any[ChangeSet]) - verify(mockAuditService).logChangeSetCreate(anyString(), any[ChangeSet], any[Option[String]]) - } - } - - "fail when active change set already exists" in { - val existingCs = createChangeSet(1, status = ChangeSetStatus.Draft) - when(mockRepository.getActiveChangeSet(HaplogroupType.Y)) - .thenReturn(Future.successful(Some(existingCs))) - - whenReady(service.createChangeSet(HaplogroupType.Y, "ISOGG").failed) { ex => - ex mustBe an[IllegalStateException] - ex.getMessage must include("Active change set already exists") - verify(mockRepository, never()).createChangeSet(any[ChangeSet]) - } - } - - "fail when change set is under review" in { - val existingCs = createChangeSet(1, status = ChangeSetStatus.UnderReview) - 
when(mockRepository.getActiveChangeSet(HaplogroupType.Y)) - .thenReturn(Future.successful(Some(existingCs))) - - whenReady(service.createChangeSet(HaplogroupType.Y, "ISOGG").failed) { ex => - ex mustBe an[IllegalStateException] - ex.getMessage must include("UnderReview") - } - } - } - - "TreeVersioningService.getActiveChangeSet" should { - - "return active change set when it exists" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.Draft) - when(mockRepository.getActiveChangeSet(HaplogroupType.Y)) - .thenReturn(Future.successful(Some(changeSet))) - - whenReady(service.getActiveChangeSet(HaplogroupType.Y)) { result => - result mustBe defined - result.get.id mustBe Some(1) - } - } - - "return None when no active change set" in { - when(mockRepository.getActiveChangeSet(HaplogroupType.Y)) - .thenReturn(Future.successful(None)) - - whenReady(service.getActiveChangeSet(HaplogroupType.Y)) { result => - result mustBe empty - } - } - } - - "TreeVersioningService.getChangeSetDetails" should { - - "return full details for an existing change set" in { - val changeSet = createChangeSet(1) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - when(mockRepository.countTreeChanges(eqTo(1), any[Option[TreeChangeType]], any[Option[ChangeStatus]])) - .thenReturn(Future.successful(10)) - when(mockRepository.getChangeSummaryByType(1)) - .thenReturn(Future.successful(Map(TreeChangeType.Create -> 5, TreeChangeType.Update -> 5))) - when(mockRepository.getChangeSummaryByStatus(1)) - .thenReturn(Future.successful(Map(ChangeStatus.Pending -> 8, ChangeStatus.Applied -> 2))) - when(mockRepository.listComments(1)) - .thenReturn(Future.successful(Seq.empty)) - - whenReady(service.getChangeSetDetails(1)) { result => - result mustBe defined - result.get.changeSet.id mustBe Some(1) - result.get.totalChanges mustBe 10 - result.get.changesByType must contain key "CREATE" - result.get.changesByStatus must contain key "PENDING" - } - } - - "return 
None for non-existent change set" in { - when(mockRepository.getChangeSet(999)) - .thenReturn(Future.successful(None)) - - whenReady(service.getChangeSetDetails(999)) { result => - result mustBe empty - } - } - } - - "TreeVersioningService.finalizeChangeSet" should { - - "update change set with statistics and move to READY_FOR_REVIEW" in { - val stats = MergeStatistics( - nodesProcessed = 100, - nodesCreated = 20, - nodesUpdated = 30, - nodesUnchanged = 50, - variantsAdded = 15, - variantsUpdated = 3, - relationshipsCreated = 25, - relationshipsUpdated = 5, - splitOperations = 2 - ) - - when(mockRepository.finalizeChangeSet(eqTo(1), any[ChangeSetStatistics], any[Option[String]])) - .thenReturn(Future.successful(true)) - - whenReady(service.finalizeChangeSet(1, stats, Some("/path/to/report.txt"))) { result => - result mustBe true - verify(mockRepository).finalizeChangeSet(eqTo(1), any[ChangeSetStatistics], eqTo(Some("/path/to/report.txt"))) - } - } - } - - "TreeVersioningService.startReview" should { - - "start review when change set is READY_FOR_REVIEW" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.ReadyForReview) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - when(mockRepository.updateChangeSetStatus(1, ChangeSetStatus.UnderReview)) - .thenReturn(Future.successful(true)) - - whenReady(service.startReview(1, "curator123")) { result => - result mustBe true - verify(mockRepository).updateChangeSetStatus(1, ChangeSetStatus.UnderReview) - verify(mockAuditService).logChangeSetStatusChange( - eqTo("curator123"), eqTo(1), - eqTo(ChangeSetStatus.ReadyForReview), eqTo(ChangeSetStatus.UnderReview), - any[Option[String]] - ) - } - } - - "fail when change set is not READY_FOR_REVIEW" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.Draft) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - - whenReady(service.startReview(1, "curator123").failed) { ex => - 
ex mustBe an[IllegalStateException] - ex.getMessage must include("expected READY_FOR_REVIEW") - verify(mockRepository, never()).updateChangeSetStatus(anyInt(), any[ChangeSetStatus]) - } - } - - "fail when change set not found" in { - when(mockRepository.getChangeSet(999)) - .thenReturn(Future.successful(None)) - - whenReady(service.startReview(999, "curator123").failed) { ex => - ex mustBe a[NoSuchElementException] - ex.getMessage must include("not found") - } - } - } - - "TreeVersioningService.applyChangeSet" should { - - "apply change set when UNDER_REVIEW" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.UnderReview) - val appliedChangeSet = changeSet.copy(status = ChangeSetStatus.Applied, appliedAt = Some(now)) - - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - .thenReturn(Future.successful(Some(appliedChangeSet))) - when(mockRepository.applyAllPendingChanges(1)) - .thenReturn(Future.successful(10)) - when(mockRepository.applyChangeSet(1, "curator123")) - .thenReturn(Future.successful(true)) - - whenReady(service.applyChangeSet(1, "curator123")) { result => - result mustBe true - verify(mockRepository).applyAllPendingChanges(1) - verify(mockRepository).applyChangeSet(1, "curator123") - verify(mockAuditService).logChangeSetApply(eqTo("curator123"), any[ChangeSet], eqTo(10), any[Option[String]]) - } - } - - "apply change set when READY_FOR_REVIEW (skip straight to apply)" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.ReadyForReview) - val appliedChangeSet = changeSet.copy(status = ChangeSetStatus.Applied) - - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - .thenReturn(Future.successful(Some(appliedChangeSet))) - when(mockRepository.applyAllPendingChanges(1)) - .thenReturn(Future.successful(5)) - when(mockRepository.applyChangeSet(1, "curator123")) - .thenReturn(Future.successful(true)) - - whenReady(service.applyChangeSet(1, "curator123")) { 
result => - result mustBe true - } - } - - "fail when change set is DRAFT" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.Draft) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - - whenReady(service.applyChangeSet(1, "curator123").failed) { ex => - ex mustBe an[IllegalStateException] - ex.getMessage must include("Cannot apply") - verify(mockRepository, never()).applyChangeSet(anyInt(), anyString()) - } - } - - "fail when change set already APPLIED" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.Applied) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - - whenReady(service.applyChangeSet(1, "curator123").failed) { ex => - ex mustBe an[IllegalStateException] - ex.getMessage must include("Cannot apply") - } - } - } - - "TreeVersioningService.discardChangeSet" should { - - "discard change set when not APPLIED" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.UnderReview) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - when(mockRepository.discardChangeSet(1, "curator123", "Not needed")) - .thenReturn(Future.successful(true)) - - whenReady(service.discardChangeSet(1, "curator123", "Not needed")) { result => - result mustBe true - verify(mockRepository).discardChangeSet(1, "curator123", "Not needed") - verify(mockAuditService).logChangeSetDiscard("curator123", changeSet, "Not needed") - } - } - - "discard DRAFT change set" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.Draft) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - when(mockRepository.discardChangeSet(1, "curator123", "Aborted")) - .thenReturn(Future.successful(true)) - - whenReady(service.discardChangeSet(1, "curator123", "Aborted")) { result => - result mustBe true - } - } - - "fail when change set already APPLIED" in { - val changeSet = createChangeSet(1, status = 
ChangeSetStatus.Applied) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - - whenReady(service.discardChangeSet(1, "curator123", "Test").failed) { ex => - ex mustBe an[IllegalStateException] - ex.getMessage must include("already APPLIED") - verify(mockRepository, never()).discardChangeSet(anyInt(), anyString(), anyString()) - } - } - } - - // ============================================================================ - // Change Recording Tests - // ============================================================================ - - "TreeVersioningService.recordCreate" should { - - "record a CREATE change" in { - when(mockRepository.getNextSequenceNum(1)) - .thenReturn(Future.successful(1)) - when(mockRepository.createTreeChange(any[TreeChange])) - .thenReturn(Future.successful(100)) - - val haplogroupData = """{"name":"R1b-L21","variants":["L21"]}""" - - whenReady(service.recordCreate(1, haplogroupData, Some(50))) { changeId => - changeId mustBe 100 - verify(mockRepository).createTreeChange(org.mockito.ArgumentMatchers.argThat { (tc: TreeChange) => - tc.changeType == TreeChangeType.Create && - tc.haplogroupData.contains(haplogroupData) && - tc.newParentId.contains(50) - }) - } - } - - "record a CREATE change with ambiguity info" in { - when(mockRepository.getNextSequenceNum(1)) - .thenReturn(Future.successful(2)) - when(mockRepository.createTreeChange(any[TreeChange])) - .thenReturn(Future.successful(101)) - - whenReady(service.recordCreate( - changeSetId = 1, - haplogroupData = "{}", - parentId = None, - ambiguityType = Some("MULTIPLE_MATCH"), - ambiguityConfidence = Some(0.85) - )) { changeId => - changeId mustBe 101 - verify(mockRepository).createTreeChange(org.mockito.ArgumentMatchers.argThat { (tc: TreeChange) => - tc.ambiguityType.contains("MULTIPLE_MATCH") && - tc.ambiguityConfidence.contains(0.85) - }) - } - } - } - - "TreeVersioningService.recordUpdate" should { - - "record an UPDATE change" in { - 
when(mockRepository.getNextSequenceNum(1)) - .thenReturn(Future.successful(3)) - when(mockRepository.createTreeChange(any[TreeChange])) - .thenReturn(Future.successful(102)) - - val oldData = """{"formedYbp":4500}""" - val newData = """{"formedYbp":4800}""" - - whenReady(service.recordUpdate(1, 100, oldData, newData)) { changeId => - changeId mustBe 102 - verify(mockRepository).createTreeChange(org.mockito.ArgumentMatchers.argThat { (tc: TreeChange) => - tc.changeType == TreeChangeType.Update && - tc.haplogroupId.contains(100) && - tc.oldData.contains(oldData) && - tc.haplogroupData.contains(newData) - }) - } - } - } - - "TreeVersioningService.recordReparent" should { - - "record a REPARENT change" in { - when(mockRepository.getNextSequenceNum(1)) - .thenReturn(Future.successful(4)) - when(mockRepository.createTreeChange(any[TreeChange])) - .thenReturn(Future.successful(103)) - - whenReady(service.recordReparent(1, 100, Some(50), 60)) { changeId => - changeId mustBe 103 - verify(mockRepository).createTreeChange(org.mockito.ArgumentMatchers.argThat { (tc: TreeChange) => - tc.changeType == TreeChangeType.Reparent && - tc.haplogroupId.contains(100) && - tc.oldParentId.contains(50) && - tc.newParentId.contains(60) - }) - } - } - } - - "TreeVersioningService.recordAddVariant" should { - - "record an ADD_VARIANT change" in { - when(mockRepository.getNextSequenceNum(1)) - .thenReturn(Future.successful(5)) - when(mockRepository.createTreeChange(any[TreeChange])) - .thenReturn(Future.successful(104)) - - whenReady(service.recordAddVariant(1, 100, 200)) { changeId => - changeId mustBe 104 - verify(mockRepository).createTreeChange(org.mockito.ArgumentMatchers.argThat { (tc: TreeChange) => - tc.changeType == TreeChangeType.AddVariant && - tc.haplogroupId.contains(100) && - tc.variantId.contains(200) - }) - } - } - } - - "TreeVersioningService.recordRemoveVariant" should { - - "record a REMOVE_VARIANT change" in { - when(mockRepository.getNextSequenceNum(1)) - 
.thenReturn(Future.successful(6)) - when(mockRepository.createTreeChange(any[TreeChange])) - .thenReturn(Future.successful(105)) - - whenReady(service.recordRemoveVariant(1, 100, 200)) { changeId => - changeId mustBe 105 - verify(mockRepository).createTreeChange(org.mockito.ArgumentMatchers.argThat { (tc: TreeChange) => - tc.changeType == TreeChangeType.RemoveVariant && - tc.haplogroupId.contains(100) && - tc.variantId.contains(200) - }) - } - } - } - - // ============================================================================ - // Change Review Tests - // ============================================================================ - - "TreeVersioningService.getPendingReviewChanges" should { - - "return pending changes ordered by ambiguity confidence" in { - val changes = Seq( - createTreeChange(1, 1, TreeChangeType.Create), - createTreeChange(2, 1, TreeChangeType.Reparent) - ) - when(mockRepository.getPendingReviewChanges(1, 50)) - .thenReturn(Future.successful(changes)) - - whenReady(service.getPendingReviewChanges(1, 50)) { result => - result must have size 2 - result.head.id mustBe Some(1) - } - } - } - - "TreeVersioningService.reviewChange" should { - - "approve a pending change" in { - val change = createTreeChange(1, 1) - when(mockRepository.getTreeChange(1)) - .thenReturn(Future.successful(Some(change))) - when(mockRepository.reviewTreeChange(1, "curator123", Some("Looks good"), ChangeStatus.Applied)) - .thenReturn(Future.successful(true)) - - whenReady(service.reviewChange(1, "curator123", ChangeStatus.Applied, Some("Looks good"))) { result => - result mustBe true - verify(mockRepository).reviewTreeChange(1, "curator123", Some("Looks good"), ChangeStatus.Applied) - verify(mockAuditService).logChangeReview("curator123", change, "APPLIED", Some("Looks good")) - } - } - - "skip a change" in { - val change = createTreeChange(1, 1) - when(mockRepository.getTreeChange(1)) - .thenReturn(Future.successful(Some(change))) - 
when(mockRepository.reviewTreeChange(1, "curator123", None, ChangeStatus.Skipped)) - .thenReturn(Future.successful(true)) - - whenReady(service.reviewChange(1, "curator123", ChangeStatus.Skipped)) { result => - result mustBe true - } - } - - "revert a change" in { - val change = createTreeChange(1, 1) - when(mockRepository.getTreeChange(1)) - .thenReturn(Future.successful(Some(change))) - when(mockRepository.reviewTreeChange(1, "curator123", Some("Incorrect reparent"), ChangeStatus.Reverted)) - .thenReturn(Future.successful(true)) - - whenReady(service.reviewChange(1, "curator123", ChangeStatus.Reverted, Some("Incorrect reparent"))) { result => - result mustBe true - } - } - - "fail when trying to set status back to PENDING" in { - whenReady(service.reviewChange(1, "curator123", ChangeStatus.Pending).failed) { ex => - ex mustBe an[IllegalArgumentException] - ex.getMessage must include("Cannot set status back to PENDING") - verify(mockRepository, never()).reviewTreeChange(anyInt(), anyString(), any[Option[String]], any[ChangeStatus]) - } - } - } - - "TreeVersioningService.approveAllPending" should { - - "bulk approve all pending changes" in { - when(mockRepository.applyAllPendingChanges(1)) - .thenReturn(Future.successful(15)) - - whenReady(service.approveAllPending(1, "curator123")) { count => - count mustBe 15 - verify(mockRepository).applyAllPendingChanges(1) - } - } - } - - // ============================================================================ - // Comment Tests - // ============================================================================ - - "TreeVersioningService.addComment" should { - - "add a comment to a change set" in { - when(mockRepository.addComment(any[ChangeSetComment])) - .thenReturn(Future.successful(1)) - - whenReady(service.addComment(1, "curator123", "This looks good", None)) { commentId => - commentId mustBe 1 - verify(mockRepository).addComment(org.mockito.ArgumentMatchers.argThat { (c: ChangeSetComment) => - c.changeSetId == 1 && 
- c.author == "curator123" && - c.content == "This looks good" && - c.treeChangeId.isEmpty - }) - } - } - - "add a comment linked to a specific change" in { - when(mockRepository.addComment(any[ChangeSetComment])) - .thenReturn(Future.successful(2)) - - whenReady(service.addComment(1, "curator123", "Check this change", Some(100))) { commentId => - commentId mustBe 2 - verify(mockRepository).addComment(org.mockito.ArgumentMatchers.argThat { (c: ChangeSetComment) => - c.treeChangeId.contains(100) - }) - } - } - } - - "TreeVersioningService.listComments" should { - - "return all comments for a change set" in { - val comments = Seq( - ChangeSetComment(Some(1), 1, None, "curator1", "Comment 1", now), - ChangeSetComment(Some(2), 1, Some(100), "curator2", "Comment 2", now) - ) - when(mockRepository.listComments(1)) - .thenReturn(Future.successful(comments)) - - whenReady(service.listComments(1)) { result => - result must have size 2 - result.head.content mustBe "Comment 1" - } - } - } - - // ============================================================================ - // Tree Diff Tests - // ============================================================================ - - "TreeVersioningService.getTreeDiff" should { - - "compute diff from change set with various change types" in { - val changeSet = createChangeSet(1) - val changes = Seq( - createTreeChange(1, 1, TreeChangeType.Create).copy( - haplogroupData = Some("""{"name":"R1b-NEW"}"""), - newParentId = Some(50), - createdHaplogroupId = Some(100) - ), - createTreeChange(2, 1, TreeChangeType.Reparent).copy( - haplogroupId = Some(101), - oldParentId = Some(50), - newParentId = Some(60) - ), - createTreeChange(3, 1, TreeChangeType.Update).copy( - haplogroupId = Some(102) - ), - createTreeChange(4, 1, TreeChangeType.AddVariant).copy( - haplogroupId = Some(102), - variantId = Some(200) - ) - ) - - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - 
when(mockRepository.getChangesForChangeSet(1)) - .thenReturn(Future.successful(changes)) - when(mockRepository.getHaplogroupNamesById(any[Set[Int]])) - .thenReturn(Future.successful(Map(101 -> "R-M269", 102 -> "R-U106", 100 -> "R-M343"))) - - whenReady(service.getTreeDiff(1)) { diff => - diff.changeSetId mustBe 1 - diff.summary.totalChanges mustBe 4 - diff.summary.nodesAdded mustBe 1 - diff.summary.nodesReparented mustBe 1 - diff.summary.nodesModified mustBe 1 // Update and AddVariant grouped by haplogroup - diff.summary.variantsAdded mustBe 1 - - // Check entries - diff.entries.count(_.diffType == DiffType.Added) mustBe 1 - diff.entries.count(_.diffType == DiffType.Reparented) mustBe 1 - diff.entries.count(_.diffType == DiffType.Modified) mustBe 1 - } - } - - "return empty diff for non-existent change set" in { - when(mockRepository.getChangeSet(999)) - .thenReturn(Future.successful(None)) - when(mockRepository.getChangesForChangeSet(999)) - .thenReturn(Future.successful(Seq.empty)) - when(mockRepository.getHaplogroupNamesById(any[Set[Int]])) - .thenReturn(Future.successful(Map.empty[Int, String])) - - whenReady(service.getTreeDiff(999)) { diff => - diff.changeSetId mustBe 999 - diff.summary.totalChanges mustBe 0 - diff.entries mustBe empty - } - } - } - - "TreeVersioningService.getActiveTreeDiff" should { - - "return diff for active change set" in { - val changeSet = createChangeSet(1, status = ChangeSetStatus.UnderReview) - when(mockRepository.getActiveChangeSet(HaplogroupType.Y)) - .thenReturn(Future.successful(Some(changeSet))) - when(mockRepository.getChangeSet(1)) - .thenReturn(Future.successful(Some(changeSet))) - when(mockRepository.getChangesForChangeSet(1)) - .thenReturn(Future.successful(Seq.empty)) - when(mockRepository.getHaplogroupNamesById(any[Set[Int]])) - .thenReturn(Future.successful(Map.empty[Int, String])) - - whenReady(service.getActiveTreeDiff(HaplogroupType.Y)) { result => - result mustBe defined - result.get.changeSetId mustBe 1 - } - } - - 
"return None when no active change set" in { - when(mockRepository.getActiveChangeSet(HaplogroupType.Y)) - .thenReturn(Future.successful(None)) - - whenReady(service.getActiveTreeDiff(HaplogroupType.Y)) { result => - result mustBe empty - } - } - } - - "TreeVersioningService.getChangesForDiff" should { - - "return all changes for a change set" in { - val changes = Seq( - createTreeChange(1, 1, TreeChangeType.Create), - createTreeChange(2, 1, TreeChangeType.Update) - ) - when(mockRepository.getChangesForChangeSet(1)) - .thenReturn(Future.successful(changes)) - - whenReady(service.getChangesForDiff(1)) { result => - result must have size 2 - } - } - } - - // ============================================================================ - // MT DNA Tests - // ============================================================================ - - "TreeVersioningService" should { - - "handle MT DNA haplogroup type correctly" in { - when(mockRepository.getActiveChangeSet(HaplogroupType.MT)) - .thenReturn(Future.successful(None)) - when(mockRepository.createChangeSet(any[ChangeSet])) - .thenReturn(Future.successful(2)) - when(mockRepository.getChangeSet(2)) - .thenReturn(Future.successful(Some(createChangeSet(2, haplogroupType = HaplogroupType.MT)))) - - whenReady(service.createChangeSet(HaplogroupType.MT, "PhyloTree")) { result => - result.haplogroupType mustBe HaplogroupType.MT - } - } - } -} diff --git a/test/services/firehose/AtmosphereEventHandlerSpec.scala b/test/services/firehose/AtmosphereEventHandlerSpec.scala deleted file mode 100644 index 5ae9debc..00000000 --- a/test/services/firehose/AtmosphereEventHandlerSpec.scala +++ /dev/null @@ -1,855 +0,0 @@ -package services.firehose - -import models.atmosphere.{ - RecordMeta, GenotypeRecord, PopulationBreakdownRecord, HaplogroupReconciliationRecord, - ProjectRecord, PopulationComponent => AtmospherePopulationComponent, - SuperPopulationSummary => AtmosphereSuperPopulationSummary, - ReconciliationStatus => 
AtmosphereReconciliationStatus, - RunHaplogroupCall, IdentityVerification => AtmosphereIdentityVerification -} -import models.domain.Project -import models.domain.genomics.* -import org.mockito.ArgumentMatchers.{any, anyString} -import org.mockito.Mockito.{never, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.scalatestplus.mockito.MockitoSugar -import org.scalatest.concurrent.ScalaFutures -import org.scalatestplus.play.PlaySpec -import play.api.libs.json.Json -import repositories.* -import services.TestTypeService -import services.ibd.PopulationAnalysisService - -import java.time.{Instant, LocalDateTime} -import java.util.UUID -import scala.concurrent.{ExecutionContext, Future} - -class AtmosphereEventHandlerSpec extends PlaySpec with MockitoSugar with ScalaFutures { - - implicit val ec: ExecutionContext = ExecutionContext.global - - // --- Test Fixtures --- - - def createRecordMeta(version: Int = 1): RecordMeta = RecordMeta( - version = version, - createdAt = Instant.now(), - updatedAt = Some(Instant.now()), - lastModifiedField = None - ) - - def createMocks(): ( - CitizenBiosampleRepository, - SequenceLibraryRepository, - SequenceFileRepository, - AlignmentRepository, - SpecimenDonorRepository, - ProjectRepository, - TestTypeService, - GenotypeDataRepository, - PopulationBreakdownRepository, - HaplogroupReconciliationRepository, - InstrumentObservationRepository, - GroupProjectRepository, - GroupProjectMemberRepository - ) = ( - mock[CitizenBiosampleRepository], - mock[SequenceLibraryRepository], - mock[SequenceFileRepository], - mock[AlignmentRepository], - mock[SpecimenDonorRepository], - mock[ProjectRepository], - mock[TestTypeService], - mock[GenotypeDataRepository], - mock[PopulationBreakdownRepository], - mock[HaplogroupReconciliationRepository], - mock[InstrumentObservationRepository], - mock[GroupProjectRepository], - mock[GroupProjectMemberRepository] - ) - - def createHandler(mocks: ( - 
CitizenBiosampleRepository, - SequenceLibraryRepository, - SequenceFileRepository, - AlignmentRepository, - SpecimenDonorRepository, - ProjectRepository, - TestTypeService, - GenotypeDataRepository, - PopulationBreakdownRepository, - HaplogroupReconciliationRepository, - InstrumentObservationRepository, - GroupProjectRepository, - GroupProjectMemberRepository - )): AtmosphereEventHandler = { - val (biosampleRepo, seqLibRepo, seqFileRepo, alignmentRepo, donorRepo, projectRepo, testTypeService, genotypeRepo, popRepo, reconRepo, instrObsRepo, gpRepo, gpmRepo) = mocks - new AtmosphereEventHandler( - biosampleRepo, - seqLibRepo, - seqFileRepo, - alignmentRepo, - donorRepo, - projectRepo, - testTypeService, - genotypeRepo, - popRepo, - reconRepo, - instrObsRepo, - gpRepo, - gpmRepo, - mock[MatchConsentTrackingRepository], - mock[MatchRequestTrackingRepository], - mock[PopulationAnalysisService] - ) - } - - // ==================== GENOTYPE TESTS ==================== - - "AtmosphereEventHandler - Genotype" should { - - "create a new genotype successfully" in { - val mocks = createMocks() - val (biosampleRepo, _, _, _, _, _, testTypeService, genotypeRepo, _, _, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.genotype/rkey1" - val biosampleAtUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/sample1" - val sampleGuid = UUID.randomUUID() - - val record = GenotypeRecord( - atUri = atUri, - meta = createRecordMeta(), - biosampleRef = biosampleAtUri, - testTypeCode = "ARRAY_23ANDME_V5", - provider = "23andMe", - chipType = None, - chipVersion = Some("v5"), - totalMarkersCalled = Some(600000), - totalMarkersPossible = Some(650000), - callRate = Some(0.923), - noCallRate = Some(0.077), - yMarkersCalled = Some(3500), - yMarkersTotal = Some(4000), - mtMarkersCalled = Some(3000), - mtMarkersTotal = Some(3200), - autosomalMarkersCalled = Some(590000), - hetRate = Some(0.32), - testDate = Some(Instant.now()), - processedAt = 
Some(Instant.now()), - buildVersion = Some("GRCh37"), - sourceFileHash = Some("abc123hash"), - derivedHaplogroups = None, - populationBreakdownRef = None, - files = None, - imputationRef = None - ) - - val event = GenotypeEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = Some(record) - ) - - // Mock biosample lookup - val biosample = CitizenBiosample( - id = Some(1), - atUri = Some(biosampleAtUri), - accession = Some("SAMPLE-001"), - alias = None, - sourcePlatform = None, - collectionDate = None, - sex = None, - geocoord = None, - description = None, - sampleGuid = sampleGuid, - deleted = false, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - when(biosampleRepo.findByAtUri(biosampleAtUri)).thenReturn(Future.successful(Some(biosample))) - - // Mock test type lookup - val testType = TestTypeRow( - id = Some(1), - code = "ARRAY_23ANDME_V5", - displayName = "23andMe v5", - category = DataGenerationMethod.Genotyping, - targetType = TargetType.WholeGenome, - supportsHaplogroupY = true, - supportsHaplogroupMt = true, - supportsAutosomalIbd = true, - supportsAncestry = true, - typicalFileFormats = List("TXT", "CSV") - ) - when(testTypeService.getByCode("ARRAY_23ANDME_V5")).thenReturn(Future.successful(Some(testType))) - - // Mock genotype creation - when(genotypeRepo.create(any[GenotypeData])).thenAnswer(new Answer[Future[GenotypeData]] { - override def answer(invocation: InvocationOnMock): Future[GenotypeData] = { - val data = invocation.getArgument[GenotypeData](0) - Future.successful(data.copy(id = Some(100))) - } - }) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - val success = result.asInstanceOf[FirehoseResult.Success] - success.message must include("Genotype Created") - success.sampleGuid mustBe Some(sampleGuid) - - verify(biosampleRepo).findByAtUri(biosampleAtUri) - verify(testTypeService).getByCode("ARRAY_23ANDME_V5") 
- verify(genotypeRepo).create(any[GenotypeData]) - } - } - - "return validation error when biosample not found for genotype" in { - val mocks = createMocks() - val (biosampleRepo, _, _, _, _, _, _, _, _, _, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.genotype/rkey1" - val biosampleAtUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/nonexistent" - - val record = GenotypeRecord( - atUri = atUri, - meta = createRecordMeta(), - biosampleRef = biosampleAtUri, - testTypeCode = "ARRAY_23ANDME_V5", - provider = "23andMe", - chipType = None, - chipVersion = None, - totalMarkersCalled = None, - totalMarkersPossible = None, - callRate = None, - noCallRate = None, - yMarkersCalled = None, - yMarkersTotal = None, - mtMarkersCalled = None, - mtMarkersTotal = None, - autosomalMarkersCalled = None, - hetRate = None, - testDate = None, - processedAt = None, - buildVersion = None, - sourceFileHash = None, - derivedHaplogroups = None, - populationBreakdownRef = None, - files = None, - imputationRef = None - ) - - val event = GenotypeEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = Some(record) - ) - - when(biosampleRepo.findByAtUri(biosampleAtUri)).thenReturn(Future.successful(None)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.ValidationError] - result.asInstanceOf[FirehoseResult.ValidationError].message must include("Biosample not found") - } - } - - "return validation error when test type code is invalid" in { - val mocks = createMocks() - val (biosampleRepo, _, _, _, _, _, testTypeService, _, _, _, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.genotype/rkey1" - val biosampleAtUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/sample1" - val sampleGuid = UUID.randomUUID() - - val record = GenotypeRecord( - atUri = atUri, - meta = createRecordMeta(), - biosampleRef = 
biosampleAtUri, - testTypeCode = "INVALID_TEST_TYPE", - provider = "Unknown", - chipType = None, - chipVersion = None, - totalMarkersCalled = None, - totalMarkersPossible = None, - callRate = None, - noCallRate = None, - yMarkersCalled = None, - yMarkersTotal = None, - mtMarkersCalled = None, - mtMarkersTotal = None, - autosomalMarkersCalled = None, - hetRate = None, - testDate = None, - processedAt = None, - buildVersion = None, - sourceFileHash = None, - derivedHaplogroups = None, - populationBreakdownRef = None, - files = None, - imputationRef = None - ) - - val event = GenotypeEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = Some(record) - ) - - val biosample = CitizenBiosample( - id = Some(1), - atUri = Some(biosampleAtUri), - accession = Some("SAMPLE-001"), - alias = None, - sourcePlatform = None, - collectionDate = None, - sex = None, - geocoord = None, - description = None, - sampleGuid = sampleGuid, - deleted = false, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - when(biosampleRepo.findByAtUri(biosampleAtUri)).thenReturn(Future.successful(Some(biosample))) - when(testTypeService.getByCode("INVALID_TEST_TYPE")).thenReturn(Future.successful(None)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.ValidationError] - result.asInstanceOf[FirehoseResult.ValidationError].message must include("Invalid test type code") - } - } - - "delete genotype successfully" in { - val mocks = createMocks() - val (_, _, _, _, _, _, _, genotypeRepo, _, _, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.genotype/rkey1" - val sampleGuid = UUID.randomUUID() - - val existing = GenotypeData( - id = Some(100), - atUri = Some(atUri), - atCid = Some("cid123"), - sampleGuid = sampleGuid, - testTypeId = Some(1), - provider = Some("23andMe"), - chipVersion = None, - buildVersion = None, - sourceFileHash = None, - metrics = 
GenotypeMetrics(), - populationBreakdownId = None, - deleted = false, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - - val event = GenotypeEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Delete, - payload = None - ) - - when(genotypeRepo.findByAtUri(atUri)).thenReturn(Future.successful(Some(existing))) - when(genotypeRepo.softDelete(100)).thenReturn(Future.successful(true)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - verify(genotypeRepo).softDelete(100) - } - } - } - - // ==================== POPULATION BREAKDOWN TESTS ==================== - - "AtmosphereEventHandler - PopulationBreakdown" should { - - "create a population breakdown with components successfully" in { - val mocks = createMocks() - val (biosampleRepo, _, _, _, _, _, _, _, popRepo, _, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.populationBreakdown/rkey1" - val biosampleAtUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/sample1" - val sampleGuid = UUID.randomUUID() - - val record = PopulationBreakdownRecord( - atUri = atUri, - meta = createRecordMeta(), - biosampleRef = biosampleAtUri, - analysisMethod = "PCA_PROJECTION_GMM", - panelType = Some("aims"), - referencePopulations = Some("1000G_HGDP_v1"), - referenceVersion = Some("v1.0"), - kValue = Some(33), - snpsAnalyzed = Some(5000), - snpsWithGenotype = Some(4800), - snpsMissing = Some(200), - confidenceLevel = Some(0.95), - pcaCoordinates = Some(Seq(0.5, -0.3, 0.1)), - components = Seq( - AtmospherePopulationComponent("GBR", Some("British"), Some("European"), 45.5, Some(Map("lower" -> 40.0, "upper" -> 51.0)), Some(1)), - AtmospherePopulationComponent("IBS", Some("Iberian"), Some("European"), 25.3, Some(Map("lower" -> 20.0, "upper" -> 30.0)), Some(2)), - AtmospherePopulationComponent("TSI", Some("Tuscan"), Some("European"), 15.2, None, Some(3)) - ), - 
superPopulationSummary = Some(Seq( - AtmosphereSuperPopulationSummary("European", 86.0, Seq("GBR", "IBS", "TSI")) - )), - analysisDate = Some(Instant.now()), - pipelineVersion = Some("1.0.0") - ) - - val event = PopulationBreakdownEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = Some(record) - ) - - // Mock biosample lookup - val biosample = CitizenBiosample( - id = Some(1), - atUri = Some(biosampleAtUri), - accession = Some("SAMPLE-001"), - alias = None, - sourcePlatform = None, - collectionDate = None, - sex = None, - geocoord = None, - description = None, - sampleGuid = sampleGuid, - deleted = false, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - when(biosampleRepo.findByAtUri(biosampleAtUri)).thenReturn(Future.successful(Some(biosample))) - - // Mock population breakdown creation - when(popRepo.create(any[PopulationBreakdown])).thenAnswer(new Answer[Future[PopulationBreakdown]] { - override def answer(invocation: InvocationOnMock): Future[PopulationBreakdown] = { - val data = invocation.getArgument[PopulationBreakdown](0) - Future.successful(data.copy(id = Some(100))) - } - }) - - // Mock component upsert - when(popRepo.upsertComponentsByBreakdownId(any[Int], any[Seq[models.domain.genomics.PopulationComponent]])).thenReturn(Future.successful(Seq.empty)) - when(popRepo.upsertSummariesByBreakdownId(any[Int], any[Seq[models.domain.genomics.SuperPopulationSummary]])).thenReturn(Future.successful(Seq.empty)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - val success = result.asInstanceOf[FirehoseResult.Success] - success.message must include("Population Breakdown Created") - success.sampleGuid mustBe Some(sampleGuid) - - verify(biosampleRepo).findByAtUri(biosampleAtUri) - verify(popRepo).create(any[PopulationBreakdown]) - verify(popRepo).upsertComponentsByBreakdownId(any[Int], 
any[Seq[models.domain.genomics.PopulationComponent]]) - verify(popRepo).upsertSummariesByBreakdownId(any[Int], any[Seq[models.domain.genomics.SuperPopulationSummary]]) - } - } - - "return validation error when biosample not found for population breakdown" in { - val mocks = createMocks() - val (biosampleRepo, _, _, _, _, _, _, _, _, _, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.populationBreakdown/rkey1" - val biosampleAtUri = "at://did:plc:test123/com.decodingus.atmosphere.biosample/nonexistent" - - val record = PopulationBreakdownRecord( - atUri = atUri, - meta = createRecordMeta(), - biosampleRef = biosampleAtUri, - analysisMethod = "PCA_PROJECTION_GMM", - panelType = None, - referencePopulations = None, - referenceVersion = None, - kValue = None, - snpsAnalyzed = None, - snpsWithGenotype = None, - snpsMissing = None, - confidenceLevel = None, - pcaCoordinates = None, - components = Seq.empty, - superPopulationSummary = None, - analysisDate = None, - pipelineVersion = None - ) - - val event = PopulationBreakdownEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = Some(record) - ) - - when(biosampleRepo.findByAtUri(biosampleAtUri)).thenReturn(Future.successful(None)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.ValidationError] - result.asInstanceOf[FirehoseResult.ValidationError].message must include("Biosample not found") - } - } - - "delete population breakdown successfully" in { - val mocks = createMocks() - val (_, _, _, _, _, _, _, _, popRepo, _, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.populationBreakdown/rkey1" - val sampleGuid = UUID.randomUUID() - - val existing = PopulationBreakdown( - id = Some(100), - atUri = Some(atUri), - atCid = Some("cid123"), - sampleGuid = sampleGuid, - analysisMethod = "PCA_PROJECTION_GMM", - panelType = None, - referencePopulations = None, - 
referenceVersion = None, - snpsAnalyzed = None, - snpsWithGenotype = None, - snpsMissing = None, - confidenceLevel = None, - pcaCoordinates = None, - analysisDate = None, - pipelineVersion = None, - deleted = false, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - - val event = PopulationBreakdownEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Delete, - payload = None - ) - - when(popRepo.findByAtUri(atUri)).thenReturn(Future.successful(Some(existing))) - when(popRepo.softDelete(100)).thenReturn(Future.successful(true)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - verify(popRepo).softDelete(100) - } - } - } - - // ==================== HAPLOGROUP RECONCILIATION TESTS ==================== - - "AtmosphereEventHandler - HaplogroupReconciliation" should { - - "create a haplogroup reconciliation successfully" in { - val mocks = createMocks() - val (_, _, _, _, donorRepo, _, _, _, _, reconRepo, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.haplogroupReconciliation/rkey1" - val donorAtUri = "at://did:plc:test123/specimen-donor/donor1" - - val record = HaplogroupReconciliationRecord( - atUri = atUri, - meta = createRecordMeta(), - specimenDonorRef = donorAtUri, - dnaType = "Y_DNA", - status = AtmosphereReconciliationStatus( - compatibilityLevel = "COMPATIBLE", - consensusHaplogroup = "R-BY18291", - confidence = Some(0.98), - divergencePoint = None, - branchCompatibilityScore = Some(0.95), - snpConcordance = Some(0.99), - runCount = Some(2), - warnings = None - ), - runCalls = Seq( - RunHaplogroupCall( - sourceRef = "at://run1", - haplogroup = "R-BY18291", - confidence = 0.97, - callMethod = "SNP_PHYLOGENETIC", - score = Some(0.97), - supportingSnps = Some(500), - conflictingSnps = Some(2), - noCalls = Some(10), - technology = Some("WGS"), - meanCoverage = Some(30.0), - treeVersion = Some("ISOGG2024"), - 
strPrediction = None - ), - RunHaplogroupCall( - sourceRef = "at://run2", - haplogroup = "R-BY18291", - confidence = 0.95, - callMethod = "SNP_PHYLOGENETIC", - score = Some(0.95), - supportingSnps = Some(450), - conflictingSnps = Some(5), - noCalls = Some(15), - technology = Some("BIG_Y"), - meanCoverage = Some(100.0), - treeVersion = Some("ISOGG2024"), - strPrediction = None - ) - ), - snpConflicts = None, - heteroplasmyObservations = None, - identityVerification = Some(AtmosphereIdentityVerification( - kinshipCoefficient = Some(0.5), - fingerprintSnpConcordance = Some(0.999), - yStrDistance = Some(0), - verificationStatus = Some("VERIFIED_SAME"), - verificationMethod = Some("Y_STR") - )), - lastReconciliationAt = Some(Instant.now()), - manualOverride = None, - auditLog = None - ) - - val event = HaplogroupReconciliationEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = Some(record) - ) - - // Mock donor lookup - val donor = SpecimenDonor( - id = Some(1), - donorIdentifier = "DONOR-001", - originBiobank = "TestLab", - donorType = BiosampleType.Citizen, - sex = Some(BiologicalSex.Male), - geocoord = None, - atUri = Some(donorAtUri) - ) - when(donorRepo.findByAtUri(donorAtUri)).thenReturn(Future.successful(Some(donor))) - - // Mock reconciliation upsert - when(reconRepo.upsertBySpecimenDonorAndDnaType(any[HaplogroupReconciliation])).thenAnswer(new Answer[Future[HaplogroupReconciliation]] { - override def answer(invocation: InvocationOnMock): Future[HaplogroupReconciliation] = { - val data = invocation.getArgument[HaplogroupReconciliation](0) - Future.successful(data.copy(id = Some(100))) - } - }) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - val success = result.asInstanceOf[FirehoseResult.Success] - success.message must include("Haplogroup Reconciliation Created") - - verify(donorRepo).findByAtUri(donorAtUri) - 
verify(reconRepo).upsertBySpecimenDonorAndDnaType(any[HaplogroupReconciliation]) - } - } - - "return validation error when specimen donor not found" in { - val mocks = createMocks() - val (_, _, _, _, donorRepo, _, _, _, _, _, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.haplogroupReconciliation/rkey1" - val donorAtUri = "at://did:plc:test123/specimen-donor/nonexistent" - - val record = HaplogroupReconciliationRecord( - atUri = atUri, - meta = createRecordMeta(), - specimenDonorRef = donorAtUri, - dnaType = "Y_DNA", - status = AtmosphereReconciliationStatus( - compatibilityLevel = "COMPATIBLE", - consensusHaplogroup = "R-M269", - confidence = None, - divergencePoint = None, - branchCompatibilityScore = None, - snpConcordance = None, - runCount = None, - warnings = None - ), - runCalls = Seq.empty, - snpConflicts = None, - heteroplasmyObservations = None, - identityVerification = None, - lastReconciliationAt = None, - manualOverride = None, - auditLog = None - ) - - val event = HaplogroupReconciliationEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = Some(record) - ) - - when(donorRepo.findByAtUri(donorAtUri)).thenReturn(Future.successful(None)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.ValidationError] - result.asInstanceOf[FirehoseResult.ValidationError].message must include("Specimen donor not found") - } - } - - "delete haplogroup reconciliation successfully" in { - val mocks = createMocks() - val (_, _, _, _, _, _, _, _, _, reconRepo, _, _, _) = mocks - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.haplogroupReconciliation/rkey1" - - val existing = HaplogroupReconciliation( - id = Some(100), - atUri = Some(atUri), - atCid = Some("cid123"), - specimenDonorId = 1, - dnaType = DnaType.Y_DNA, - status = models.domain.genomics.ReconciliationStatus( - compatibilityLevel = Some("COMPATIBLE"), - 
consensusHaplogroup = Some("R-M269"), - statusConfidence = None, - branchCompatibilityScore = None, - snpConcordance = None, - runCount = None, - warnings = None - ), - runCalls = Json.arr(), - snpConflicts = None, - heteroplasmyObservations = None, - identityVerification = None, - manualOverride = None, - auditLog = None, - lastReconciliationAt = None, - deleted = false, - createdAt = LocalDateTime.now(), - updatedAt = LocalDateTime.now() - ) - - val event = HaplogroupReconciliationEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Delete, - payload = None - ) - - when(reconRepo.findByAtUri(atUri)).thenReturn(Future.successful(Some(existing))) - when(reconRepo.softDelete(100)).thenReturn(Future.successful(true)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - verify(reconRepo).softDelete(100) - } - } - } - - // ==================== PROJECT TESTS ==================== - - "AtmosphereEventHandler - Project" should { - - "create a project successfully" in { - val mocks = createMocks() - val (_, _, _, _, _, projectRepo, _, _, _, _, _, _, _) = mocks - - val atUri = "at://did:plc:admin/com.decodingus.atmosphere.project/rkey1" - - val record = ProjectRecord( - atUri = atUri, - meta = createRecordMeta(), - projectName = "Test Research Project", - description = Some("A test project for testing"), - administrator = "did:plc:admin", - memberRefs = Seq("did:plc:member1", "did:plc:member2") - ) - - val event = AtmosphereProjectEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = Some(record) - ) - - when(projectRepo.create(any[Project])).thenAnswer(new Answer[Future[Project]] { - override def answer(invocation: InvocationOnMock): Future[Project] = { - val proj = invocation.getArgument[Project](0) - Future.successful(proj.copy(id = Some(100))) - } - }) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result 
mustBe a[FirehoseResult.Success] - val success = result.asInstanceOf[FirehoseResult.Success] - success.message must include("Project Created") - - verify(projectRepo).create(any[Project]) - } - } - - "delete project successfully" in { - val mocks = createMocks() - val (_, _, _, _, _, projectRepo, _, _, _, _, _, _, _) = mocks - - val atUri = "at://did:plc:admin/com.decodingus.atmosphere.project/rkey1" - - val event = AtmosphereProjectEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Delete, - payload = None - ) - - when(projectRepo.softDeleteByAtUri(atUri)).thenReturn(Future.successful(true)) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - verify(projectRepo).softDeleteByAtUri(atUri) - } - } - } - - // ==================== VALIDATION ERROR TESTS ==================== - - "AtmosphereEventHandler - Validation" should { - - "return validation error when payload is missing for create" in { - val mocks = createMocks() - - val atUri = "at://did:plc:test123/com.decodingus.atmosphere.genotype/rkey1" - - val event = GenotypeEvent( - atUri = atUri, - atCid = None, - action = FirehoseAction.Create, - payload = None - ) - - val handler = createHandler(mocks) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.ValidationError] - result.asInstanceOf[FirehoseResult.ValidationError].message must include("Payload required") - } - } - } -} diff --git a/test/services/firehose/InstrumentObservationHandlerSpec.scala b/test/services/firehose/InstrumentObservationHandlerSpec.scala deleted file mode 100644 index ecf67ebf..00000000 --- a/test/services/firehose/InstrumentObservationHandlerSpec.scala +++ /dev/null @@ -1,192 +0,0 @@ -package services.firehose - -import helpers.ServiceSpec -import models.atmosphere.{InstrumentObservationRecord, RecordMeta} -import models.domain.genomics.{InstrumentObservation, ObservationConfidence} -import 
org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{never, reset, verify, when} -import repositories.* -import services.TestTypeService - -import java.time.{Instant, LocalDateTime} -import java.util.UUID -import scala.concurrent.Future - -class InstrumentObservationHandlerSpec extends ServiceSpec { - - val mockCitizenBiosampleRepo: CitizenBiosampleRepository = mock[CitizenBiosampleRepository] - val mockSeqLibraryRepo: SequenceLibraryRepository = mock[SequenceLibraryRepository] - val mockSeqFileRepo: SequenceFileRepository = mock[SequenceFileRepository] - val mockAlignmentRepo: AlignmentRepository = mock[AlignmentRepository] - val mockDonorRepo: SpecimenDonorRepository = mock[SpecimenDonorRepository] - val mockProjectRepo: ProjectRepository = mock[ProjectRepository] - val mockTestTypeService: TestTypeService = mock[TestTypeService] - val mockGenotypeRepo: GenotypeDataRepository = mock[GenotypeDataRepository] - val mockPopBreakdownRepo: PopulationBreakdownRepository = mock[PopulationBreakdownRepository] - val mockHgReconciliationRepo: HaplogroupReconciliationRepository = mock[HaplogroupReconciliationRepository] - val mockInstrumentObsRepo: InstrumentObservationRepository = mock[InstrumentObservationRepository] - val mockGroupProjectRepo: GroupProjectRepository = mock[GroupProjectRepository] - val mockGroupProjectMemberRepo: GroupProjectMemberRepository = mock[GroupProjectMemberRepository] - - val mockMatchConsentRepo: MatchConsentTrackingRepository = mock[MatchConsentTrackingRepository] - val mockMatchRequestRepo: MatchRequestTrackingRepository = mock[MatchRequestTrackingRepository] - val mockPopAnalysisService: services.ibd.PopulationAnalysisService = mock[services.ibd.PopulationAnalysisService] - - val handler = new AtmosphereEventHandler( - mockCitizenBiosampleRepo, mockSeqLibraryRepo, mockSeqFileRepo, - mockAlignmentRepo, mockDonorRepo, mockProjectRepo, mockTestTypeService, - mockGenotypeRepo, mockPopBreakdownRepo, mockHgReconciliationRepo, 
mockInstrumentObsRepo, - mockGroupProjectRepo, mockGroupProjectMemberRepo, - mockMatchConsentRepo, mockMatchRequestRepo, mockPopAnalysisService - ) - - override def beforeEach(): Unit = { - reset(mockInstrumentObsRepo) - } - - val testAtUri = "at://did:plc:abc123/us.decoding.instrument.observation/1" - - val testRecord: InstrumentObservationRecord = InstrumentObservationRecord( - atUri = testAtUri, - meta = RecordMeta(version = 1, createdAt = Instant.now(), updatedAt = Some(Instant.now()), lastModifiedField = None), - instrumentId = "A00123", - labName = "Dante Labs", - biosampleRef = "at://did:plc:abc123/us.decoding.biosample/1", - sequenceRunRef = Some("at://did:plc:abc123/us.decoding.sequenceRun/1"), - platform = Some("ILLUMINA"), - instrumentModel = Some("NovaSeq 6000"), - flowcellId = Some("FC001"), - runDate = Some(Instant.now()), - confidence = Some("KNOWN") - ) - - def makeEvent(action: FirehoseAction, payload: Option[InstrumentObservationRecord] = Some(testRecord)): InstrumentObservationEvent = - InstrumentObservationEvent( - atUri = testAtUri, - atCid = Some(UUID.randomUUID().toString), - action = action, - payload = payload - ) - - "AtmosphereEventHandler - InstrumentObservation" should { - - "create observation from firehose event" in { - val event = makeEvent(FirehoseAction.Create) - when(mockInstrumentObsRepo.findByAtUri(testAtUri)).thenReturn(Future.successful(None)) - when(mockInstrumentObsRepo.create(any[InstrumentObservation])).thenReturn( - Future.successful(InstrumentObservation( - id = Some(1), atUri = testAtUri, instrumentId = "A00123", labName = "Dante Labs", - biosampleRef = "at://did:plc:abc123/us.decoding.biosample/1" - )) - ) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - result.asInstanceOf[FirehoseResult.Success].message must include("created") - verify(mockInstrumentObsRepo).create(any[InstrumentObservation]) - } - } - - "return Conflict when observation already exists on create" in { - val 
event = makeEvent(FirehoseAction.Create) - val existing = InstrumentObservation( - id = Some(1), atUri = testAtUri, instrumentId = "A00123", labName = "Dante Labs", - biosampleRef = "at://did:plc:abc123/us.decoding.biosample/1" - ) - when(mockInstrumentObsRepo.findByAtUri(testAtUri)).thenReturn(Future.successful(Some(existing))) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Conflict] - verify(mockInstrumentObsRepo, never()).create(any[InstrumentObservation]) - } - } - - "return ValidationError when create has no payload" in { - val event = makeEvent(FirehoseAction.Create, payload = None) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.ValidationError] - } - } - - "update existing observation" in { - val event = makeEvent(FirehoseAction.Update) - val existing = InstrumentObservation( - id = Some(1), atUri = testAtUri, instrumentId = "A00123", labName = "Old Lab", - biosampleRef = "at://did:plc:abc123/us.decoding.biosample/1" - ) - when(mockInstrumentObsRepo.findByAtUri(testAtUri)).thenReturn(Future.successful(Some(existing))) - when(mockInstrumentObsRepo.update(any[InstrumentObservation])).thenReturn(Future.successful(true)) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - result.asInstanceOf[FirehoseResult.Success].message must include("updated") - verify(mockInstrumentObsRepo).update(any[InstrumentObservation]) - } - } - - "return NotFound when updating nonexistent observation" in { - val event = makeEvent(FirehoseAction.Update) - when(mockInstrumentObsRepo.findByAtUri(testAtUri)).thenReturn(Future.successful(None)) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.NotFound] - } - } - - "return ValidationError when update has no payload" in { - val event = makeEvent(FirehoseAction.Update, payload = None) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.ValidationError] - } - } - - "delete 
existing observation" in { - val event = makeEvent(FirehoseAction.Delete, payload = None) - when(mockInstrumentObsRepo.deleteByAtUri(testAtUri)).thenReturn(Future.successful(true)) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - result.asInstanceOf[FirehoseResult.Success].message must include("deleted") - } - } - - "return NotFound when deleting nonexistent observation" in { - val event = makeEvent(FirehoseAction.Delete, payload = None) - when(mockInstrumentObsRepo.deleteByAtUri(testAtUri)).thenReturn(Future.successful(false)) - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.NotFound] - } - } - - "map confidence field correctly" in { - val event = makeEvent(FirehoseAction.Create, payload = Some(testRecord.copy(confidence = Some("GUESSED")))) - when(mockInstrumentObsRepo.findByAtUri(testAtUri)).thenReturn(Future.successful(None)) - when(mockInstrumentObsRepo.create(any[InstrumentObservation])).thenAnswer { invocation => - val obs = invocation.getArgument[InstrumentObservation](0) - obs.confidence mustBe ObservationConfidence.Guessed - Future.successful(obs.copy(id = Some(1))) - } - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - } - } - - "default confidence to INFERRED when not specified" in { - val event = makeEvent(FirehoseAction.Create, payload = Some(testRecord.copy(confidence = None))) - when(mockInstrumentObsRepo.findByAtUri(testAtUri)).thenReturn(Future.successful(None)) - when(mockInstrumentObsRepo.create(any[InstrumentObservation])).thenAnswer { invocation => - val obs = invocation.getArgument[InstrumentObservation](0) - obs.confidence mustBe ObservationConfidence.Inferred - Future.successful(obs.copy(id = Some(1))) - } - - whenReady(handler.handle(event)) { result => - result mustBe a[FirehoseResult.Success] - } - } - } -} diff --git a/test/services/genomics/SequencerInstrumentServiceSpec.scala 
b/test/services/genomics/SequencerInstrumentServiceSpec.scala deleted file mode 100644 index 3e948a8b..00000000 --- a/test/services/genomics/SequencerInstrumentServiceSpec.scala +++ /dev/null @@ -1,177 +0,0 @@ -package services.genomics - -import helpers.ServiceSpec -import models.api.SequencerLabInfo -import models.domain.genomics.* -import org.mockito.Mockito.{reset, when} -import repositories.* - -import scala.concurrent.Future - -class SequencerInstrumentServiceSpec extends ServiceSpec { - - val mockInstrumentRepo: SequencerInstrumentRepository = mock[SequencerInstrumentRepository] - val mockProposalRepo: InstrumentProposalRepository = mock[InstrumentProposalRepository] - val mockObservationRepo: InstrumentObservationRepository = mock[InstrumentObservationRepository] - - val service = new SequencerInstrumentService( - mockInstrumentRepo, mockProposalRepo, mockObservationRepo - ) - - override def beforeEach(): Unit = { - reset(mockInstrumentRepo, mockProposalRepo, mockObservationRepo) - } - - val confirmedLab: SequencerLabInfo = SequencerLabInfo( - instrumentId = "A00123", - labName = "Dante Labs", - isD2c = true, - manufacturer = Some("Illumina"), - model = Some("NovaSeq 6000"), - websiteUrl = Some("https://dantelabs.com") - ) - - val pendingProposal: InstrumentAssociationProposal = InstrumentAssociationProposal( - id = Some(5), - instrumentId = "A00123", - proposedLabName = "Nebula Genomics", - proposedManufacturer = Some("Illumina"), - proposedModel = Some("NovaSeq X"), - observationCount = 7, - distinctCitizenCount = 4, - confidenceScore = 0.85, - status = ProposalStatus.ReadyForReview - ) - - def makeObservation(instrumentId: String = "A00123"): InstrumentObservation = - InstrumentObservation( - id = Some(1), - atUri = s"at://did:plc:abc/us.decoding.instrument.observation/${System.nanoTime()}", - instrumentId = instrumentId, - labName = "Dante Labs", - biosampleRef = "at://did:plc:abc/us.decoding.biosample/1" - ) - - "SequencerInstrumentService.lookupLab" 
should { - - "return confirmed lab with confidence metadata" in { - val observations = (1 to 3).map(_ => makeObservation()) - when(mockInstrumentRepo.findLabByInstrumentId("A00123")) - .thenReturn(Future.successful(Some(confirmedLab))) - when(mockProposalRepo.findActiveByInstrumentId("A00123")) - .thenReturn(Future.successful(None)) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(observations)) - - whenReady(service.lookupLab("A00123")) { result => - result mustBe defined - val resp = result.get - resp.labName mustBe Some("Dante Labs") - resp.source mustBe "CURATOR" - resp.confidenceScore mustBe 1.0 - resp.observationCount mustBe 3 - resp.pendingProposal mustBe None - resp.isD2c mustBe Some(true) - resp.websiteUrl mustBe Some("https://dantelabs.com") - } - } - - "return confirmed lab with pending proposal when proposal differs" in { - when(mockInstrumentRepo.findLabByInstrumentId("A00123")) - .thenReturn(Future.successful(Some(confirmedLab))) - when(mockProposalRepo.findActiveByInstrumentId("A00123")) - .thenReturn(Future.successful(Some(pendingProposal))) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(Seq(makeObservation()))) - - whenReady(service.lookupLab("A00123")) { result => - result mustBe defined - val resp = result.get - resp.labName mustBe Some("Dante Labs") - resp.source mustBe "CURATOR" - resp.pendingProposal mustBe defined - resp.pendingProposal.get.proposedLabName mustBe "Nebula Genomics" - resp.pendingProposal.get.confidenceScore mustBe 0.85 - } - } - - "not include pending proposal when proposal matches confirmed lab" in { - val matchingProposal = pendingProposal.copy(proposedLabName = "Dante Labs") - when(mockInstrumentRepo.findLabByInstrumentId("A00123")) - .thenReturn(Future.successful(Some(confirmedLab))) - when(mockProposalRepo.findActiveByInstrumentId("A00123")) - .thenReturn(Future.successful(Some(matchingProposal))) - 
when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(Seq(makeObservation()))) - - whenReady(service.lookupLab("A00123")) { result => - result mustBe defined - result.get.pendingProposal mustBe None - } - } - - "return consensus result from proposal when no confirmed lab" in { - when(mockInstrumentRepo.findLabByInstrumentId("B00456")) - .thenReturn(Future.successful(None)) - when(mockProposalRepo.findActiveByInstrumentId("B00456")) - .thenReturn(Future.successful(Some(pendingProposal.copy(instrumentId = "B00456")))) - when(mockObservationRepo.findByInstrumentId("B00456")) - .thenReturn(Future.successful(Seq.empty)) - - whenReady(service.lookupLab("B00456")) { result => - result mustBe defined - val resp = result.get - resp.labName mustBe Some("Nebula Genomics") - resp.source mustBe "CONSENSUS" - resp.confidenceScore mustBe 0.85 - resp.pendingProposal mustBe defined - resp.pendingProposal.get.status mustBe "READY_FOR_REVIEW" - } - } - - "return None when no confirmed lab and no proposal" in { - when(mockInstrumentRepo.findLabByInstrumentId("UNKNOWN")) - .thenReturn(Future.successful(None)) - when(mockProposalRepo.findActiveByInstrumentId("UNKNOWN")) - .thenReturn(Future.successful(None)) - when(mockObservationRepo.findByInstrumentId("UNKNOWN")) - .thenReturn(Future.successful(Seq.empty)) - - whenReady(service.lookupLab("UNKNOWN")) { result => - result mustBe None - } - } - - "include observation count from confirmed observations" in { - val observations = (1 to 15).map(_ => makeObservation()) - when(mockInstrumentRepo.findLabByInstrumentId("A00123")) - .thenReturn(Future.successful(Some(confirmedLab))) - when(mockProposalRepo.findActiveByInstrumentId("A00123")) - .thenReturn(Future.successful(None)) - when(mockObservationRepo.findByInstrumentId("A00123")) - .thenReturn(Future.successful(observations)) - - whenReady(service.lookupLab("A00123")) { result => - result mustBe defined - result.get.observationCount mustBe 15 - } - } - } - - 
"SequencerInstrumentService.associateLabWithInstrument" should { - - "reject empty instrument ID" in { - whenReady(service.associateLabWithInstrument("", "Dante Labs").failed) { ex => - ex mustBe an[IllegalArgumentException] - ex.getMessage must include("Instrument ID") - } - } - - "reject empty lab name" in { - whenReady(service.associateLabWithInstrument("A00123", "").failed) { ex => - ex mustBe an[IllegalArgumentException] - ex.getMessage must include("Lab name") - } - } - } -} diff --git a/test/utils/VariantViewUtilsSpec.scala b/test/utils/VariantViewUtilsSpec.scala deleted file mode 100644 index fb208639..00000000 --- a/test/utils/VariantViewUtilsSpec.scala +++ /dev/null @@ -1,62 +0,0 @@ -package utils - -import models.domain.genomics.{MutationType, NamingStatus, StrCoordinates, VariantV2} -import org.scalatestplus.play.PlaySpec -import play.api.libs.json.Json - -class VariantViewUtilsSpec extends PlaySpec { - - "VariantViewUtils" should { - "format position correctly for STRs" in { - val strCoords = StrCoordinates( - contig = "chrY", - start = 100, - end = 110, - period = 4, - repeatMotif = Some("ATCG"), - referenceRepeats = Some(10) - ) - - val variant = VariantV2( - canonicalName = Some("DYS393"), - mutationType = MutationType.STR, - namingStatus = NamingStatus.Named, - coordinates = Json.obj("GRCh38" -> Json.toJson(strCoords)) - ) - - // Current behavior fails this, returns "chrY:0" - VariantViewUtils.formatPosition(variant, "GRCh38") mustBe "chrY:100-110" - - val (ref, alt) = VariantViewUtils.formatAllelesTuple(variant, "GRCh38") - ref mustBe "ATCG x 10" - alt mustBe "?" 
- } - - "format alleles correctly for STRs with N/A motif" in { - val strCoords = StrCoordinates( - contig = "chrY", - start = 200, - end = 210, - period = 4, - repeatMotif = Some("N/A"), // Motif explicitly "N/A" - referenceRepeats = Some(43) - ) - - val variant = VariantV2( - canonicalName = Some("DYS385_2"), - mutationType = MutationType.STR, - namingStatus = NamingStatus.Named, - coordinates = Json.obj("GRCh38" -> Json.toJson(strCoords)) - ) - - val (ref, alt) = VariantViewUtils.formatAllelesTuple(variant, "GRCh38") - ref mustBe "(repeats: 43)" - alt mustBe "?" - } - - "format position correctly for SNPs" in { - val variant = VariantV2.snp("rs123", "GRCh38", "chr1", 500, "A", "T") - VariantViewUtils.formatPosition(variant, "GRCh38") mustBe "chr1:500" - } - } -}